Instead of custom code, use the __atomic builtins to implement rcu_dereference(), rcu_cmpxchg_pointer(), rcu_xchg_pointer() and rcu_assign_pointer().
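
For illustration, the mapping onto the builtins can be sketched as follows. This is only an example: the struct foo type, the gp pointer and the example() function are hypothetical, while the memory orders match the new definitions in include/urcu/static/pointer.h below.

    struct foo { int a; };

    static struct foo *gp;      /* hypothetical RCU-protected pointer */

    static void example(struct foo *newp, struct foo *oldp)
    {
        struct foo *p, *prev, *expected;

        /* rcu_assign_pointer(): publish the new node with a release store. */
        __atomic_store_n(&gp, newp, __ATOMIC_RELEASE);

        /* rcu_dereference(): consume load keeps address dependencies ordered. */
        p = __atomic_load_n(&gp, __ATOMIC_CONSUME);

        /* rcu_xchg_pointer(): atomic swap with acquire-release ordering. */
        prev = __atomic_exchange_n(&gp, newp, __ATOMIC_ACQ_REL);

        /* rcu_cmpxchg_pointer(): CAS; on failure, expected holds the current value. */
        expected = oldp;
        __atomic_compare_exchange_n(&gp, &expected, newp, 0,
                                    __ATOMIC_ACQ_REL, __ATOMIC_CONSUME);

        (void) p; (void) prev; (void) expected;
    }

The release store on the update side pairs with the consume load on the read side, so readers can dereference the published pointer without additional barriers.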
Signed-off-by: Ondřej Surý <ond...@sury.org>
---
 include/urcu/arch.h           | 20 +++++++++
 include/urcu/arch/alpha.h     |  6 +++
 include/urcu/arch/arm.h       | 12 ++++++
 include/urcu/arch/mips.h      |  2 +
 include/urcu/arch/nios2.h     |  2 +
 include/urcu/arch/ppc.h       |  6 +++
 include/urcu/arch/s390.h      |  2 +
 include/urcu/arch/sparc64.h   |  6 +++
 include/urcu/arch/x86.h       | 20 +++++++++
 include/urcu/static/pointer.h | 77 +++++++----------------------------
 10 files changed, 90 insertions(+), 63 deletions(-)

diff --git a/include/urcu/arch.h b/include/urcu/arch.h
index d3914da..aec6fa1 100644
--- a/include/urcu/arch.h
+++ b/include/urcu/arch.h
@@ -21,6 +21,26 @@
 #ifndef _URCU_ARCH_H
 #define _URCU_ARCH_H
 
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif /* if !defined(__has_feature) */
+
+/* GCC defines __SANITIZE_ADDRESS__, so reuse the macro for clang */
+#if __has_feature(address_sanitizer)
+#define __SANITIZE_ADDRESS__ 1
+#endif /* if __has_feature(address_sanitizer) */
+
+#ifdef __SANITIZE_THREAD__
+/* FIXME: Somebody who understands the barriers should look into this */
+#define cmm_mb() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define cmm_smp_mb() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define cmm_read_barrier_depends() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#endif
+
 /*
  * Architecture detection using compiler defines.
  *
diff --git a/include/urcu/arch/alpha.h b/include/urcu/arch/alpha.h
index dc33e28..84526ef 100644
--- a/include/urcu/arch/alpha.h
+++ b/include/urcu/arch/alpha.h
@@ -29,9 +29,15 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("mb":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb() __asm__ __volatile__ ("wmb":::"memory")
+#endif
+#ifndef cmm_read_barrier_depends
 #define cmm_read_barrier_depends() __asm__ __volatile__ ("mb":::"memory")
+#endif
 
 /*
  * On Linux, define the membarrier system call number if not yet available in
diff --git a/include/urcu/arch/arm.h b/include/urcu/arch/arm.h
index 54ca4fa..4950e13 100644
--- a/include/urcu/arch/arm.h
+++ b/include/urcu/arch/arm.h
@@ -42,16 +42,28 @@ extern "C" {
 /*
  * Issues full system DMB operation.
  */
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("dmb sy":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb() __asm__ __volatile__ ("dmb sy":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb() __asm__ __volatile__ ("dmb sy":::"memory")
+#endif
 
 /*
  * Issues DMB operation only to the inner shareable domain.
  */
+#ifndef cmm_smp_mb
 #define cmm_smp_mb() __asm__ __volatile__ ("dmb ish":::"memory")
+#endif
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb() __asm__ __volatile__ ("dmb ish":::"memory")
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb() __asm__ __volatile__ ("dmb ish":::"memory")
+#endif
 
 #endif /* URCU_ARCH_ARMV7 */
diff --git a/include/urcu/arch/mips.h b/include/urcu/arch/mips.h
index ea5b7e9..b9ee021 100644
--- a/include/urcu/arch/mips.h
+++ b/include/urcu/arch/mips.h
@@ -30,11 +30,13 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ (     \
 		" .set mips2   \n"          \
 		" sync         \n"          \
 		" .set mips0   \n"          \
 		:::"memory")
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/nios2.h b/include/urcu/arch/nios2.h
index b4f3e50..5def45c 100644
--- a/include/urcu/arch/nios2.h
+++ b/include/urcu/arch/nios2.h
@@ -29,7 +29,9 @@
 extern "C" {
 #endif
 
+#ifndef cmm_mb
 #define cmm_mb() cmm_barrier()
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index 791529e..b8ec40d 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -48,7 +48,9 @@ extern "C" {
  * order cacheable and non-cacheable memory operations separately---i.e.
  * not the latter against the former.
  */
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("sync":::"memory")
+#endif
 
 /*
  * lwsync orders loads in cacheable memory with respect to other loads,
@@ -56,8 +58,12 @@ extern "C" {
  * Therefore, use it for barriers ordering accesses to cacheable memory
  * only.
  */
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
+#endif
 
 #define mftbl()                 \
 	__extension__           \
diff --git a/include/urcu/arch/s390.h b/include/urcu/arch/s390.h
index 67461b4..2733873 100644
--- a/include/urcu/arch/s390.h
+++ b/include/urcu/arch/s390.h
@@ -39,7 +39,9 @@ extern "C" {
 
 #define CAA_CACHE_LINE_SIZE 128
 
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__("bcr 15,0" : : : "memory")
+#endif
 
 #define HAS_CAA_GET_CYCLES
diff --git a/include/urcu/arch/sparc64.h b/include/urcu/arch/sparc64.h
index 1ff40f5..32a6b0e 100644
--- a/include/urcu/arch/sparc64.h
+++ b/include/urcu/arch/sparc64.h
@@ -49,9 +49,15 @@ __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
 		     "1:\n" \
 		     : : : "memory")
 
+#ifndef cmm_mb
 #define cmm_mb() membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb() membar_safe("#LoadLoad")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb() membar_safe("#StoreStore")
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/include/urcu/arch/x86.h b/include/urcu/arch/x86.h
index 744f9f9..6be9d38 100644
--- a/include/urcu/arch/x86.h
+++ b/include/urcu/arch/x86.h
@@ -46,15 +46,23 @@ extern "C" {
 /* For backwards compat */
 #define CONFIG_RCU_HAVE_FENCE 1
 
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("mfence":::"memory")
+#endif
 
 /*
  * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
  * using SSE or working with I/O areas.  cmm_smp_rmb/cmm_smp_wmb are
  * only compiler barriers, which is enough for general use.
  */
+#ifndef cmm_rmb
 #define cmm_rmb() __asm__ __volatile__ ("lfence":::"memory")
+#endif
+
+#ifndef cmm_wmb
 #define cmm_wmb() __asm__ __volatile__ ("sfence"::: "memory")
+#endif
+
 #define cmm_smp_rmb() cmm_barrier()
 #define cmm_smp_wmb() cmm_barrier()
 
@@ -72,15 +80,27 @@ extern "C" {
  * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
  */
 #if (CAA_BITS_PER_LONG == 32)
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
 #else
+#ifndef cmm_mb
 #define cmm_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
+#endif
+#ifndef cmm_rmb
 #define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
+#endif
+#ifndef cmm_wmb
 #define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
 #endif
 #endif
+#endif
 
 #define caa_cpu_relax() __asm__ __volatile__ ("rep; nop" : : : "memory")
diff --git a/include/urcu/static/pointer.h b/include/urcu/static/pointer.h
index 9e46a57..3f116f3 100644
--- a/include/urcu/static/pointer.h
+++ b/include/urcu/static/pointer.h
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#define _rcu_get_pointer(addr) __atomic_load_n(addr, __ATOMIC_CONSUME)
+
 /**
  * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
  * into a RCU read-side critical section. The pointer can later be safely
@@ -49,14 +51,6 @@ extern "C" {
  * Inserts memory barriers on architectures that require them (currently only
  * Alpha) and documents which pointers are protected by RCU.
  *
- * With C standards prior to C11/C++11, the compiler memory barrier in
- * CMM_LOAD_SHARED() ensures that value-speculative optimizations (e.g.
- * VSS: Value Speculation Scheduling) does not perform the data read
- * before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile
- * access. This acts as a global side-effect operation, which forbids
- * reordering of dependent memory operations.
- *
  * With C standards C11/C++11, concerns about dependency-breaking
  * optimizations are taken care of by the "memory_order_consume" atomic
  * load.
@@ -65,10 +59,6 @@ extern "C" {
  * explicit because the pointer used as input argument is a pointer,
  * not an _Atomic type as required by C11/C++11.
  *
- * By defining URCU_DEREFERENCE_USE_VOLATILE, the user requires use of
- * volatile access to implement rcu_dereference rather than
- * memory_order_consume load from the C11/C++11 standards.
- *
  * This may improve performance on weakly-ordered architectures where
  * the compiler implements memory_order_consume as a
  * memory_order_acquire, which is stricter than required by the
@@ -83,35 +73,7 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-
-#if !defined (URCU_DEREFERENCE_USE_VOLATILE) && \
-	((defined (__cplusplus) && __cplusplus >= 201103L) || \
-	(defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
-# define __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-#endif
-
-/*
- * If p is const (the pointer itself, not what it points to), using
- * __typeof__(p) would declare a const variable, leading to
- * -Wincompatible-pointer-types errors.  Using the statement expression
- * makes it an rvalue and gets rid of the const-ness.
- */
-#ifdef __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-# define _rcu_dereference(p) __extension__ ({ \
-		__typeof__(__extension__ ({ \
-			__typeof__(p) __attribute__((unused)) _________p0 = { 0 }; \
-			_________p0; \
-		})) _________p1; \
-		__atomic_load(&(p), &_________p1, __ATOMIC_CONSUME); \
-		(_________p1); \
-	})
-#else
-# define _rcu_dereference(p) __extension__ ({ \
-		__typeof__(p) _________p1 = CMM_LOAD_SHARED(p); \
-		cmm_smp_read_barrier_depends(); \
-		(_________p1); \
-	})
-#endif
+#define _rcu_dereference(p) _rcu_get_pointer(&(p))
 
 /**
  * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
@@ -126,12 +88,12 @@
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_cmpxchg_pointer(p, old, _new)			\
-	__extension__						\
-	({							\
-		__typeof__(*p) _________pold = (old);		\
-		__typeof__(*p) _________pnew = (_new);		\
-		uatomic_cmpxchg(p, _________pold, _________pnew); \
+#define _rcu_cmpxchg_pointer(p, old, _new)			\
+	({							\
+		__typeof__(*(p)) __old = old;			\
+		__atomic_compare_exchange_n(p, &__old, _new, 0,	\
+			__ATOMIC_ACQ_REL, __ATOMIC_CONSUME);	\
+		__old;						\
 	})
 
 /**
 * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous
@@ -145,22 +107,11 @@
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_xchg_pointer(p, v)				\
-	__extension__					\
-	({						\
-		__typeof__(*p) _________pv = (v);	\
-		uatomic_xchg(p, _________pv);		\
-	})
-
+#define _rcu_xchg_pointer(p, v) \
+	__atomic_exchange_n(p, v, __ATOMIC_ACQ_REL)
 
-#define _rcu_set_pointer(p, v)				\
-	do {						\
-		__typeof__(*p) _________pv = (v);	\
-		if (!__builtin_constant_p(v) ||		\
-		    ((v) != NULL))			\
-			cmm_wmb();			\
-		uatomic_set(p, _________pv);		\
-	} while (0)
+#define _rcu_set_pointer(p, v) \
+	__atomic_store_n(p, v, __ATOMIC_RELEASE)
 
 /**
  * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
@@ -178,7 +129,7 @@
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
-#define _rcu_assign_pointer(p, v)	_rcu_set_pointer(&(p), v)
+#define _rcu_assign_pointer(p, v)	rcu_set_pointer(&(p), v)
 
 #ifdef __cplusplus
 }
-- 
2.39.2