Instead of custom code, use the __atomic_thread_fence() builtin to implement the cmm_mb(), cmm_rmb(), cmm_wmb(), cmm_smp_mb(), cmm_smp_rmb(), and cmm_smp_wmb() family of functions on all architectures, and cmm_read_barrier_depends() on alpha (otherwise it remains a no-op).
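
As a quick illustration of the ordering these builtins provide (a minimal stand-alone sketch, not part of the patch: the data/flag variables and the writer()/reader() functions below are hypothetical), a release fence before a flag store pairs with an acquire fence after the flag load, which is the publish/consume pattern cmm_wmb()/cmm_rmb() are typically used for:

	static int data;
	static int flag;

	static void writer(void)
	{
		data = 42;
		/* what the generic cmm_wmb() expands to after this patch */
		__atomic_thread_fence(__ATOMIC_RELEASE);
		__atomic_store_n(&flag, 1, __ATOMIC_RELAXED);
	}

	static int reader(void)
	{
		if (__atomic_load_n(&flag, __ATOMIC_RELAXED)) {
			/* what the generic cmm_rmb() expands to after this patch */
			__atomic_thread_fence(__ATOMIC_ACQUIRE);
			return data;	/* guaranteed to observe 42 */
		}
		return -1;
	}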
Signed-off-by: Ondřej Surý <ond...@sury.org>
---
 include/urcu/arch/alpha.h   |  6 +++---
 include/urcu/arch/arm.h     | 14 -------------
 include/urcu/arch/generic.h |  6 +++---
 include/urcu/arch/mips.h    |  6 ------
 include/urcu/arch/nios2.h   |  2 --
 include/urcu/arch/ppc.h     | 25 ----------------------
 include/urcu/arch/s390.h    |  2 --
 include/urcu/arch/sparc64.h | 13 ------------
 include/urcu/arch/x86.h     | 42 +++----------------------------------
 9 files changed, 9 insertions(+), 107 deletions(-)

diff --git a/include/urcu/arch/alpha.h b/include/urcu/arch/alpha.h
index dc33e28..61687c7 100644
--- a/include/urcu/arch/alpha.h
+++ b/include/urcu/arch/alpha.h
@@ -29,9 +29,9 @@ extern "C" {
 #endif
 
-#define cmm_mb()                   __asm__ __volatile__ ("mb":::"memory")
-#define cmm_wmb()                  __asm__ __volatile__ ("wmb":::"memory")
-#define cmm_read_barrier_depends() __asm__ __volatile__ ("mb":::"memory")
+#ifndef cmm_read_barrier_depends
+#define cmm_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME)
+#endif
 
 /*
  * On Linux, define the membarrier system call number if not yet available in
diff --git a/include/urcu/arch/arm.h b/include/urcu/arch/arm.h
index 54ca4fa..b3671dc 100644
--- a/include/urcu/arch/arm.h
+++ b/include/urcu/arch/arm.h
@@ -39,20 +39,6 @@ extern "C" {
 /* For backwards compat. */
 #define CONFIG_RCU_ARM_HAVE_DMB 1
 
-/*
- * Issues full system DMB operation.
- */
-#define cmm_mb()      __asm__ __volatile__ ("dmb sy":::"memory")
-#define cmm_rmb()     __asm__ __volatile__ ("dmb sy":::"memory")
-#define cmm_wmb()     __asm__ __volatile__ ("dmb sy":::"memory")
-
-/*
- * Issues DMB operation only to the inner shareable domain.
- */
-#define cmm_smp_mb()  __asm__ __volatile__ ("dmb ish":::"memory")
-#define cmm_smp_rmb() __asm__ __volatile__ ("dmb ish":::"memory")
-#define cmm_smp_wmb() __asm__ __volatile__ ("dmb ish":::"memory")
-
 #endif /* URCU_ARCH_ARMV7 */
 
 #include <stdlib.h>
diff --git a/include/urcu/arch/generic.h b/include/urcu/arch/generic.h
index be6e41e..2715162 100644
--- a/include/urcu/arch/generic.h
+++ b/include/urcu/arch/generic.h
@@ -44,15 +44,15 @@ extern "C" {
  */
 
 #ifndef cmm_mb
-#define cmm_mb()  __sync_synchronize()
+#define cmm_mb()  __atomic_thread_fence(__ATOMIC_SEQ_CST)
 #endif
 
 #ifndef cmm_rmb
-#define cmm_rmb() cmm_mb()
+#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
 #endif
 
 #ifndef cmm_wmb
-#define cmm_wmb() cmm_mb()
+#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
 #endif
 
 #define cmm_mc() cmm_barrier()
diff --git a/include/urcu/arch/mips.h b/include/urcu/arch/mips.h
index ea5b7e9..ffe65c0 100644
--- a/include/urcu/arch/mips.h
+++ b/include/urcu/arch/mips.h
@@ -30,12 +30,6 @@ extern "C" {
 #endif
 
-#define cmm_mb()    __asm__ __volatile__ (  \
-                    "    .set mips2    \n"  \
-                    "    sync          \n"  \
-                    "    .set mips0    \n"  \
-                    :::"memory")
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/urcu/arch/nios2.h b/include/urcu/arch/nios2.h
index b4f3e50..cd6bdb8 100644
--- a/include/urcu/arch/nios2.h
+++ b/include/urcu/arch/nios2.h
@@ -29,8 +29,6 @@ extern "C" {
 #endif
 
-#define cmm_mb() cmm_barrier()
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/urcu/arch/ppc.h b/include/urcu/arch/ppc.h
index 791529e..618f79c 100644
--- a/include/urcu/arch/ppc.h
+++ b/include/urcu/arch/ppc.h
@@ -34,31 +34,6 @@ extern "C" {
 /* Include size of POWER5+ L3 cache lines: 256 bytes */
 #define CAA_CACHE_LINE_SIZE 256
 
-#ifdef __NO_LWSYNC__
-#define LWSYNC_OPCODE "sync\n"
-#else
-#define LWSYNC_OPCODE "lwsync\n"
-#endif
-
-/*
- * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
- * preserve ordering of cacheable vs. non-cacheable accesses, so it
- * should not be used to order with respect to MMIO operations. An
- * eieio+lwsync pair is also not enough for cmm_rmb, because it will
- * order cacheable and non-cacheable memory operations separately---i.e.
- * not the latter against the former.
- */
-#define cmm_mb()      __asm__ __volatile__ ("sync":::"memory")
-
-/*
- * lwsync orders loads in cacheable memory with respect to other loads,
- * and stores in cacheable memory with respect to other stores.
- * Therefore, use it for barriers ordering accesses to cacheable memory
- * only.
- */
-#define cmm_smp_rmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
-#define cmm_smp_wmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
-
 #define mftbl()             \
     __extension__           \
     ({                      \
diff --git a/include/urcu/arch/s390.h b/include/urcu/arch/s390.h
index 67461b4..c22fdf9 100644
--- a/include/urcu/arch/s390.h
+++ b/include/urcu/arch/s390.h
@@ -39,8 +39,6 @@ extern "C" {
 #define CAA_CACHE_LINE_SIZE 128
 
-#define cmm_mb() __asm__ __volatile__("bcr 15,0" : : : "memory")
-
 #define HAS_CAA_GET_CYCLES
 
 typedef uint64_t caa_cycles_t;
diff --git a/include/urcu/arch/sparc64.h b/include/urcu/arch/sparc64.h
index 1ff40f5..b4e25ca 100644
--- a/include/urcu/arch/sparc64.h
+++ b/include/urcu/arch/sparc64.h
@@ -40,19 +40,6 @@ extern "C" {
 #define CAA_CACHE_LINE_SIZE 256
 
-/*
- * Inspired from the Linux kernel. Workaround Spitfire bug #51.
- */
-#define membar_safe(type) \
-__asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
-                     "membar " type "\n" \
-                     "1:\n" \
-                     : : : "memory")
-
-#define cmm_mb()  membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define cmm_rmb() membar_safe("#LoadLoad")
-#define cmm_wmb() membar_safe("#StoreStore")
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/urcu/arch/x86.h b/include/urcu/arch/x86.h
index 744f9f9..af4487d 100644
--- a/include/urcu/arch/x86.h
+++ b/include/urcu/arch/x86.h
@@ -46,44 +46,8 @@ extern "C" {
 /* For backwards compat */
 #define CONFIG_RCU_HAVE_FENCE 1
 
-#define cmm_mb()      __asm__ __volatile__ ("mfence":::"memory")
-
-/*
- * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
- * using SSE or working with I/O areas. cmm_smp_rmb/cmm_smp_wmb are
- * only compiler barriers, which is enough for general use.
- */
-#define cmm_rmb()     __asm__ __volatile__ ("lfence":::"memory")
-#define cmm_wmb()     __asm__ __volatile__ ("sfence"::: "memory")
-#define cmm_smp_rmb() cmm_barrier()
-#define cmm_smp_wmb() cmm_barrier()
-
-#else
-
-/*
- * We leave smp_rmb/smp_wmb as full barriers for processors that do not have
- * fence instructions.
- *
- * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor
- * systems, due to an erratum. The Linux kernel says that "Even distro
- * kernels should think twice before enabling this", but for now let's
- * be conservative and leave the full barrier on 32-bit processors. Also,
- * IDT WinChip supports weak store ordering, and the kernel may enable it
- * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
- */
-#if (CAA_BITS_PER_LONG == 32)
-#define cmm_mb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
-#else
-#define cmm_mb()  __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#define cmm_rmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#define cmm_wmb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
-#endif
 #endif
 
-#define caa_cpu_relax() __asm__ __volatile__ ("rep; nop" : : : "memory")
-
 #define HAS_CAA_GET_CYCLES
 
 #define rdtscll(val)                        \
@@ -98,10 +62,10 @@ typedef uint64_t caa_cycles_t;
 
 static inline caa_cycles_t caa_get_cycles(void)
 {
-	caa_cycles_t ret = 0;
+	caa_cycles_t ret = 0;
 
-	rdtscll(ret);
-	return ret;
+	rdtscll(ret);
+	return ret;
 }
 
 /*
-- 
2.39.2