On 2023-03-17 17:37, Ondřej Surý via lttng-dev wrote:
Instead of custom code, use the __atomic builtins to implement the
rcu_dereference(), rcu_cmpxchg_pointer(), rcu_xchg_pointer() and
rcu_assign_pointer().

This also changes the cmm_mb() family of functions, but only under some configurations (e.g. when TSan is enabled), not everywhere. This inconsistency should be documented.

I'm also unsure why architecture code has #ifndef cmm_mb when we would expect the generic arch implementation to be conditional (the other way around).


Signed-off-by: Ondřej Surý <ond...@sury.org>
---
  include/urcu/arch.h           | 20 +++++++++
  include/urcu/arch/alpha.h     |  6 +++
  include/urcu/arch/arm.h       | 12 ++++++
  include/urcu/arch/mips.h      |  2 +
  include/urcu/arch/nios2.h     |  2 +
  include/urcu/arch/ppc.h       |  6 +++
  include/urcu/arch/s390.h      |  2 +
  include/urcu/arch/sparc64.h   |  6 +++
  include/urcu/arch/x86.h       | 20 +++++++++
  include/urcu/static/pointer.h | 77 +++++++----------------------------
  10 files changed, 90 insertions(+), 63 deletions(-)

diff --git a/include/urcu/arch.h b/include/urcu/arch.h
index d3914da..aec6fa1 100644
--- a/include/urcu/arch.h
+++ b/include/urcu/arch.h
@@ -21,6 +21,26 @@
  #ifndef _URCU_ARCH_H
  #define _URCU_ARCH_H
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif /* if !defined(__has_feature) */
+
+/* GCC defines __SANITIZE_ADDRESS__, so reuse the macro for clang */
+#if __has_feature(address_sanitizer)
+#define __SANITIZE_ADDRESS__ 1
+#endif /* if __has_feature(address_sanitizer) */
+
+#ifdef __SANITIZE_THREAD__
+/* FIXME: Somebody who understands the barriers should look into this */
+#define cmm_mb() __atomic_thread_fence(__ATOMIC_ACQ_REL)

This really needs to be __ATOMIC_SEQ_CST.

+#define cmm_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)

I am really unsure that rmb/wmb semantics map to acquire/release fences. Paul, can you confirm?

+#define cmm_smp_mb() __atomic_thread_fence(__ATOMIC_ACQ_REL)

SEQ_CST.

+#define cmm_smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define cmm_smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)

Unsure (see above).

+#define cmm_read_barrier_depends() __atomic_thread_fence(__ATOMIC_ACQ_REL)

This would map to __ATOMIC_CONSUME, but AFAIK the current implementation of this semantic is done with __ATOMIC_ACQUIRE which is stronger than what is really needed here. So we can expect a slowdown on some architectures if we go that way.

Should we favor code simplicity and long-term maintainability at the expense of performance in the short-term ? Or should we keep arch-specific implementations until the toolchains end up implementing a proper consume semantic ?

+#endif
+
  /*
   * Architecture detection using compiler defines.
   *
diff --git a/include/urcu/arch/alpha.h b/include/urcu/arch/alpha.h
index dc33e28..84526ef 100644
--- a/include/urcu/arch/alpha.h
+++ b/include/urcu/arch/alpha.h
@@ -29,9 +29,15 @@
  extern "C" {
  #endif
+#ifndef cmm_mb
  #define cmm_mb()                      __asm__ __volatile__ ("mb":::"memory")
+#endif
+#ifndef cmm_wmb
  #define cmm_wmb()                     __asm__ __volatile__ ("wmb":::"memory")
+#endif
+#ifndef cmm_read_barrier_depends
  #define cmm_read_barrier_depends()    __asm__ __volatile__ ("mb":::"memory")
+#endif
[...]
diff --git a/include/urcu/static/pointer.h b/include/urcu/static/pointer.h
index 9e46a57..3f116f3 100644
--- a/include/urcu/static/pointer.h
+++ b/include/urcu/static/pointer.h
@@ -38,6 +38,8 @@
  extern "C" {
  #endif
+#define _rcu_get_pointer(addr) __atomic_load_n(addr, __ATOMIC_CONSUME)
+
  /**
   * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
   * into a RCU read-side critical section. The pointer can later be safely
@@ -49,14 +51,6 @@ extern "C" {
   * Inserts memory barriers on architectures that require them (currently only
   * Alpha) and documents which pointers are protected by RCU.
   *
- * With C standards prior to C11/C++11, the compiler memory barrier in
- * CMM_LOAD_SHARED() ensures that value-speculative optimizations (e.g.
- * VSS: Value Speculation Scheduling) does not perform the data read
- * before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile
- * access. This acts as a global side-effect operation, which forbids
- * reordering of dependent memory operations.

We should document in the patch commit message that rcu_dereference() now relies on memory_order_consume.

- *
   * With C standards C11/C++11, concerns about dependency-breaking
   * optimizations are taken care of by the "memory_order_consume" atomic
   * load.
@@ -65,10 +59,6 @@ extern "C" {
   * explicit because the pointer used as input argument is a pointer,
   * not an _Atomic type as required by C11/C++11.
   *
- * By defining URCU_DEREFERENCE_USE_VOLATILE, the user requires use of
- * volatile access to implement rcu_dereference rather than
- * memory_order_consume load from the C11/C++11 standards.
- *
   * This may improve performance on weakly-ordered architectures where
   * the compiler implements memory_order_consume as a
   * memory_order_acquire, which is stricter than required by the
@@ -83,35 +73,7 @@ extern "C" {
   * meets the 10-line criterion in LGPL, allowing this function to be
   * expanded directly in non-LGPL code.
   */
-
-#if !defined (URCU_DEREFERENCE_USE_VOLATILE) &&                \
-       ((defined (__cplusplus) && __cplusplus >= 201103L) ||        \
-       (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
-# define __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-#endif
-
-/*
- * If p is const (the pointer itself, not what it points to), using
- * __typeof__(p) would declare a const variable, leading to
- * -Wincompatible-pointer-types errors.  Using the statement expression
- * makes it an rvalue and gets rid of the const-ness.
- */
-#ifdef __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
-# define _rcu_dereference(p) __extension__ ({                                  
        \
-                               __typeof__(__extension__ ({                     
        \
-                                       __typeof__(p) __attribute__((unused)) 
_________p0 = { 0 }; \
-                                       _________p0;                            
        \
-                               })) _________p1;                                
        \
-                               __atomic_load(&(p), &_________p1, 
__ATOMIC_CONSUME);    \
-                               (_________p1);                                  
        \
-                       })
-#else
-# define _rcu_dereference(p) __extension__ ({                                  
        \
-                               __typeof__(p) _________p1 = CMM_LOAD_SHARED(p); 
        \
-                               cmm_smp_read_barrier_depends();                 
        \
-                               (_________p1);                                  
        \
-                       })
-#endif
+#define _rcu_dereference(p) _rcu_get_pointer(&(p))
/**
   * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
@@ -126,12 +88,12 @@ extern "C" {
   * meets the 10-line criterion in LGPL, allowing this function to be
   * expanded directly in non-LGPL code.
   */
-#define _rcu_cmpxchg_pointer(p, old, _new)                             \
-       __extension__                                                   \
-       ({                                                              \
-               __typeof__(*p) _________pold = (old);                   \
-               __typeof__(*p) _________pnew = (_new);                  \
-               uatomic_cmpxchg(p, _________pold, _________pnew);       \
+#define _rcu_cmpxchg_pointer(p, old, _new)                                     
        \
+       ({                                                                      
        \
+               __typeof__(*(p)) __old = old;                                   
        \
+               __atomic_compare_exchange_n(p, &__old, _new, 0,                 
            \
+                                           __ATOMIC_ACQ_REL, 
__ATOMIC_CONSUME);        \

__ATOMIC_SEQ_CST on both success and failure.

+               __old;                                                          
        \
        })
/**
@@ -145,22 +107,11 @@ extern "C" {
   * meets the 10-line criterion in LGPL, allowing this function to be
   * expanded directly in non-LGPL code.
   */
-#define _rcu_xchg_pointer(p, v)                                \
-       __extension__                                   \
-       ({                                              \
-               __typeof__(*p) _________pv = (v);       \
-               uatomic_xchg(p, _________pv);           \
-       })
-
+#define _rcu_xchg_pointer(p, v) \
+       __atomic_exchange_n(p, v, __ATOMIC_ACQ_REL)

__ATOMIC_SEQ_CST.

-#define _rcu_set_pointer(p, v) \
-       do {                                            \
-               __typeof__(*p) _________pv = (v);       \
-               if (!__builtin_constant_p(v) ||         \
-                   ((v) != NULL))                      \
-                       cmm_wmb();                              \
-               uatomic_set(p, _________pv);            \
-       } while (0)
+#define _rcu_set_pointer(p, v) \
+       __atomic_store_n(p, v, __ATOMIC_RELEASE)

OK.

Thanks,

Mathieu

/**
   * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
@@ -178,7 +129,7 @@ extern "C" {
   * meets the 10-line criterion in LGPL, allowing this function to be
   * expanded directly in non-LGPL code.
   */
-#define _rcu_assign_pointer(p, v)      _rcu_set_pointer(&(p), v)
+#define _rcu_assign_pointer(p, v) _rcu_set_pointer(&(p), v)
#ifdef __cplusplus
  }

--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

Reply via email to