On 2023-03-17 17:37, Ondřej Surý via lttng-dev wrote:
Replace the custom assembly code in include/urcu/uatomic/ with the __atomic builtins provided by a C11-compatible compiler.
[...]
+#define UATOMIC_HAS_ATOMIC_BYTE
+#define UATOMIC_HAS_ATOMIC_SHORT
+
+#define uatomic_set(addr, v) __atomic_store_n(addr, v, __ATOMIC_RELEASE)
+
+#define uatomic_read(addr) __atomic_load_n((addr), __ATOMIC_CONSUME)
+
+#define uatomic_xchg(addr, v) __atomic_exchange_n((addr), (v), __ATOMIC_ACQ_REL)
+
+#define uatomic_cmpxchg(addr, old, new)					\
+	({								\
+		__typeof__(*(addr)) __old = old;			\
+		__atomic_compare_exchange_n(addr, &__old, new, 0,	\
+			__ATOMIC_ACQ_REL, __ATOMIC_CONSUME);		\
In doc/uatomic-api.md, we document:

"```c
type uatomic_cmpxchg(type *addr, type old, type new);
```

An atomic read-modify-write operation that performs this sequence of operations atomically: check if `addr` contains `old`. If true, then replace the content of `addr` by `new`. Return the value previously contained by `addr`. This function implies a full memory barrier before and after the atomic operation."

This would map to a __ATOMIC_ACQ_REL semantic on cmpxchg failure rather than __ATOMIC_CONSUME (a sketch of a stronger-ordered variant follows the quoted hunk below).
+		__old;							\
+	})
+
+#define uatomic_add_return(addr, v)					\
+	__atomic_add_fetch((addr), (v), __ATOMIC_ACQ_REL)
+
+#define uatomic_add(addr, v)						\
+	(void)__atomic_add_fetch((addr), (v), __ATOMIC_RELAXED)
+
+#define uatomic_sub_return(addr, v)					\
+	__atomic_sub_fetch((addr), (v), __ATOMIC_ACQ_REL)
+
+#define uatomic_sub(addr, v)						\
+	(void)__atomic_sub_fetch((addr), (v), __ATOMIC_RELAXED)
+
+#define uatomic_and(addr, mask)						\
+	(void)__atomic_and_fetch((addr), (mask), __ATOMIC_RELAXED)
+
+#define uatomic_or(addr, mask)						\
+	(void)__atomic_or_fetch((addr), (mask), __ATOMIC_RELAXED)
+
+#define uatomic_inc(addr) (void)__atomic_add_fetch((addr), 1, __ATOMIC_RELAXED)
+#define uatomic_dec(addr) (void)__atomic_sub_fetch((addr), 1, __ATOMIC_RELAXED)
+
+#define cmm_smp_mb__before_uatomic_and() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__after_uatomic_and() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__before_uatomic_or() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__after_uatomic_or() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__before_uatomic_add() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__after_uatomic_add() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define cmm_smp_mb__before_uatomic_sub() cmm_smp_mb__before_uatomic_add()
+#define cmm_smp_mb__after_uatomic_sub() cmm_smp_mb__after_uatomic_add()
+#define cmm_smp_mb__before_uatomic_inc() cmm_smp_mb__before_uatomic_add()
+#define cmm_smp_mb__after_uatomic_inc() cmm_smp_mb__after_uatomic_add()
+#define cmm_smp_mb__before_uatomic_dec() cmm_smp_mb__before_uatomic_add()
+#define cmm_smp_mb__after_uatomic_dec() cmm_smp_mb__after_uatomic_add()
+
+#define cmm_smp_mb() cmm_mb()
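For illustration only (this is not code from the patch under review), here is a minimal sketch of a cmpxchg wrapper that keeps the full-barrier semantics documented in doc/uatomic-api.md. Note that the __atomic builtins do not accept __ATOMIC_ACQ_REL (or __ATOMIC_RELEASE) as a failure ordering, so the sketch uses __ATOMIC_SEQ_CST on both the success and failure paths; the name uatomic_cmpxchg_mb is made up for this example.

```c
/*
 * Sketch only: keep the documented "full memory barrier before and after"
 * behaviour of uatomic_cmpxchg().  The failure ordering of
 * __atomic_compare_exchange_n() cannot be __ATOMIC_ACQ_REL or
 * __ATOMIC_RELEASE, so __ATOMIC_SEQ_CST is used on both paths.
 */
#define uatomic_cmpxchg_mb(addr, old, new)				\
	({								\
		__typeof__(*(addr)) __old = (old);			\
		__atomic_compare_exchange_n((addr), &__old, (new), 0,	\
			__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);		\
		__old;							\
	})
```

With __ATOMIC_SEQ_CST on both paths, the failed-comparison case is at least as strong as what the documentation promises, rather than degrading to __ATOMIC_CONSUME.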
While OK for the general case, I would recommend that we immediately implement something more efficient on x86 32/64, which takes into account that __ATOMIC_ACQ_REL atomic operations are implemented with LOCK-prefixed atomic ops, which already imply the full barrier, leaving the before/after_uatomic_*() as no-ops.
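For illustration only, a minimal sketch of the kind of x86-specific override suggested above, assuming liburcu's cmm_barrier() compiler barrier is available; it is not code from the patch.

```c
/*
 * Sketch of an x86-specific override.  On x86/x86-64, __ATOMIC_ACQ_REL
 * read-modify-write operations compile to LOCK-prefixed instructions,
 * which already act as full memory barriers, so only compiler reordering
 * needs to be prevented around them.
 */
#if defined(__i386__) || defined(__x86_64__)
#undef  cmm_smp_mb__before_uatomic_add
#undef  cmm_smp_mb__after_uatomic_add
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()
/* ...and likewise for the and/or/sub/inc/dec variants. */
#endif
```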
Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com