<snip>
> > > Subject: [EXT] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare
> > > exchange
> > >
> > > Add 128-bit atomic compare exchange on aarch64.
> > >
> > > Signed-off-by: Phil Yang <phil.y...@arm.com>
> > > Tested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > > ---
> > > v3:
> > > 1. Avoid duplicated code with a macro. (Jerin Jacob)
> > > 2. Map invalid memory orders to the strongest barrier. (Jerin Jacob)
> > > 3. Update doc/guides/prog_guide/env_abstraction_layer.rst. (Gage Eads)
> > > 4. Fix 32-bit x86 build issue. (Gage Eads)
> > > 5. Correct documentation issues in UT. (Gage Eads)
> > >
> > >  .../common/include/arch/arm/rte_atomic_64.h        | 165 +++++++++++++++++++++
> > >  .../common/include/arch/x86/rte_atomic_64.h        |  12 --
> > >  lib/librte_eal/common/include/generic/rte_atomic.h |  17 ++-
> > >  3 files changed, 181 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > index 97060e4..2080c4d 100644
> > > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > @@ -1,5 +1,6 @@
> > >  /* SPDX-License-Identifier: BSD-3-Clause
> > >   * Copyright(c) 2015 Cavium, Inc
> > > + * Copyright(c) 2019 Arm Limited
> > >   */
> > >
> > >  #ifndef _RTE_ATOMIC_ARM64_H_
> > > @@ -14,6 +15,9 @@ extern "C" {
> > >  #endif
> > >
> > >  #include "generic/rte_atomic.h"
> > > +#include <rte_branch_prediction.h>
> > > +#include <rte_compat.h>
> > > +#include <rte_debug.h>
> > >
> > >  #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
> > >  #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
> > > @@ -40,6 +44,167 @@ extern "C" {
> > >
> > >  #define rte_cio_rmb() dmb(oshld)
> > >
> > > +/*------------------------ 128 bit atomic operations -------------------------*/
> > > +
> > > +#define RTE_HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
> > > +#define RTE_HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || \
> > > +			 (mo) == __ATOMIC_ACQ_REL || \
> > > +			 (mo) == __ATOMIC_SEQ_CST)
> > > +
> > > +#define RTE_MO_LOAD(mo)  (RTE_HAS_ACQ((mo)) \
> > > +		? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
> > > +#define RTE_MO_STORE(mo) (RTE_HAS_RLS((mo)) \
> > > +		? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
> > > +
> > > +#ifdef __ARM_FEATURE_ATOMICS
> > > +#define __ATOMIC128_CAS_OP(cas_op_name, op_string)                  \
> > > +static inline rte_int128_t                                          \
> > > +cas_op_name(rte_int128_t *dst, rte_int128_t old,                    \
> > > +		rte_int128_t updated)                               \
> > > +{                                                                   \
> > > +	/* caspX instructions register pair must start from even-numbered
> > > +	 * register at operand 1.
> > > +	 * So, specify registers for local variables here.
> > > +	 */                                                         \
> > > +	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];    \
> >
> > I understand the CASP limitation that the register pair has to be even
> > and odd. Is there any way to remove the explicit x0 register allocation
> > and let the compiler decide the register? For some reason, with
> > optimize(O3), gcc generates this correctly but clang does not.
> >
> > Hardcoding a specific register keeps the compiler from optimizing the
> > code, especially if it is an inline function.
>
> It looks like the limitation was fixed recently in gcc:
> https://patches.linaro.org/patch/147991/
>
> Not sure about old gcc and clang.
> ARM compiler experts may know the exact status.
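To make the trade-off concrete, here is a self-contained sketch of the
pinned-register pattern the patch uses. The type and function names are
stand-ins for illustration, not the DPDK definitions; it should build with
gcc -O2 -march=armv8.1-a on an aarch64 target. Pinning the operands to
x0/x1 and x2/x3 satisfies CASP's even/odd pair rule by hand, at the cost
of constraining the register allocator around inlined call sites:

	#include <stdint.h>

	/* Stand-in for DPDK's rte_int128_t, for illustration only. */
	typedef struct {
		uint64_t val[2];
	} int128_pair_t;

	static inline int128_pair_t
	casp_pinned(int128_pair_t *dst, int128_pair_t old,
			int128_pair_t updated)
	{
		/* CASP needs each register pair to start at an even
		 * register, so pin the operands to x0/x1 and x2/x3.
		 */
		register uint64_t x0 __asm__("x0") = old.val[0];
		register uint64_t x1 __asm__("x1") = old.val[1];
		register uint64_t x2 __asm__("x2") = updated.val[0];
		register uint64_t x3 __asm__("x3") = updated.val[1];

		/* Plain casp is the relaxed variant; caspa/caspl/caspal
		 * add acquire/release semantics.
		 */
		__asm__ volatile("casp %0, %1, %2, %3, [%4]"
				 : "+r" (x0), "+r" (x1)
				 : "r" (x2), "r" (x3), "r" (dst)
				 : "memory");

		old.val[0] = x0;	/* previous contents of *dst */
		old.val[1] = x1;
		return old;
	}
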
We could use syntax as follows; an example is in [1]:

static inline rte_int128_t
__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
{
	__asm__ volatile("caspl %0, %H0, %1, %H1, [%2]"
			: "+r" (old)
			: "r" (updated), "r" (dst)
			: "memory");
	return old;
}
[1] https://godbolt.org/z/EUJnuG
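
For comparison, here is a compile-ready sketch of that pair-operand form,
using __uint128_t so it builds without the DPDK headers (names are
hypothetical; assumes a compiler with the fix above that allocates 128-bit
asm operands to even/odd pairs; build with gcc -O3 -march=armv8.1-a on
aarch64). With a 128-bit "+r" operand, %0 names the low register of the
pair and %H0 the high one, so the allocator, not the programmer, picks the
registers:

	#include <stdint.h>
	#include <stdio.h>

	static inline __uint128_t
	casp_relaxed(__uint128_t *dst, __uint128_t old, __uint128_t updated)
	{
		/* The compiler assigns "old" and "updated" to even/odd
		 * register pairs; %H0 prints the high register of the
		 * pair holding operand 0. Plain casp = relaxed ordering.
		 */
		__asm__ volatile("casp %0, %H0, %1, %H1, [%2]"
				 : "+r" (old)
				 : "r" (updated), "r" (dst)
				 : "memory");
		return old;	/* previous contents of *dst */
	}

	int main(void)
	{
		__uint128_t v = 1;
		__uint128_t prev = casp_relaxed(&v, 1, 2);

		/* CAS succeeds: prev == 1 and v is now 2
		 * (only the low 64-bit halves are printed).
		 */
		printf("prev=%llu v=%llu\n",
		       (unsigned long long)prev, (unsigned long long)v);
		return 0;
	}

Since nothing here forces a particular register number, the allocator is
free to keep values live across the inlined CAS, which addresses the
optimization concern raised above.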