> Subject: [PATCH v6 01/39] eal: use C11 alignas > > The current location used for __rte_aligned(a) for alignment of types > and variables is not compatible with MSVC. There is only a single > location accepted by both toolchains. > > For variables standard C11 offers alignas(a) supported by conformant > compilers i.e. both MSVC and GCC. > > For types the standard offers no alignment facility that compatibly > interoperates with C and C++, but the desired alignment may be achieved > by relocating the placement of __rte_aligned(a) to the aforementioned > location accepted by all currently supported toolchains. > > To allow alignment for both compilers, do the following: > > * Expand __rte_aligned(a) to __declspec(align(a)) when building > with MSVC. > > * Move __rte_aligned from the end of {struct,union} definitions to > be between {struct,union} and tag. > > The placement between {struct,union} and the tag allows the desired > alignment to be imparted on the type regardless of the toolchain being > used for all of GCC, LLVM, MSVC compilers building both C and C++. > > * Replace use of __rte_aligned(a) on variables/fields with alignas(a). 
> > Signed-off-by: Tyler Retzlaff <roret...@linux.microsoft.com> > Acked-by: Morten Brørup <m...@smartsharesystems.com> > Acked-by: Bruce Richardson <bruce.richard...@intel.com> > --- > lib/eal/arm/include/rte_vect.h | 4 ++-- > lib/eal/common/malloc_elem.h | 4 ++-- > lib/eal/common/malloc_heap.h | 4 ++-- > lib/eal/common/rte_keepalive.c | 3 ++- > lib/eal/common/rte_random.c | 4 ++-- > lib/eal/common/rte_service.c | 8 ++++---- > lib/eal/include/generic/rte_atomic.h | 4 ++-- > lib/eal/include/rte_common.h | 23 +++++++++++++++-------- > lib/eal/loongarch/include/rte_vect.h | 8 ++++---- > lib/eal/ppc/include/rte_vect.h | 4 ++-- > lib/eal/riscv/include/rte_vect.h | 4 ++-- > lib/eal/x86/include/rte_vect.h | 4 ++-- > lib/eal/x86/rte_power_intrinsics.c | 10 ++++++---- > 13 files changed, 47 insertions(+), 37 deletions(-) > > diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h > index 8cfe4bd..c97d299 100644 > --- a/lib/eal/arm/include/rte_vect.h > +++ b/lib/eal/arm/include/rte_vect.h > @@ -24,14 +24,14 @@ > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > > -typedef union rte_xmm { > +typedef union __rte_aligned(16) rte_xmm { > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32) > /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ > diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h > index 952ce73..c7ff671 100644 > --- a/lib/eal/common/malloc_elem.h > +++ b/lib/eal/common/malloc_elem.h > @@ -20,7 +20,7 @@ enum elem_state { > ELEM_PAD /* element is a padding-only header */ > }; > > -struct malloc_elem { > +struct __rte_cache_aligned malloc_elem { > struct malloc_heap *heap; > struct malloc_elem *volatile prev; > 
/**< points to prev elem in memseg */ > @@ -48,7 +48,7 @@ struct malloc_elem { > size_t user_size; > uint64_t asan_cookie[2]; /* must be next to header_cookie */ > #endif > -} __rte_cache_aligned; > +}; > > static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct > malloc_elem); > > diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h > index 8f3ab57..0c49588 100644 > --- a/lib/eal/common/malloc_heap.h > +++ b/lib/eal/common/malloc_heap.h > @@ -21,7 +21,7 @@ > /** > * Structure to hold malloc heap > */ > -struct malloc_heap { > +struct __rte_cache_aligned malloc_heap { > rte_spinlock_t lock; > LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; > struct malloc_elem *volatile first; > @@ -31,7 +31,7 @@ struct malloc_heap { > unsigned int socket_id; > size_t total_size; > char name[RTE_HEAP_NAME_MAX_LEN]; > -} __rte_cache_aligned; > +}; > > void * > malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int > flags, > diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c > index f6db973..391c1be 100644 > --- a/lib/eal/common/rte_keepalive.c > +++ b/lib/eal/common/rte_keepalive.c > @@ -2,6 +2,7 @@ > * Copyright(c) 2015-2016 Intel Corporation > */ > > +#include <stdalign.h> > #include <inttypes.h> > > #include <rte_common.h> > @@ -19,7 +20,7 @@ struct rte_keepalive { > /* > * Each element must be cache aligned to prevent false sharing. 
> */ > - enum rte_keepalive_state core_state __rte_cache_aligned; > + alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state > core_state; > } live_data[RTE_KEEPALIVE_MAXCORES]; > > /** Last-seen-alive timestamps */ > diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c > index 7709b8f..90e91b3 100644 > --- a/lib/eal/common/rte_random.c > +++ b/lib/eal/common/rte_random.c > @@ -13,14 +13,14 @@ > #include <rte_lcore.h> > #include <rte_random.h> > > -struct rte_rand_state { > +struct __rte_cache_aligned rte_rand_state { > uint64_t z1; > uint64_t z2; > uint64_t z3; > uint64_t z4; > uint64_t z5; > RTE_CACHE_GUARD; > -} __rte_cache_aligned; > +}; > > /* One instance each for every lcore id-equipped thread, and one > * additional instance to be shared by all others threads (i.e., all > diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c > index d959c91..5637993 100644 > --- a/lib/eal/common/rte_service.c > +++ b/lib/eal/common/rte_service.c > @@ -32,7 +32,7 @@ > #define RUNSTATE_RUNNING 1 > > /* internal representation of a service */ > -struct rte_service_spec_impl { > +struct __rte_cache_aligned rte_service_spec_impl { > /* public part of the struct */ > struct rte_service_spec spec; > > @@ -53,7 +53,7 @@ struct rte_service_spec_impl { > * on currently. 
> */ > RTE_ATOMIC(uint32_t) num_mapped_cores; > -} __rte_cache_aligned; > +}; > > struct service_stats { > RTE_ATOMIC(uint64_t) calls; > @@ -61,7 +61,7 @@ struct service_stats { > }; > > /* the internal values of a service core */ > -struct core_state { > +struct __rte_cache_aligned core_state { > /* map of services IDs are run on this core */ > uint64_t service_mask; > RTE_ATOMIC(uint8_t) runstate; /* running or stopped */ > @@ -71,7 +71,7 @@ struct core_state { > RTE_ATOMIC(uint64_t) loops; > RTE_ATOMIC(uint64_t) cycles; > struct service_stats service_stats[RTE_SERVICE_NUM_MAX]; > -} __rte_cache_aligned; > +}; > > static uint32_t rte_service_count; > static struct rte_service_spec_impl *rte_services; > diff --git a/lib/eal/include/generic/rte_atomic.h > b/lib/eal/include/generic/rte_atomic.h > index 0e639da..f859707 100644 > --- a/lib/eal/include/generic/rte_atomic.h > +++ b/lib/eal/include/generic/rte_atomic.h > @@ -1094,7 +1094,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v) > /** > * 128-bit integer structure. > */ > -typedef struct { > +typedef struct __rte_aligned(16) { > union { > uint64_t val[2]; > #ifdef RTE_ARCH_64 > @@ -1103,7 +1103,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v) > #endif > #endif > }; > -} __rte_aligned(16) rte_int128_t; > +} rte_int128_t; > > #ifdef __DOXYGEN__ > > diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h > index 1cc1222..0908aa0 100644 > --- a/lib/eal/include/rte_common.h > +++ b/lib/eal/include/rte_common.h > @@ -12,6 +12,8 @@ > * for DPDK. > */ > > +#include <stdalign.h> > + > #ifdef __cplusplus > extern "C" { > #endif > @@ -63,10 +65,19 @@ > #endif > > /** > - * Force alignment > + * Force type alignment > + * > + * This macro should be used when alignment of a struct or union type > + * is required. For toolchain compatibility it should appear between > + * the {struct,union} keyword and tag. e.g. > + * > + * struct __rte_aligned(8) tag { ... 
}; > + * > + * If alignment of an object/variable is required then this macro should > + * not be used, instead prefer C11 alignas(a). > */ > #ifdef RTE_TOOLCHAIN_MSVC > -#define __rte_aligned(a) > +#define __rte_aligned(a) __declspec(align(a)) > #else > #define __rte_aligned(a) __attribute__((__aligned__(a))) > #endif > @@ -538,18 +549,14 @@ static void __attribute__((destructor(RTE_PRIO(prio)), > used)) func(void) > #define RTE_CACHE_LINE_MIN_SIZE 64 > > /** Force alignment to cache line. */ > -#ifdef RTE_TOOLCHAIN_MSVC > -#define __rte_cache_aligned > -#else > #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE) > -#endif > > /** Force minimum cache line alignment. */ > #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) > > #define _RTE_CACHE_GUARD_HELPER2(unique) \ > - char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * > RTE_CACHE_GUARD_LINES] \ > - __rte_cache_aligned > + alignas(RTE_CACHE_LINE_SIZE) \ > + char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] > #define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique) > /** > * Empty cache lines, to guard against false sharing-like effects > diff --git a/lib/eal/loongarch/include/rte_vect.h > b/lib/eal/loongarch/include/rte_vect.h > index 1546515..aa334e8 100644 > --- a/lib/eal/loongarch/include/rte_vect.h > +++ b/lib/eal/loongarch/include/rte_vect.h > @@ -15,7 +15,7 @@ > > #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED > > -typedef union xmm { > +typedef union __rte_aligned(16) xmm { > int8_t i8[16]; > int16_t i16[8]; > int32_t i32[4]; > @@ -25,19 +25,19 @@ > uint32_t u32[4]; > uint64_t u64[2]; > double pd[2]; > -} __rte_aligned(16) xmm_t; > +} xmm_t; > > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > > -typedef union rte_xmm { > +typedef union __rte_aligned(16) rte_xmm { > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / 
sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > static inline xmm_t > vect_load_128(void *p) > diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h > index a5f009b..c8bace2 100644 > --- a/lib/eal/ppc/include/rte_vect.h > +++ b/lib/eal/ppc/include/rte_vect.h > @@ -22,14 +22,14 @@ > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > > -typedef union rte_xmm { > +typedef union __rte_aligned(16) rte_xmm { > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > #ifdef __cplusplus > } > diff --git a/lib/eal/riscv/include/rte_vect.h > b/lib/eal/riscv/include/rte_vect.h > index da9092a..6df10fa 100644 > --- a/lib/eal/riscv/include/rte_vect.h > +++ b/lib/eal/riscv/include/rte_vect.h > @@ -22,14 +22,14 @@ > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > > -typedef union rte_xmm { > +typedef union __rte_aligned(16) rte_xmm { > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > static inline xmm_t > vect_load_128(void *p) > diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h > index 560f9e4..a1a537e 100644 > --- a/lib/eal/x86/include/rte_vect.h > +++ b/lib/eal/x86/include/rte_vect.h > @@ -91,7 +91,7 @@ > #define RTE_X86_ZMM_SIZE (sizeof(__m512i)) > #define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1) > > -typedef union __rte_x86_zmm { > +typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm { > __m512i z; > ymm_t y[RTE_X86_ZMM_SIZE / 
sizeof(ymm_t)]; > xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)]; > @@ -100,7 +100,7 @@ > uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)]; > double pd[RTE_X86_ZMM_SIZE / sizeof(double)]; > -} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t; > +} __rte_x86_zmm_t; > > #endif /* __AVX512F__ */ > > diff --git a/lib/eal/x86/rte_power_intrinsics.c > b/lib/eal/x86/rte_power_intrinsics.c > index 532a2e6..6d9b642 100644 > --- a/lib/eal/x86/rte_power_intrinsics.c > +++ b/lib/eal/x86/rte_power_intrinsics.c > @@ -2,6 +2,8 @@ > * Copyright(c) 2020 Intel Corporation > */ > > +#include <stdalign.h> > + > #include <rte_common.h> > #include <rte_lcore.h> > #include <rte_rtm.h> > @@ -12,10 +14,10 @@ > /* > * Per-lcore structure holding current status of C0.2 sleeps. > */ > -static struct power_wait_status { > +static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status { > rte_spinlock_t lock; > volatile void *monitor_addr; /**< NULL if not currently sleeping */ > -} __rte_cache_aligned wait_status[RTE_MAX_LCORE]; > +} wait_status[RTE_MAX_LCORE]; > > /* > * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. > @@ -85,10 +87,10 @@ static void amd_mwaitx(const uint64_t timeout) > #endif > } > > -static struct { > +static alignas(RTE_CACHE_LINE_SIZE) struct { > void (*mmonitor)(volatile void *addr); > void (*mwait)(const uint64_t timeout); > -} __rte_cache_aligned power_monitor_ops; > +} power_monitor_ops; > > static inline void > __umwait_wakeup(volatile void *addr) > --
Acked-by: Konstantin Ananyev <konstantin.anan...@huawei.com> > 1.8.3.1