On 2023-11-15 18:39, Tyler Retzlaff wrote:
Now that we have enabled C11, replace the use of __rte_cache_aligned
and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
alignas(n) respectively.
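For illustration, the transformation is mechanical; with a made-up struct foo
(not taken from this patch) it looks like this:

Before:

	struct foo {
		uint64_t bar;
	} __rte_cache_aligned;

After (with <stdalign.h> included):

	struct foo {
		alignas(RTE_CACHE_LINE_SIZE)
		uint64_t bar;
	};

Since a struct's alignment is at least that of its most strictly aligned
member, putting alignas() on the first member gives the whole struct the
same cache-line alignment the attribute used to provide.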
Signed-off-by: Tyler Retzlaff <roret...@linux.microsoft.com>
---
lib/eal/arm/include/rte_vect.h | 4 +++-
lib/eal/common/malloc_elem.h | 4 +++-
lib/eal/common/malloc_heap.h | 4 +++-
lib/eal/common/rte_keepalive.c | 4 +++-
lib/eal/common/rte_random.c | 5 ++++-
lib/eal/common/rte_service.c | 7 +++++--
lib/eal/include/generic/rte_atomic.h | 4 +++-
lib/eal/loongarch/include/rte_vect.h | 7 +++++--
lib/eal/ppc/include/rte_vect.h | 5 ++++-
lib/eal/riscv/include/rte_vect.h | 4 +++-
lib/eal/x86/include/rte_vect.h | 4 +++-
lib/eal/x86/rte_power_intrinsics.c | 8 ++++++--
12 files changed, 45 insertions(+), 15 deletions(-)
diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
index 8cfe4bd..c7a3b2e 100644
--- a/lib/eal/arm/include/rte_vect.h
+++ b/lib/eal/arm/include/rte_vect.h
@@ -5,6 +5,7 @@
#ifndef _RTE_VECT_ARM_H_
#define _RTE_VECT_ARM_H_
+#include <stdalign.h>
#include <stdint.h>
#include "generic/rte_vect.h"
#include "rte_debug.h"
@@ -25,13 +26,14 @@
#define XMM_MASK (XMM_SIZE - 1)
typedef union rte_xmm {
+ alignas(16)
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
index 952ce73..c2c336e 100644
--- a/lib/eal/common/malloc_elem.h
+++ b/lib/eal/common/malloc_elem.h
@@ -5,6 +5,7 @@
#ifndef MALLOC_ELEM_H_
#define MALLOC_ELEM_H_
+#include <stdalign.h>
#include <stdbool.h>
#include <rte_common.h>
@@ -21,6 +22,7 @@ enum elem_state {
};
struct malloc_elem {
+ alignas(RTE_CACHE_LINE_SIZE)
struct malloc_heap *heap;
struct malloc_elem *volatile prev;
/**< points to prev elem in memseg */
@@ -48,7 +50,7 @@ struct malloc_elem {
size_t user_size;
uint64_t asan_cookie[2]; /* must be next to header_cookie */
#endif
-} __rte_cache_aligned;
+};
static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
index 8f3ab57..a724bfb 100644
--- a/lib/eal/common/malloc_heap.h
+++ b/lib/eal/common/malloc_heap.h
@@ -5,6 +5,7 @@
#ifndef MALLOC_HEAP_H_
#define MALLOC_HEAP_H_
+#include <stdalign.h>
#include <stdbool.h>
#include <sys/queue.h>
@@ -22,6 +23,7 @@
* Structure to hold malloc heap
*/
struct malloc_heap {
+ alignas(RTE_CACHE_LINE_SIZE)
rte_spinlock_t lock;
LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
struct malloc_elem *volatile first;
@@ -31,7 +33,7 @@ struct malloc_heap {
unsigned int socket_id;
size_t total_size;
char name[RTE_HEAP_NAME_MAX_LEN];
-} __rte_cache_aligned;
+};
void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
index e0494b2..67a898d 100644
--- a/lib/eal/common/rte_keepalive.c
+++ b/lib/eal/common/rte_keepalive.c
@@ -3,6 +3,7 @@
*/
#include <inttypes.h>
+#include <stdalign.h>
#include <rte_common.h>
#include <rte_cycles.h>
@@ -17,7 +18,8 @@ struct rte_keepalive {
/*
* Each element must be cache aligned to prevent false sharing.
*/
- enum rte_keepalive_state core_state __rte_cache_aligned;
+ alignas(RTE_CACHE_LINE_SIZE)
+ enum rte_keepalive_state core_state;
} live_data[RTE_KEEPALIVE_MAXCORES];
/** Last-seen-alive timestamps */
diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 7709b8f..c04917e 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -2,6 +2,8 @@
* Copyright(c) 2019 Ericsson AB
*/
+#include <stdalign.h>
+
#ifdef __RDSEED__
#include <x86intrin.h>
#endif
@@ -14,13 +16,14 @@
#include <rte_random.h>
struct rte_rand_state {
+ alignas(RTE_CACHE_LINE_SIZE)
uint64_t z1;
Formatting convention question: shouldn't the alignas(n) and the field
be on the same line?
It could be useful to have a macro, so it would be:
RTE_CACHE_ALIGNAS uint64_t z1;
...which is shorter than:
alignas(RTE_CACHE_LINE_SIZE) uint64_t z1;
and by tomorrow, it will feel as natural and obvious as the open-coded
version.
I don't know. Just some thoughts.
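A minimal sketch of what such a macro could be, purely as an illustration
(the name and where it would live are only a suggestion, nothing in this
patch defines it):

#define RTE_CACHE_ALIGNAS alignas(RTE_CACHE_LINE_SIZE)

It keeps RTE_CACHE_LINE_SIZE out of every member declaration while still
expanding to the standard alignas specifier.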
uint64_t z2;
uint64_t z3;
uint64_t z4;
uint64_t z5;
RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
/* One instance each for every lcore id-equipped thread, and one
* additional instance to be shared by all others threads (i.e., all
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index e183d2e..861ae31 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -2,6 +2,7 @@
* Copyright(c) 2017 Intel Corporation
*/
+#include <stdalign.h>
#include <stdio.h>
#include <inttypes.h>
#include <string.h>
@@ -33,6 +34,7 @@
/* internal representation of a service */
struct rte_service_spec_impl {
+ alignas(RTE_CACHE_LINE_SIZE)
/* public part of the struct */
struct rte_service_spec spec;
@@ -53,7 +55,7 @@ struct rte_service_spec_impl {
* on currently.
*/
RTE_ATOMIC(uint32_t) num_mapped_cores;
-} __rte_cache_aligned;
+};
struct service_stats {
RTE_ATOMIC(uint64_t) calls;
@@ -62,6 +64,7 @@ struct service_stats {
/* the internal values of a service core */
struct core_state {
+ alignas(RTE_CACHE_LINE_SIZE)
/* map of services IDs are run on this core */
uint64_t service_mask;
RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
@@ -71,7 +74,7 @@ struct core_state {
RTE_ATOMIC(uint64_t) loops;
RTE_ATOMIC(uint64_t) cycles;
struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
-} __rte_cache_aligned;
+};
static uint32_t rte_service_count;
static struct rte_service_spec_impl *rte_services;
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0e639da..bc9213c 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -12,6 +12,7 @@
* This file defines a generic API for atomic operations.
*/
+#include <stdalign.h>
#include <stdint.h>
#include <rte_common.h>
@@ -1096,6 +1097,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
*/
typedef struct {
union {
+ alignas(16)
uint64_t val[2];
#ifdef RTE_ARCH_64
#ifndef RTE_TOOLCHAIN_MSVC
@@ -1103,7 +1105,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
#endif
#endif
};
-} __rte_aligned(16) rte_int128_t;
+} rte_int128_t;
#ifdef __DOXYGEN__
diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
index 1546515..856d87b 100644
--- a/lib/eal/loongarch/include/rte_vect.h
+++ b/lib/eal/loongarch/include/rte_vect.h
@@ -5,6 +5,7 @@
#ifndef RTE_VECT_LOONGARCH_H
#define RTE_VECT_LOONGARCH_H
+#include <stdalign.h>
#include <stdint.h>
#include "generic/rte_vect.h"
#include "rte_common.h"
@@ -16,6 +17,7 @@
#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
typedef union xmm {
+ alignas(16)
int8_t i8[16];
int16_t i16[8];
int32_t i32[4];
@@ -25,19 +27,20 @@
uint32_t u32[4];
uint64_t u64[2];
double pd[2];
-} __rte_aligned(16) xmm_t;
+} xmm_t;
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
typedef union rte_xmm {
+ alignas(16)
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
index a5f009b..e6702a4 100644
--- a/lib/eal/ppc/include/rte_vect.h
+++ b/lib/eal/ppc/include/rte_vect.h
@@ -6,6 +6,8 @@
#ifndef _RTE_VECT_PPC_64_H_
#define _RTE_VECT_PPC_64_H_
+#include <stdalign.h>
+
#include "rte_altivec.h"
#include "generic/rte_vect.h"
@@ -23,13 +25,14 @@
#define XMM_MASK (XMM_SIZE - 1)
typedef union rte_xmm {
+ alignas(16)
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#ifdef __cplusplus
}
diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index 2f97f43..32d4386 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -7,6 +7,7 @@
#ifndef RTE_VECT_RISCV_H
#define RTE_VECT_RISCV_H
+#include <stdalign.h>
#include <stdint.h>
#include "generic/rte_vect.h"
#include "rte_common.h"
@@ -23,13 +24,14 @@
#define XMM_MASK (XMM_SIZE - 1)
typedef union rte_xmm {
+ alignas(16) /* !! NOTE !! changed to 16; it looks like the previous 8 was a bug? */
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(8) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
index 560f9e4..2e5669d 100644
--- a/lib/eal/x86/include/rte_vect.h
+++ b/lib/eal/x86/include/rte_vect.h
@@ -11,6 +11,7 @@
* RTE SSE/AVX related header.
*/
+#include <stdalign.h>
#include <stdint.h>
#include <rte_config.h>
#include <rte_common.h>
@@ -92,6 +93,7 @@
#define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1)
typedef union __rte_x86_zmm {
+ alignas(RTE_X86_ZMM_SIZE)
__m512i z;
ymm_t y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
@@ -100,7 +102,7 @@
uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
double pd[RTE_X86_ZMM_SIZE / sizeof(double)];
-} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
+} __rte_x86_zmm_t;
#endif /* __AVX512F__ */
diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
index 532a2e6..5636543 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -2,6 +2,8 @@
* Copyright(c) 2020 Intel Corporation
*/
+#include <stdalign.h>
+
#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_rtm.h>
@@ -13,9 +15,10 @@
* Per-lcore structure holding current status of C0.2 sleeps.
*/
static struct power_wait_status {
+ alignas(RTE_CACHE_LINE_SIZE)
rte_spinlock_t lock;
volatile void *monitor_addr; /**< NULL if not currently sleeping */
-} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+} wait_status[RTE_MAX_LCORE];
/*
* This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
@@ -86,9 +89,10 @@ static void amd_mwaitx(const uint64_t timeout)
}
static struct {
+ alignas(RTE_CACHE_LINE_SIZE)
void (*mmonitor)(volatile void *addr);
void (*mwait)(const uint64_t timeout);
-} __rte_cache_aligned power_monitor_ops;
+} power_monitor_ops;
static inline void
__umwait_wakeup(volatile void *addr)