Adapt the rte_pause.h APIs to use standard C11 atomics in place of the
GCC __atomic builtins, and update consumers of the pause APIs for the
resulting API break.
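
The change follows the same mechanical pattern throughout: variables
passed to the pause/wait APIs gain the _Atomic qualifier, the
__atomic_*_n() builtins become their atomic_*_explicit() equivalents,
and the __ATOMIC_* constants become the corresponding memory_order_*
values. A minimal before/after sketch of a typical synchro flag
(illustrative only, condensed from the test changes in this patch):

    /* before: GCC builtin atomics */
    static uint32_t synchro;
    rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
    __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);

    /* after: standard C11 atomics (<stdatomic.h>) */
    static uint32_t _Atomic synchro;
    rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
    atomic_store_explicit(&synchro, 1, memory_order_relaxed);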

Signed-off-by: Tyler Retzlaff <roret...@linux.microsoft.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
 app/test-bbdev/test_bbdev_perf.c         | 123 ++++++++++++++++++++-----------
 app/test/test_func_reentrancy.c          |   8 +-
 app/test/test_mcslock.c                  |  12 +--
 app/test/test_mempool_perf.c             |   8 +-
 app/test/test_pflock.c                   |  12 +--
 app/test/test_pmd_perf.c                 |  10 +--
 app/test/test_ring_perf.c                |   8 +-
 app/test/test_rwlock.c                   |   8 +-
 app/test/test_spinlock.c                 |   8 +-
 app/test/test_stack_perf.c               |  12 +--
 app/test/test_ticketlock.c               |   8 +-
 app/test/test_timer.c                    |  16 ++--
 drivers/event/opdl/opdl_ring.c           |  47 ++++++------
 drivers/net/mlx5/mlx5_hws_cnt.h          |   4 +-
 drivers/net/thunderx/nicvf_rxtx.c        |   5 +-
 drivers/net/thunderx/nicvf_struct.h      |   2 +-
 lib/bpf/bpf_pkt.c                        |   6 +-
 lib/distributor/distributor_private.h    |   2 +-
 lib/distributor/rte_distributor_single.c |  44 +++++------
 lib/eal/arm/include/rte_pause_64.h       |  28 +++----
 lib/eal/common/eal_memcfg.h              |   2 +-
 lib/eal/include/generic/rte_pause.h      |  51 ++++++-------
 lib/eal/include/rte_mcslock.h            |  14 ++--
 lib/eal/include/rte_pflock.h             |  22 +++---
 lib/eal/include/rte_ticketlock.h         |   8 +-
 lib/ring/rte_ring_c11_pvt.h              |  42 ++++++-----
 lib/ring/rte_ring_core.h                 |   4 +-
 lib/ring/rte_ring_generic_pvt.h          |  19 +++--
 lib/ring/rte_ring_peek_elem_pvt.h        |   2 +-
 29 files changed, 291 insertions(+), 244 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 276bbf0..c3a3a28 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -143,7 +143,7 @@ struct test_op_params {
        uint16_t num_to_process;
        uint16_t num_lcores;
        int vector_mask;
-       uint16_t sync;
+       uint16_t _Atomic sync;
        struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
 };
 
@@ -158,7 +158,7 @@ struct thread_params {
        uint8_t iter_count;
        double iter_average;
        double bler;
-       uint16_t nb_dequeued;
+       uint16_t _Atomic nb_dequeued;
        int16_t processing_status;
        uint16_t burst_sz;
        struct test_op_params *op_params;
@@ -3021,27 +3021,32 @@ typedef int (test_case_function)(struct active_device *ad,
        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
                deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
                                &tp->dec_ops[
-                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+                                       atomic_load_explicit(&tp->nb_dequeued,
+                                               memory_order_relaxed)],
                                burst_sz);
        else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
                deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
                                &tp->dec_ops[
-                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+                                       atomic_load_explicit(&tp->nb_dequeued,
+                                               memory_order_relaxed)],
                                burst_sz);
        else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
                deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
                                &tp->enc_ops[
-                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+                                       atomic_load_explicit(&tp->nb_dequeued,
+                                               memory_order_relaxed)],
                                burst_sz);
        else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
                deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
                                &tp->fft_ops[
-                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+                                       atomic_load_explicit(&tp->nb_dequeued,
+                                               memory_order_relaxed)],
                                burst_sz);
        else /*RTE_BBDEV_OP_TURBO_ENC*/
                deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
                                &tp->enc_ops[
-                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+                                       atomic_load_explicit(&tp->nb_dequeued,
+                                               memory_order_relaxed)],
                                burst_sz);
 
        if (deq < burst_sz) {
@@ -3052,8 +3057,9 @@ typedef int (test_case_function)(struct active_device *ad,
                return;
        }
 
-       if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
-               __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
+       if (atomic_load_explicit(&tp->nb_dequeued, memory_order_relaxed) + deq < num_ops) {
+               atomic_fetch_add_explicit(&tp->nb_dequeued, deq,
+                       memory_order_relaxed);
                return;
        }
 
@@ -3126,7 +3132,8 @@ typedef int (test_case_function)(struct active_device *ad,
        tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
                        ((double)total_time / (double)rte_get_tsc_hz());
 
-       __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
+       atomic_fetch_add_explicit(&tp->nb_dequeued, deq,
+               memory_order_relaxed);
 }
 
 static int
@@ -3165,9 +3172,10 @@ typedef int (test_case_function)(struct active_device *ad,
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
                                num_to_process);
@@ -3222,10 +3230,12 @@ typedef int (test_case_function)(struct active_device *ad,
                        /* Wait until processing of previous batch is
                         * completed
                         */
-                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+                       rte_wait_until_equal_16(&tp->nb_dequeued,
+                               enqueued, memory_order_relaxed);
                }
                if (j != TEST_REPETITIONS - 1)
-                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+                       atomic_store_explicit(&tp->nb_dequeued, 0,
+                               memory_order_relaxed);
        }
 
        return TEST_SUCCESS;
@@ -3262,9 +3272,10 @@ typedef int (test_case_function)(struct active_device *ad,
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
                                num_to_process);
@@ -3313,10 +3324,12 @@ typedef int (test_case_function)(struct active_device *ad,
                        /* Wait until processing of previous batch is
                         * completed
                         */
-                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+                       rte_wait_until_equal_16(&tp->nb_dequeued,
+                               enqueued, memory_order_relaxed);
                }
                if (j != TEST_REPETITIONS - 1)
-                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+                       atomic_store_explicit(&tp->nb_dequeued, 0,
+                               memory_order_relaxed);
        }
 
        return TEST_SUCCESS;
@@ -3352,9 +3365,10 @@ typedef int (test_case_function)(struct active_device *ad,
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
                        num_to_process);
@@ -3399,10 +3413,12 @@ typedef int (test_case_function)(struct active_device *ad,
                        /* Wait until processing of previous batch is
                         * completed
                         */
-                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+                       rte_wait_until_equal_16(&tp->nb_dequeued,
+                               enqueued, memory_order_relaxed);
                }
                if (j != TEST_REPETITIONS - 1)
-                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+                       atomic_store_explicit(&tp->nb_dequeued, 0,
+                               memory_order_relaxed);
        }
 
        return TEST_SUCCESS;
@@ -3439,9 +3455,10 @@ typedef int (test_case_function)(struct active_device *ad,
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
                        num_to_process);
@@ -3488,10 +3505,12 @@ typedef int (test_case_function)(struct active_device *ad,
                        /* Wait until processing of previous batch is
                         * completed
                         */
-                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+                       rte_wait_until_equal_16(&tp->nb_dequeued,
+                               enqueued, memory_order_relaxed);
                }
                if (j != TEST_REPETITIONS - 1)
-                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+                       atomic_store_explicit(&tp->nb_dequeued, 0,
+                               memory_order_relaxed);
        }
 
        return TEST_SUCCESS;
@@ -3528,9 +3547,10 @@ typedef int (test_case_function)(struct active_device *ad,
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops,
                        num_to_process);
@@ -3575,10 +3595,12 @@ typedef int (test_case_function)(struct active_device *ad,
                        /* Wait until processing of previous batch is
                         * completed
                         */
-                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+                       rte_wait_until_equal_16(&tp->nb_dequeued,
+                               enqueued, memory_order_relaxed);
                }
                if (j != TEST_REPETITIONS - 1)
-                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+                       atomic_store_explicit(&tp->nb_dequeued, 0,
+                               memory_order_relaxed);
        }
 
        return TEST_SUCCESS;
@@ -3613,7 +3635,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
        TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3732,7 +3755,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
        TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3867,7 +3891,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
        TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3990,7 +4015,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
        TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -4121,7 +4147,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
                        num_ops);
@@ -4222,7 +4249,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
                        num_ops);
@@ -4323,7 +4351,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+               memory_order_relaxed);
 
        ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
        TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -4519,7 +4548,8 @@ typedef int (test_case_function)(struct active_device *ad,
        else
                return TEST_SKIPPED;
 
-       __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
+       atomic_store_explicit(&op_params->sync, SYNC_WAIT,
+               memory_order_relaxed);
 
        /* Main core is set at first entry */
        t_params[0].dev_id = ad->dev_id;
@@ -4542,7 +4572,8 @@ typedef int (test_case_function)(struct active_device *ad,
                                &t_params[used_cores++], lcore_id);
        }
 
-       __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       atomic_store_explicit(&op_params->sync, SYNC_START,
+               memory_order_relaxed);
        ret = bler_function(&t_params[0]);
 
        /* Main core is always used */
@@ -4641,7 +4672,8 @@ typedef int (test_case_function)(struct active_device *ad,
                        throughput_function = throughput_pmd_lcore_enc;
        }
 
-       __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
+       atomic_store_explicit(&op_params->sync, SYNC_WAIT,
+               memory_order_relaxed);
 
        /* Main core is set at first entry */
        t_params[0].dev_id = ad->dev_id;
@@ -4664,7 +4696,8 @@ typedef int (test_case_function)(struct active_device *ad,
                                &t_params[used_cores++], lcore_id);
        }
 
-       __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+       atomic_store_explicit(&op_params->sync, SYNC_START,
+               memory_order_relaxed);
        ret = throughput_function(&t_params[0]);
 
        /* Main core is always used */
@@ -4694,8 +4727,8 @@ typedef int (test_case_function)(struct active_device *ad,
         * Wait for main lcore operations.
         */
        tp = &t_params[0];
-       while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
-               op_params->num_to_process) &&
+       while ((atomic_load_explicit(&tp->nb_dequeued,
+               memory_order_relaxed) < op_params->num_to_process) &&
                (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
                TEST_FAILED))
                rte_pause();
@@ -4708,8 +4741,8 @@ typedef int (test_case_function)(struct active_device *ad,
        for (used_cores = 1; used_cores < num_lcores; used_cores++) {
                tp = &t_params[used_cores];
 
-               while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
-                       op_params->num_to_process) &&
+               while ((atomic_load_explicit(&tp->nb_dequeued,
+                       memory_order_relaxed) < op_params->num_to_process) &&
                        (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
                        TEST_FAILED))
                        rte_pause();
diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index ae9de6f..833ceaf 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -54,11 +54,11 @@
 #define MAX_LCORES     (rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 #define WAIT_SYNCHRO_FOR_WORKERS()   do { \
        if (lcore_self != rte_get_main_lcore())                  \
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED); \
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed); \
 } while(0)
 
 /*
@@ -438,7 +438,7 @@ struct test_case test_cases[] = {
                return -1;
 
        __atomic_store_n(&obj_count, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 
        cores = RTE_MIN(rte_lcore_count(), MAX_LCORES);
        RTE_LCORE_FOREACH_WORKER(lcore_id) {
@@ -448,7 +448,7 @@ struct test_case test_cases[] = {
                rte_eal_remote_launch(pt_case->func, pt_case->arg, lcore_id);
        }
 
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 
        if (pt_case->func(pt_case->arg) < 0)
                ret = -1;
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index 52e45e7..3c26c69 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -42,7 +42,7 @@
 
 static unsigned int count;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_mcslock_per_core(__rte_unused void *arg)
@@ -75,7 +75,7 @@
        rte_mcslock_t ml_perf_me;
 
        /* wait synchro */
-       rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+       rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (lcount < MAX_LOOP) {
@@ -100,14 +100,14 @@
        const unsigned int lcore = rte_lcore_id();
 
        printf("\nTest with no lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
        memset(time_count, 0, sizeof(time_count));
 
        printf("\nTest with lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        lock = 1;
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
@@ -116,11 +116,11 @@
 
        printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
 
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index ce7c624..06f9fc1 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -88,7 +88,7 @@
 static int use_external_cache;
 static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 /* number of objects in one bulk operation (get or put) */
 static unsigned n_get_bulk;
@@ -188,7 +188,7 @@ struct mempool_test_stats {
 
        /* wait synchro for workers */
        if (lcore_id != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        start_cycles = rte_get_timer_cycles();
 
@@ -233,7 +233,7 @@ struct mempool_test_stats {
        int ret;
        unsigned cores_save = cores;
 
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 
        /* reset stats */
        memset(stats, 0, sizeof(stats));
@@ -258,7 +258,7 @@ struct mempool_test_stats {
        }
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 
        ret = per_lcore_mempool_test(mp);
 
diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c
index 38da6bc..28addf8 100644
--- a/app/test/test_pflock.c
+++ b/app/test/test_pflock.c
@@ -31,7 +31,7 @@
 
 static rte_pflock_t sl;
 static rte_pflock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_pflock_per_core(__rte_unused void *arg)
@@ -69,7 +69,7 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcount < MAX_LOOP) {
@@ -99,7 +99,7 @@
        const unsigned int lcore = rte_lcore_id();
 
        printf("\nTest with no lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
@@ -107,7 +107,7 @@
 
        printf("\nTest with phase-fair lock on single core...\n");
        lock = 1;
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
@@ -116,12 +116,12 @@
        printf("\nPhase-fair test on %u cores...\n", rte_lcore_count());
 
        /* clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
        if (rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN) < 0)
                return -1;
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_pmd_perf.c b/app/test/test_pmd_perf.c
index 3ef590c..1562bbb 100644
--- a/app/test/test_pmd_perf.c
+++ b/app/test/test_pmd_perf.c
@@ -537,7 +537,7 @@ enum {
        return 0;
 }
 
-static uint64_t start;
+static uint64_t _Atomic start;
 
 static inline int
 poll_burst(void *args)
@@ -575,7 +575,7 @@ enum {
                num[portid] = pkt_per_port;
        }
 
-       rte_wait_until_equal_64(&start, 1, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_64(&start, 1, memory_order_acquire);
 
        cur_tsc = rte_rdtsc();
        while (total) {
@@ -629,9 +629,9 @@ enum {
 
        /* only when polling first */
        if (flags == SC_BURST_POLL_FIRST)
-               __atomic_store_n(&start, 1, __ATOMIC_RELAXED);
+               atomic_store_explicit(&start, 1, memory_order_relaxed);
        else
-               __atomic_store_n(&start, 0, __ATOMIC_RELAXED);
+               atomic_store_explicit(&start, 0, memory_order_relaxed);
 
        /* start polling thread
         * if in POLL_FIRST mode, poll once launched;
@@ -655,7 +655,7 @@ enum {
 
        /* only when polling second  */
        if (flags == SC_BURST_XMIT_FIRST)
-               __atomic_store_n(&start, 1, __ATOMIC_RELEASE);
+               atomic_store_explicit(&start, 1, memory_order_release);
 
        /* wait for polling finished */
        diff_tsc = rte_eal_wait_lcore(lcore);
diff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c
index 3972fd9..f0f5ef1 100644
--- a/app/test/test_ring_perf.c
+++ b/app/test/test_ring_perf.c
@@ -320,7 +320,7 @@ struct thread_params {
        return 0;
 }
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 static uint64_t queue_count[RTE_MAX_LCORE];
 
 #define TIME_MS 100
@@ -342,7 +342,7 @@ struct thread_params {
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (time_diff < hz * TIME_MS / 1000) {
@@ -397,12 +397,12 @@ struct thread_params {
                param.r = r;
 
                /* clear synchro and start workers */
-               __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+               atomic_store_explicit(&synchro, 0, memory_order_relaxed);
                if (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)
                        return -1;
 
                /* start synchro and launch test on main */
-               __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+               atomic_store_explicit(&synchro, 1, memory_order_relaxed);
                lcore_f(&param);
 
                rte_eal_mp_wait_lcore();
diff --git a/app/test/test_rwlock.c b/app/test/test_rwlock.c
index 4ae0bf8..dfbd0d6 100644
--- a/app/test/test_rwlock.c
+++ b/app/test/test_rwlock.c
@@ -35,7 +35,7 @@
 
 static rte_rwlock_t sl;
 static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 enum {
        LC_TYPE_RDLOCK,
@@ -101,7 +101,7 @@ struct try_rwlock_lcore {
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcount < MAX_LOOP) {
@@ -134,12 +134,12 @@ struct try_rwlock_lcore {
        printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
 
        /* clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
        if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
                return -1;
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(NULL);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_spinlock.c b/app/test/test_spinlock.c
index 3f59372..38724a1 100644
--- a/app/test/test_spinlock.c
+++ b/app/test/test_spinlock.c
@@ -48,7 +48,7 @@
 static rte_spinlock_recursive_t slr;
 static unsigned count = 0;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_spinlock_per_core(__rte_unused void *arg)
@@ -110,7 +110,7 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (lcount < MAX_LOOP) {
@@ -149,11 +149,11 @@
        printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
        /* Clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_stack_perf.c b/app/test/test_stack_perf.c
index 1eae00a..67510d6 100644
--- a/app/test/test_stack_perf.c
+++ b/app/test/test_stack_perf.c
@@ -23,7 +23,7 @@
  */
 static volatile unsigned int bulk_sizes[] = {8, MAX_BURST};
 
-static uint32_t lcore_barrier;
+static uint32_t _Atomic lcore_barrier;
 
 struct lcore_pair {
        unsigned int c1;
@@ -143,8 +143,8 @@ struct thread_args {
        s = args->s;
        size = args->sz;
 
-       __atomic_fetch_sub(&lcore_barrier, 1, __ATOMIC_RELAXED);
-       rte_wait_until_equal_32(&lcore_barrier, 0, __ATOMIC_RELAXED);
+       atomic_fetch_sub_explicit(&lcore_barrier, 1, memory_order_relaxed);
+       rte_wait_until_equal_32(&lcore_barrier, 0, memory_order_relaxed);
 
        uint64_t start = rte_rdtsc();
 
@@ -173,7 +173,7 @@ struct thread_args {
        unsigned int i;
 
        for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
-               __atomic_store_n(&lcore_barrier, 2, __ATOMIC_RELAXED);
+               atomic_store_explicit(&lcore_barrier, 2, memory_order_relaxed);
 
                args[0].sz = args[1].sz = bulk_sizes[i];
                args[0].s = args[1].s = s;
@@ -206,7 +206,7 @@ struct thread_args {
                int cnt = 0;
                double avg;
 
-               __atomic_store_n(&lcore_barrier, n, __ATOMIC_RELAXED);
+               atomic_store_explicit(&lcore_barrier, n, memory_order_relaxed);
 
                RTE_LCORE_FOREACH_WORKER(lcore_id) {
                        if (++cnt >= n)
@@ -300,7 +300,7 @@ struct thread_args {
        struct lcore_pair cores;
        struct rte_stack *s;
 
-       __atomic_store_n(&lcore_barrier, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&lcore_barrier, 0, memory_order_relaxed);
 
        s = rte_stack_create(STACK_NAME, STACK_SIZE, rte_socket_id(), flags);
        if (s == NULL) {
diff --git a/app/test/test_ticketlock.c b/app/test/test_ticketlock.c
index 242c136..f12d1e5 100644
--- a/app/test/test_ticketlock.c
+++ b/app/test/test_ticketlock.c
@@ -48,7 +48,7 @@
 static rte_ticketlock_recursive_t tlr;
 static unsigned int count;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_ticketlock_per_core(__rte_unused void *arg)
@@ -111,7 +111,7 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcore_count[lcore] < MAX_LOOP) {
@@ -153,11 +153,11 @@
        printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
        /* Clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 0, memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&synchro, 1, memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_timer.c b/app/test/test_timer.c
index 0c36dc9..cf89a19 100644
--- a/app/test/test_timer.c
+++ b/app/test/test_timer.c
@@ -202,7 +202,7 @@ struct mytimerinfo {
 
 /* Need to synchronize worker lcores through multiple steps. */
 enum { WORKER_WAITING = 1, WORKER_RUN_SIGNAL, WORKER_RUNNING, WORKER_FINISHED };
-static uint16_t lcore_state[RTE_MAX_LCORE];
+static uint16_t _Atomic lcore_state[RTE_MAX_LCORE];
 
 static void
 main_init_workers(void)
@@ -210,7 +210,7 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               __atomic_store_n(&lcore_state[i], WORKER_WAITING, __ATOMIC_RELAXED);
+               atomic_store_explicit(&lcore_state[i], WORKER_WAITING, memory_order_relaxed);
        }
 }
 
@@ -220,10 +220,10 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               __atomic_store_n(&lcore_state[i], WORKER_RUN_SIGNAL, __ATOMIC_RELEASE);
+               atomic_store_explicit(&lcore_state[i], WORKER_RUN_SIGNAL, memory_order_release);
        }
        RTE_LCORE_FOREACH_WORKER(i) {
-               rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, __ATOMIC_ACQUIRE);
+               rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, memory_order_acquire);
        }
 }
 
@@ -233,7 +233,7 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, __ATOMIC_ACQUIRE);
+               rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, memory_order_acquire);
        }
 }
 
@@ -242,8 +242,8 @@ struct mytimerinfo {
 {
        unsigned lcore_id = rte_lcore_id();
 
-       rte_wait_until_equal_16(&lcore_state[lcore_id], WORKER_RUN_SIGNAL, __ATOMIC_ACQUIRE);
-       __atomic_store_n(&lcore_state[lcore_id], WORKER_RUNNING, __ATOMIC_RELEASE);
+       rte_wait_until_equal_16(&lcore_state[lcore_id], WORKER_RUN_SIGNAL, memory_order_acquire);
+       atomic_store_explicit(&lcore_state[lcore_id], WORKER_RUNNING, memory_order_release);
 }
 
 static void
@@ -251,7 +251,7 @@ struct mytimerinfo {
 {
        unsigned lcore_id = rte_lcore_id();
 
-       __atomic_store_n(&lcore_state[lcore_id], WORKER_FINISHED, __ATOMIC_RELEASE);
+       atomic_store_explicit(&lcore_state[lcore_id], WORKER_FINISHED, memory_order_release);
 }
 
 
diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c
index 69392b5..e1fa674 100644
--- a/drivers/event/opdl/opdl_ring.c
+++ b/drivers/event/opdl/opdl_ring.c
@@ -52,7 +52,7 @@ struct shared_state {
        uint32_t head;  /* Head sequence number (for multi thread operation) */
        char _pad2[RTE_CACHE_LINE_SIZE * 3];
        struct opdl_stage *stage;  /* back pointer */
-       uint32_t tail;  /* Tail sequence number */
+       uint32_t _Atomic tail;  /* Tail sequence number */
        char _pad3[RTE_CACHE_LINE_SIZE * 2];
 } __rte_cache_aligned;
 
@@ -169,7 +169,7 @@ struct opdl_ring {
 {
        uint32_t i;
        uint32_t this_tail = s->shared.tail;
-       uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE);
+       uint32_t min_seq = atomic_load_explicit(&s->deps[0]->tail, memory_order_acquire);
        /* Input stage sequence numbers are greater than the sequence numbers of
         * its dependencies so an offset of t->num_slots is needed when
         * calculating available slots and also the condition which is used to
@@ -180,16 +180,16 @@ struct opdl_ring {
        if (is_input_stage(s)) {
                wrap = s->num_slots;
                for (i = 1; i < s->num_deps; i++) {
-                       uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-                                       __ATOMIC_ACQUIRE);
+                       uint32_t seq = atomic_load_explicit(&s->deps[i]->tail,
+                                       memory_order_acquire);
                        if ((this_tail - seq) > (this_tail - min_seq))
                                min_seq = seq;
                }
        } else {
                wrap = 0;
                for (i = 1; i < s->num_deps; i++) {
-                       uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-                                       __ATOMIC_ACQUIRE);
+                       uint32_t seq = atomic_load_explicit(&s->deps[i]->tail,
+                                       memory_order_acquire);
                        if ((seq - this_tail) < (min_seq - this_tail))
                                min_seq = seq;
                }
@@ -299,7 +299,8 @@ struct opdl_ring {
        copy_entries_in(t, head, entries, num_entries);
 
        s->head += num_entries;
-       __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+       atomic_store_explicit(&s->shared.tail, s->head,
+               memory_order_release);
 
        return num_entries;
 }
@@ -382,18 +383,18 @@ struct opdl_ring {
                /* There should be no race condition here. If shared.tail
                 * matches, no other core can update it until this one does.
                 */
-               if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) ==
-                               tail) {
+               if (atomic_load_explicit(&s->shared.tail,
+                       memory_order_acquire) == tail) {
                        if (num_entries >= (head - tail)) {
                                claim_mgr_remove(disclaims);
-                               __atomic_store_n(&s->shared.tail, head,
-                                               __ATOMIC_RELEASE);
+                               atomic_store_explicit(&s->shared.tail, head,
+                                               memory_order_release);
                                num_entries -= (head - tail);
                        } else {
                                claim_mgr_move_tail(disclaims, num_entries);
-                               __atomic_store_n(&s->shared.tail,
+                               atomic_store_explicit(&s->shared.tail,
                                                num_entries + tail,
-                                               __ATOMIC_RELEASE);
+                                               memory_order_release);
                                num_entries = 0;
                        }
                } else if (block == false)
@@ -473,10 +474,11 @@ struct opdl_ring {
        /* If another thread started inputting before this one, but hasn't
         * finished, we need to wait for it to complete to update the tail.
         */
-       rte_wait_until_equal_32(&s->shared.tail, old_head, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_32(&s->shared.tail, old_head,
+               memory_order_acquire);
 
-       __atomic_store_n(&s->shared.tail, old_head + num_entries,
-                       __ATOMIC_RELEASE);
+       atomic_store_explicit(&s->shared.tail, old_head + num_entries,
+                       memory_order_release);
 
        return num_entries;
 }
@@ -628,8 +630,8 @@ struct opdl_ring {
                                num_entries, s->head - old_tail);
                num_entries = s->head - old_tail;
        }
-       __atomic_store_n(&s->shared.tail, num_entries + old_tail,
-                       __ATOMIC_RELEASE);
+       atomic_store_explicit(&s->shared.tail, num_entries + old_tail,
+                       memory_order_release);
 }
 
 uint32_t
@@ -658,7 +660,8 @@ struct opdl_ring {
        copy_entries_in(t, head, entries, num_entries);
 
        s->head += num_entries;
-       __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+       atomic_store_explicit(&s->shared.tail, s->head,
+               memory_order_release);
 
        return num_entries;
 
@@ -677,7 +680,8 @@ struct opdl_ring {
        copy_entries_out(t, head, entries, num_entries);
 
        s->head += num_entries;
-       __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+       atomic_store_explicit(&s->shared.tail, s->head,
+               memory_order_release);
 
        return num_entries;
 }
@@ -756,7 +760,8 @@ struct opdl_ring {
                return 0;
        }
        if (s->threadsafe == false) {
-               __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+               atomic_store_explicit(&s->shared.tail, s->head,
+                       memory_order_release);
                s->seq += s->num_claimed;
                s->shadow_head = s->head;
                s->num_claimed = 0;
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h
index f462665..99977a6 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.h
+++ b/drivers/net/mlx5/mlx5_hws_cnt.h
@@ -386,7 +386,7 @@ struct mlx5_hws_age_param {
 
        MLX5_ASSERT(r->prod.sync_type == RTE_RING_SYNC_ST);
        MLX5_ASSERT(r->cons.sync_type == RTE_RING_SYNC_ST);
-       current_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);
+       current_head = atomic_load_explicit(&r->prod.head, memory_order_relaxed);
        MLX5_ASSERT(n <= r->capacity);
        MLX5_ASSERT(n <= rte_ring_count(r));
        revert2head = current_head - n;
@@ -394,7 +394,7 @@ struct mlx5_hws_age_param {
        __rte_ring_get_elem_addr(r, revert2head, sizeof(cnt_id_t), n,
                        &zcd->ptr1, &zcd->n1, &zcd->ptr2);
        /* Update tail */
-       __atomic_store_n(&r->prod.tail, revert2head, __ATOMIC_RELEASE);
+       atomic_store_explicit(&r->prod.tail, revert2head, memory_order_release);
        return n;
 }
 
diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c
index defa551..0db9505 100644
--- a/drivers/net/thunderx/nicvf_rxtx.c
+++ b/drivers/net/thunderx/nicvf_rxtx.c
@@ -385,9 +385,10 @@
                ltail++;
        }
 
-       rte_wait_until_equal_32(&rbdr->tail, next_tail, __ATOMIC_RELAXED);
+       rte_wait_until_equal_32(&rbdr->tail, next_tail,
+               memory_order_relaxed);
 
-       __atomic_store_n(&rbdr->tail, ltail, __ATOMIC_RELEASE);
+       atomic_store_explicit(&rbdr->tail, ltail, memory_order_release);
        nicvf_addr_write(door, to_fill);
        return to_fill;
 }
diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h
index 13cf8fe..38c72b6 100644
--- a/drivers/net/thunderx/nicvf_struct.h
+++ b/drivers/net/thunderx/nicvf_struct.h
@@ -20,7 +20,7 @@ struct nicvf_rbdr {
        struct rbdr_entry_t *desc;
        nicvf_iova_addr_t phys;
        uint32_t buffsz;
-       uint32_t tail;
+       uint32_t _Atomic tail;
        uint32_t next_tail;
        uint32_t head;
        uint32_t qlen_mask;
diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index ffd2db7..d0d50cd 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -25,7 +25,7 @@
 
 struct bpf_eth_cbi {
        /* used by both data & control path */
-       uint32_t use;    /*usage counter */
+       uint32_t _Atomic use;    /*usage counter */
        const struct rte_eth_rxtx_callback *cb;  /* callback handle */
        struct rte_bpf *bpf;
        struct rte_bpf_jit jit;
@@ -110,8 +110,8 @@ struct bpf_eth_cbh {
 
        /* in use, busy wait till current RX/TX iteration is finished */
        if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-               RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use,
-                       UINT32_MAX, !=, puse, __ATOMIC_RELAXED);
+               RTE_WAIT_UNTIL_MASKED(&cbi->use,
+                       UINT32_MAX, !=, puse, memory_order_relaxed);
        }
 }
 
diff --git a/lib/distributor/distributor_private.h b/lib/distributor/distributor_private.h
index 7101f63..3b43d3d 100644
--- a/lib/distributor/distributor_private.h
+++ b/lib/distributor/distributor_private.h
@@ -52,7 +52,7 @@
  * Only 64-bits of the memory is actually used though.
  */
 union rte_distributor_buffer_single {
-       volatile int64_t bufptr64;
+       int64_t _Atomic bufptr64;
        char pad[RTE_CACHE_LINE_SIZE*3];
 } __rte_cache_aligned;
 
diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index 2c77ac4..7a9a3d9 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -32,10 +32,10 @@
        int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
                        | RTE_DISTRIB_GET_BUF;
        RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
-               ==, 0, __ATOMIC_RELAXED);
+               ==, 0, memory_order_relaxed);
 
        /* Sync with distributor on GET_BUF flag. */
-       __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
+       atomic_store_explicit(&buf->bufptr64, req, memory_order_release);
 }
 
 struct rte_mbuf *
@@ -44,7 +44,7 @@ struct rte_mbuf *
 {
        union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
        /* Sync with distributor. Acquire bufptr64. */
-       if (__atomic_load_n(&buf->bufptr64, __ATOMIC_ACQUIRE)
+       if (atomic_load_explicit(&buf->bufptr64, memory_order_acquire)
                & RTE_DISTRIB_GET_BUF)
                return NULL;
 
@@ -72,10 +72,10 @@ struct rte_mbuf *
        uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
                        | RTE_DISTRIB_RETURN_BUF;
        RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
-               ==, 0, __ATOMIC_RELAXED);
+               ==, 0, memory_order_relaxed);
 
        /* Sync with distributor on RETURN_BUF flag. */
-       __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
+       atomic_store_explicit(&buf->bufptr64, req, memory_order_release);
        return 0;
 }
 
@@ -119,7 +119,7 @@ struct rte_mbuf *
        d->in_flight_tags[wkr] = 0;
        d->in_flight_bitmask &= ~(1UL << wkr);
        /* Sync with worker. Release bufptr64. */
-       __atomic_store_n(&(d->bufs[wkr].bufptr64), 0, __ATOMIC_RELEASE);
+       atomic_store_explicit(&d->bufs[wkr].bufptr64, 0, memory_order_release);
        if (unlikely(d->backlog[wkr].count != 0)) {
                /* On return of a packet, we need to move the
                 * queued packets for this core elsewhere.
@@ -165,21 +165,21 @@ struct rte_mbuf *
        for (wkr = 0; wkr < d->num_workers; wkr++) {
                uintptr_t oldbuf = 0;
                /* Sync with worker. Acquire bufptr64. */
-               const int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
-                                                       __ATOMIC_ACQUIRE);
+               const int64_t data = atomic_load_explicit(&d->bufs[wkr].bufptr64,
+                                                       memory_order_acquire);
 
                if (data & RTE_DISTRIB_GET_BUF) {
                        flushed++;
                        if (d->backlog[wkr].count)
                                /* Sync with worker. Release bufptr64. */
-                               __atomic_store_n(&(d->bufs[wkr].bufptr64),
+                               atomic_store_explicit(&d->bufs[wkr].bufptr64,
                                        backlog_pop(&d->backlog[wkr]),
-                                       __ATOMIC_RELEASE);
+                                       memory_order_release);
                        else {
                                /* Sync with worker on GET_BUF flag. */
-                               __atomic_store_n(&(d->bufs[wkr].bufptr64),
+                               atomic_store_explicit(&d->bufs[wkr].bufptr64,
                                        RTE_DISTRIB_GET_BUF,
-                                       __ATOMIC_RELEASE);
+                                       memory_order_release);
                                d->in_flight_tags[wkr] = 0;
                                d->in_flight_bitmask &= ~(1UL << wkr);
                        }
@@ -217,8 +217,8 @@ struct rte_mbuf *
        while (next_idx < num_mbufs || next_mb != NULL) {
                uintptr_t oldbuf = 0;
                /* Sync with worker. Acquire bufptr64. */
-               int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
-                                               __ATOMIC_ACQUIRE);
+               int64_t data = atomic_load_explicit(&d->bufs[wkr].bufptr64,
+                                               memory_order_acquire);
 
                if (!next_mb) {
                        next_mb = mbufs[next_idx++];
@@ -264,15 +264,15 @@ struct rte_mbuf *
 
                        if (d->backlog[wkr].count)
                                /* Sync with worker. Release bufptr64. */
-                               __atomic_store_n(&(d->bufs[wkr].bufptr64),
+                               atomic_store_explicit(&d->bufs[wkr].bufptr64,
                                                backlog_pop(&d->backlog[wkr]),
-                                               __ATOMIC_RELEASE);
+                                               memory_order_release);
 
                        else {
                                /* Sync with worker. Release bufptr64.  */
-                               __atomic_store_n(&(d->bufs[wkr].bufptr64),
+                               atomic_store_explicit(&d->bufs[wkr].bufptr64,
                                                next_value,
-                                               __ATOMIC_RELEASE);
+                                               memory_order_release);
                                d->in_flight_tags[wkr] = new_tag;
                                d->in_flight_bitmask |= (1UL << wkr);
                                next_mb = NULL;
@@ -294,8 +294,8 @@ struct rte_mbuf *
        for (wkr = 0; wkr < d->num_workers; wkr++)
                if (d->backlog[wkr].count &&
                                /* Sync with worker. Acquire bufptr64. */
-                               (__atomic_load_n(&(d->bufs[wkr].bufptr64),
-                               __ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)) {
+                               (atomic_load_explicit(&d->bufs[wkr].bufptr64,
+                               memory_order_acquire) & RTE_DISTRIB_GET_BUF)) {
 
                        int64_t oldbuf = d->bufs[wkr].bufptr64 >>
                                        RTE_DISTRIB_FLAG_BITS;
@@ -303,9 +303,9 @@ struct rte_mbuf *
                        store_return(oldbuf, d, &ret_start, &ret_count);
 
                        /* Sync with worker. Release bufptr64. */
-                       __atomic_store_n(&(d->bufs[wkr].bufptr64),
+                       atomic_store_explicit(&d->bufs[wkr].bufptr64,
                                backlog_pop(&d->backlog[wkr]),
-                               __ATOMIC_RELEASE);
+                               memory_order_release);
                }
 
        d->returns.start = ret_start;
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 5f70e97..96ad050 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -148,13 +148,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-               int memorder)
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+               memory_order memorder)
 {
        uint16_t value;
 
-       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-               memorder != __ATOMIC_RELAXED);
+       RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+               memorder != memory_order_relaxed);
 
        __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
        if (value != expected) {
@@ -167,13 +167,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-               int memorder)
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+               memory_order memorder)
 {
        uint32_t value;
 
-       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-               memorder != __ATOMIC_RELAXED);
+       RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+               memorder != memory_order_relaxed);
 
        __RTE_ARM_LOAD_EXC_32(addr, value, memorder)
        if (value != expected) {
@@ -186,13 +186,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-               int memorder)
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+               memory_order memorder)
 {
        uint64_t value;
 
-       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-               memorder != __ATOMIC_RELAXED);
+       RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+               memorder != memory_order_relaxed);
 
        __RTE_ARM_LOAD_EXC_64(addr, value, memorder)
        if (value != expected) {
@@ -206,8 +206,8 @@ static inline void rte_pause(void)
 
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do {  \
        RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
-       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
-               memorder != __ATOMIC_RELAXED);                            \
+       RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&              \
+               memorder != memory_order_relaxed);                        \
        const uint32_t size = sizeof(*(addr)) << 3;                       \
        typeof(*(addr)) expected_value = (expected);                      \
        typeof(*(addr)) value;                                            \
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index 8889ba0..0decc29 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -18,7 +18,7 @@
  * Memory configuration shared across multiple processes.
  */
 struct rte_mem_config {
-       volatile uint32_t magic;   /**< Magic number - sanity check. */
+       uint32_t _Atomic magic;   /**< Magic number - sanity check. */
        uint32_t version;
        /**< Prevent secondary processes using different DPDK versions. */
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index ec1f418..04c0ac6 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -12,6 +12,7 @@
  * CPU pause operation.
  */
 
+#include <stdatomic.h>
 #include <stdint.h>
 #include <assert.h>
 #include <rte_common.h>
@@ -35,13 +36,13 @@
  *  A 16-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-               int memorder);
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+               memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 32-bit expected value, with a relaxed
@@ -53,13 +54,13 @@
  *  A 32-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-               int memorder);
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+               memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 64-bit expected value, with a relaxed
@@ -71,42 +72,42 @@
  *  A 64-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-               int memorder);
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+               memory_order memorder);
 
 #ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-               int memorder)
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+               memory_order memorder)
 {
-       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+       assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-       while (__atomic_load_n(addr, memorder) != expected)
+       while (atomic_load_explicit(addr, memorder) != expected)
                rte_pause();
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-               int memorder)
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+               memory_order memorder)
 {
-       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+       assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-       while (__atomic_load_n(addr, memorder) != expected)
+       while (atomic_load_explicit(addr, memorder) != expected)
                rte_pause();
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-               int memorder)
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+               memory_order memorder)
 {
-       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+       assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-       while (__atomic_load_n(addr, memorder) != expected)
+       while (atomic_load_explicit(addr, memorder) != expected)
                rte_pause();
 }
 
@@ -124,16 +125,16 @@
  *  An expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do { \
        RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));               \
-       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                 \
-               memorder != __ATOMIC_RELAXED);                           \
+       RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&             \
+               memorder != memory_order_relaxed);                       \
        typeof(*(addr)) expected_value = (expected);                     \
-       while (!((__atomic_load_n((addr), (memorder)) & (mask)) cond     \
+       while (!((atomic_load_explicit((addr), (memorder)) & (mask)) cond \
                        expected_value))                                 \
                rte_pause();                                             \
 } while (0)
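RTE_WAIT_UNTIL_MASKED likewise now takes a memory_order constant instead of a
__ATOMIC_* value. A minimal sketch, assuming an illustrative _Atomic status
word (not DPDK code):

#include <stdatomic.h>
#include <stdint.h>

#include <rte_pause.h>

static uint32_t _Atomic status;

static void
wait_for_low_byte(void)
{
        /* Busy-wait until the low byte of 'status' becomes non-zero;
         * relaxed ordering, since no payload needs to be synchronized.
         */
        RTE_WAIT_UNTIL_MASKED(&status, 0xffu, !=, 0, memory_order_relaxed);
}
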
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index a805cb2..da25be4 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -33,7 +33,7 @@
  */
 typedef struct rte_mcslock {
        struct rte_mcslock *next;
-       int locked; /* 1 if the queue locked, 0 otherwise */
+       uint32_t _Atomic locked; /* 1 if the queue locked, 0 otherwise */
 } rte_mcslock_t;
 
 /**
@@ -53,7 +53,7 @@
        rte_mcslock_t *prev;
 
        /* Init me node */
-       __atomic_store_n(&me->locked, 1, __ATOMIC_RELAXED);
+       atomic_store_explicit(&me->locked, 1, memory_order_relaxed);
        __atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
 
        /* If the queue is empty, the exchange operation is enough to acquire
@@ -88,7 +88,7 @@
         * to spin on me->locked until the previous lock holder resets
         * the me->locked using mcslock_unlock().
         */
-       rte_wait_until_equal_32((uint32_t *)&me->locked, 0, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_32(&me->locked, 0, memory_order_acquire);
 }
 
 /**
@@ -120,14 +120,14 @@
                /* More nodes added to the queue by other CPUs.
                 * Wait until the next pointer is set.
                 */
-               uintptr_t *next;
-               next = (uintptr_t *)&me->next;
+               uintptr_t _Atomic *next;
+               next = (uintptr_t _Atomic *)&me->next;
                RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0,
-                       __ATOMIC_RELAXED);
+                       memory_order_relaxed);
        }
 
        /* Pass lock to next waiter. */
-       __atomic_store_n(&me->next->locked, 0, __ATOMIC_RELEASE);
+       atomic_store_explicit(&me->next->locked, 0, memory_order_release);
 }
 
 /**
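The MCS lock call sites are unchanged; only the per-node locked field becomes
_Atomic. A minimal usage sketch, with illustrative names (the counter is not
DPDK code):

#include <stdint.h>

#include <rte_mcslock.h>

static rte_mcslock_t *global_lock;      /* queue tail, NULL when free */
static uint64_t counter;

static void
bump_counter(void)
{
        rte_mcslock_t node;             /* per-lcore node, caller-provided */

        rte_mcslock_lock(&global_lock, &node);
        counter++;                      /* critical section */
        rte_mcslock_unlock(&global_lock, &node);
}
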
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index a3f7291..1c32512 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -40,8 +40,8 @@
  */
 struct rte_pflock {
        struct {
-               uint16_t in;
-               uint16_t out;
+               uint16_t _Atomic in;
+               uint16_t _Atomic out;
        } rd, wr;
 };
 typedef struct rte_pflock rte_pflock_t;
@@ -116,14 +116,14 @@ struct rte_pflock {
         * If no writer is present, then the operation has completed
         * successfully.
         */
-       w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, __ATOMIC_ACQUIRE)
+       w = atomic_fetch_add_explicit(&pf->rd.in, RTE_PFLOCK_RINC, memory_order_acquire)
                & RTE_PFLOCK_WBITS;
        if (w == 0)
                return;
 
        /* Wait for current write phase to complete. */
        RTE_WAIT_UNTIL_MASKED(&pf->rd.in, RTE_PFLOCK_WBITS, !=, w,
-               __ATOMIC_ACQUIRE);
+               memory_order_acquire);
 }
 
 /**
@@ -139,7 +139,7 @@ struct rte_pflock {
 static inline void
 rte_pflock_read_unlock(rte_pflock_t *pf)
 {
-       __atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, __ATOMIC_RELEASE);
+       atomic_fetch_add_explicit(&pf->rd.out, RTE_PFLOCK_RINC, memory_order_release);
 }
 
 /**
@@ -160,8 +160,8 @@ struct rte_pflock {
        /* Acquire ownership of write-phase.
         * This is same as rte_ticketlock_lock().
         */
-       ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_RELAXED);
-       rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_ACQUIRE);
+       ticket = atomic_fetch_add_explicit(&pf->wr.in, 1, memory_order_relaxed);
+       rte_wait_until_equal_16(&pf->wr.out, ticket, memory_order_acquire);
 
        /*
         * Acquire ticket on read-side in order to allow them
@@ -172,10 +172,10 @@ struct rte_pflock {
         * speculatively.
         */
        w = RTE_PFLOCK_PRES | (ticket & RTE_PFLOCK_PHID);
-       ticket = __atomic_fetch_add(&pf->rd.in, w, __ATOMIC_RELAXED);
+       ticket = atomic_fetch_add_explicit(&pf->rd.in, w, memory_order_relaxed);
 
        /* Wait for any pending readers to flush. */
-       rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_16(&pf->rd.out, ticket, memory_order_acquire);
 }
 
 /**
@@ -192,10 +192,10 @@ struct rte_pflock {
 rte_pflock_write_unlock(rte_pflock_t *pf)
 {
        /* Migrate from write phase to read phase. */
-       __atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, __ATOMIC_RELEASE);
+       atomic_fetch_and_explicit(&pf->rd.in, RTE_PFLOCK_LSB, memory_order_release);
 
        /* Allow other writers to continue. */
-       __atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE);
+       atomic_fetch_add_explicit(&pf->wr.out, 1, memory_order_release);
 }
 
 #ifdef __cplusplus
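As with the MCS lock, the phase-fair lock API itself is unchanged; only the
internal counters become _Atomic. A minimal reader/writer sketch, assuming a
zero-initialized (unlocked) lock and an illustrative shared value:

#include <stdint.h>

#include <rte_pflock.h>

static rte_pflock_t lock;               /* all-zero == unlocked */
static uint32_t shared_value;

static uint32_t
reader(void)
{
        uint32_t v;

        rte_pflock_read_lock(&lock);
        v = shared_value;
        rte_pflock_read_unlock(&lock);
        return v;
}

static void
writer(uint32_t v)
{
        rte_pflock_write_lock(&lock);
        shared_value = v;
        rte_pflock_write_unlock(&lock);
}
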
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 5db0d8a..5206b62 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -31,7 +31,7 @@
 typedef union {
        uint32_t tickets;
        struct {
-               uint16_t current;
+               uint16_t _Atomic current;
                uint16_t next;
        } s;
 } rte_ticketlock_t;
@@ -63,7 +63,7 @@
 rte_ticketlock_lock(rte_ticketlock_t *tl)
 {
        uint16_t me = __atomic_fetch_add(&tl->s.next, 1, __ATOMIC_RELAXED);
-       rte_wait_until_equal_16(&tl->s.current, me, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_16(&tl->s.current, me, memory_order_acquire);
 }
 
 /**
@@ -75,8 +75,8 @@
 static inline void
 rte_ticketlock_unlock(rte_ticketlock_t *tl)
 {
-       uint16_t i = __atomic_load_n(&tl->s.current, __ATOMIC_RELAXED);
-       __atomic_store_n(&tl->s.current, i + 1, __ATOMIC_RELEASE);
+       uint16_t i = atomic_load_explicit(&tl->s.current, memory_order_relaxed);
+       atomic_store_explicit(&tl->s.current, i + 1, memory_order_release);
 }
 
 /**
diff --git a/lib/ring/rte_ring_c11_pvt.h b/lib/ring/rte_ring_c11_pvt.h
index f895950..887054e 100644
--- a/lib/ring/rte_ring_c11_pvt.h
+++ b/lib/ring/rte_ring_c11_pvt.h
@@ -22,9 +22,9 @@
         * we need to wait for them to complete
         */
        if (!single)
-               rte_wait_until_equal_32(&ht->tail, old_val, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32(&ht->tail, old_val, memory_order_relaxed);
 
-       __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE);
+       atomic_store_explicit(&ht->tail, new_val, memory_order_release);
 }
 
 /**
@@ -61,7 +61,7 @@
        unsigned int max = n;
        int success;
 
-       *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);
+       *old_head = atomic_load_explicit(&r->prod.head, memory_order_relaxed);
        do {
                /* Reset n to the initial burst count */
                n = max;
@@ -72,8 +72,8 @@
                /* load-acquire synchronize with store-release of ht->tail
                 * in update_tail.
                 */
-               cons_tail = __atomic_load_n(&r->cons.tail,
-                                       __ATOMIC_ACQUIRE);
+               cons_tail = atomic_load_explicit(&r->cons.tail,
+                                       memory_order_acquire);
 
                /* The subtraction is done between two unsigned 32bits value
                 * (the result is always modulo 32 bits even if we have
@@ -91,14 +91,15 @@
                        return 0;
 
                *new_head = *old_head + n;
-               if (is_sp)
-                       r->prod.head = *new_head, success = 1;
-               else
+               if (is_sp) {
+                       r->prod.head = *new_head;
+                       success = 1;
+               } else
                        /* on failure, *old_head is updated */
-                       success = __atomic_compare_exchange_n(&r->prod.head,
+                       success = atomic_compare_exchange_strong_explicit(&r->prod.head,
                                        old_head, *new_head,
-                                       0, __ATOMIC_RELAXED,
-                                       __ATOMIC_RELAXED);
+                                       memory_order_relaxed,
+                                       memory_order_relaxed);
        } while (unlikely(success == 0));
        return n;
 }
@@ -137,7 +138,7 @@
        int success;
 
        /* move cons.head atomically */
-       *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);
+       *old_head = atomic_load_explicit(&r->cons.head, memory_order_relaxed);
        do {
                /* Restore n as it may change every loop */
                n = max;
@@ -148,8 +149,8 @@
                /* this load-acquire synchronize with store-release of ht->tail
                 * in update_tail.
                 */
-               prod_tail = __atomic_load_n(&r->prod.tail,
-                                       __ATOMIC_ACQUIRE);
+               prod_tail = atomic_load_explicit(&r->prod.tail,
+                                       memory_order_acquire);
 
                /* The subtraction is done between two unsigned 32bits value
                 * (the result is always modulo 32 bits even if we have
@@ -166,14 +167,15 @@
                        return 0;
 
                *new_head = *old_head + n;
-               if (is_sc)
-                       r->cons.head = *new_head, success = 1;
-               else
+               if (is_sc) {
+                       r->cons.head = *new_head;
+                       success = 1;
+               } else
                        /* on failure, *old_head will be updated */
-                       success = __atomic_compare_exchange_n(&r->cons.head,
+                       success = atomic_compare_exchange_strong_explicit(&r->cons.head,
                                                        old_head, *new_head,
-                                                       0, __ATOMIC_RELAXED,
-                                                       __ATOMIC_RELAXED);
+                                                       memory_order_relaxed,
+                                                       memory_order_relaxed);
        } while (unlikely(success == 0));
        return n;
 }
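The comments above note that *old_head is updated on CAS failure; with the
standard atomics this falls out of atomic_compare_exchange_strong_explicit(),
which writes the observed value back through its expected argument. A
standalone sketch of that retry pattern, with illustrative names (not ring
code):

#include <stdatomic.h>
#include <stdint.h>

static uint32_t _Atomic head;

/* Atomically advance 'head' by n and return the pre-advance value. */
static uint32_t
claim(uint32_t n)
{
        uint32_t old = atomic_load_explicit(&head, memory_order_relaxed);

        /* On failure the CAS stores the currently observed head into
         * 'old', so each retry recomputes the desired value from it.
         */
        while (!atomic_compare_exchange_strong_explicit(&head, &old, old + n,
                        memory_order_relaxed, memory_order_relaxed))
                ;
        return old;
}
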
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 82b2370..cf3cb84 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -66,8 +66,8 @@ enum rte_ring_sync_type {
  * but offset for *sync_type* and *tail* values should remain the same.
  */
 struct rte_ring_headtail {
-       volatile uint32_t head;      /**< prod/consumer head. */
-       volatile uint32_t tail;      /**< prod/consumer tail. */
+       uint32_t _Atomic head;      /**< prod/consumer head. */
+       uint32_t _Atomic tail;      /**< prod/consumer tail. */
        RTE_STD_C11
        union {
                /** sync type of prod/cons */
diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index 5acb6e5..12a3ca8 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -89,11 +89,14 @@
                        return 0;
 
                *new_head = *old_head + n;
-               if (is_sp)
-                       r->prod.head = *new_head, success = 1;
-               else
-                       success = rte_atomic32_cmpset(&r->prod.head,
-                                       *old_head, *new_head);
+               if (is_sp) {
+                       r->prod.head = *new_head;
+                       success = 1;
+               } else
+                       /* NOTE: review for potential ordering optimization */
+                       success = atomic_compare_exchange_strong_explicit(&r->prod.head,
+                                       old_head, *new_head,
+                                       memory_order_seq_cst, memory_order_seq_cst);
        } while (unlikely(success == 0));
        return n;
 }
@@ -162,8 +165,10 @@
                        rte_smp_rmb();
                        success = 1;
                } else {
-                       success = rte_atomic32_cmpset(&r->cons.head, *old_head,
-                                       *new_head);
+                       /* NOTE: review for potential ordering optimization */
+                       success = atomic_compare_exchange_strong_explicit(&r->cons.head,
+                                       old_head, *new_head,
+                                       memory_order_seq_cst, memory_order_seq_cst);
                }
        } while (unlikely(success == 0));
        return n;
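For reference, the load-acquire/store-release pairing that the ring comments
describe can be shown in isolation; a minimal single-producer sketch with
illustrative names (not DPDK code):

#include <stdatomic.h>
#include <stdint.h>

#include <rte_pause.h>

static uint32_t _Atomic tail;           /* producer's published index */
static int ring_storage[1024];          /* illustrative ring storage  */

static void
publish(uint32_t idx, int v)
{
        ring_storage[idx & 1023] = v;   /* write the entry first...   */
        atomic_store_explicit(&tail, idx + 1,
                        memory_order_release);  /* ...then publish it */
}

static int
consume(uint32_t idx)
{
        /* The load-acquire synchronizes with the store-release above, so
         * the entry written before the release is visible here.
         */
        while (atomic_load_explicit(&tail, memory_order_acquire) <= idx)
                rte_pause();
        return ring_storage[idx & 1023];
}
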
diff --git a/lib/ring/rte_ring_peek_elem_pvt.h b/lib/ring/rte_ring_peek_elem_pvt.h
index bb0a7d5..6707e38 100644
--- a/lib/ring/rte_ring_peek_elem_pvt.h
+++ b/lib/ring/rte_ring_peek_elem_pvt.h
@@ -59,7 +59,7 @@
 
        pos = tail + num;
        ht->head = pos;
-       __atomic_store_n(&ht->tail, pos, __ATOMIC_RELEASE);
+       atomic_store_explicit(&ht->tail, pos, memory_order_release);
 }
 
 /**
-- 
1.8.3.1
