<snip>

> Subject: [PATCH v1 11/12] app/bbdev: use compiler atomics for thread sync
Multiple fields are being synchronized here. How about the following
subject line instead:
app/bbdev: use compiler atomics for synchronizing shared data

> 
> Convert rte_atomic usages to compiler atomic built-ins for thread params
> sync in bbdev cases.
> 
> Signed-off-by: Joyce Kong <joyce.k...@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Otherwise, looks good.
Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>

> ---
>  app/test-bbdev/test_bbdev_perf.c | 135 ++++++++++++++-----------------
>  1 file changed, 59 insertions(+), 76 deletions(-)
> 
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-
> bbdev/test_bbdev_perf.c
> index 469597b8b3..dc62e16216 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -133,7 +133,7 @@ struct test_op_params {
>       uint16_t num_to_process;
>       uint16_t num_lcores;
>       int vector_mask;
> -     rte_atomic16_t sync;
> +     uint16_t sync;
>       struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
>  };
> 
> @@ -148,9 +148,9 @@ struct thread_params {
>       uint8_t iter_count;
>       double iter_average;
>       double bler;
> -     rte_atomic16_t nb_dequeued;
> -     rte_atomic16_t processing_status;
> -     rte_atomic16_t burst_sz;
> +     uint16_t nb_dequeued;
> +     int16_t processing_status;
> +     uint16_t burst_sz;
>       struct test_op_params *op_params;
>       struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
>       struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
> @@ -2594,46 +2594,46 @@ dequeue_event_callback(uint16_t dev_id,
>       }
> 
>       if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
> -             rte_atomic16_set(&tp->processing_status, TEST_FAILED);
> +             __atomic_store_n(&tp->processing_status, TEST_FAILED,
> +__ATOMIC_RELAXED);
>               printf(
>                       "Dequeue interrupt handler called for incorrect
> event!\n");
>               return;
>       }
> 
> -     burst_sz = rte_atomic16_read(&tp->burst_sz);
> +     burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
>       num_ops = tp->op_params->num_to_process;
> 
>       if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
>               deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
>                               &tp->dec_ops[
> -                                     rte_atomic16_read(&tp-
> >nb_dequeued)],
> +                                     __atomic_load_n(&tp->nb_dequeued,
> __ATOMIC_RELAXED)],
>                               burst_sz);
>       else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
>               deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
>                               &tp->dec_ops[
> -                                     rte_atomic16_read(&tp-
> >nb_dequeued)],
> +                                     __atomic_load_n(&tp->nb_dequeued,
> __ATOMIC_RELAXED)],
>                               burst_sz);
>       else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
>               deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
>                               &tp->enc_ops[
> -                                     rte_atomic16_read(&tp-
> >nb_dequeued)],
> +                                     __atomic_load_n(&tp->nb_dequeued,
> __ATOMIC_RELAXED)],
>                               burst_sz);
>       else /*RTE_BBDEV_OP_TURBO_ENC*/
>               deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
>                               &tp->enc_ops[
> -                                     rte_atomic16_read(&tp-
> >nb_dequeued)],
> +                                     __atomic_load_n(&tp->nb_dequeued,
> __ATOMIC_RELAXED)],
>                               burst_sz);
> 
>       if (deq < burst_sz) {
>               printf(
>                       "After receiving the interrupt all operations should be
> dequeued. Expected: %u, got: %u\n",
>                       burst_sz, deq);
> -             rte_atomic16_set(&tp->processing_status, TEST_FAILED);
> +             __atomic_store_n(&tp->processing_status, TEST_FAILED,
> +__ATOMIC_RELAXED);
>               return;
>       }
> 
> -     if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
> -             rte_atomic16_add(&tp->nb_dequeued, deq);
> +     if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq
> < num_ops) {
> +             __atomic_fetch_add(&tp->nb_dequeued, deq,
> __ATOMIC_RELAXED);
>               return;
>       }
> 
> @@ -2670,7 +2670,7 @@ dequeue_event_callback(uint16_t dev_id,
> 
>       if (ret) {
>               printf("Buffers validation failed\n");
> -             rte_atomic16_set(&tp->processing_status, TEST_FAILED);
> +             __atomic_store_n(&tp->processing_status, TEST_FAILED,
> +__ATOMIC_RELAXED);
>       }
> 
>       switch (test_vector.op_type) {
> @@ -2691,7 +2691,7 @@ dequeue_event_callback(uint16_t dev_id,
>               break;
>       default:
>               printf("Unknown op type: %d\n", test_vector.op_type);
> -             rte_atomic16_set(&tp->processing_status, TEST_FAILED);
> +             __atomic_store_n(&tp->processing_status, TEST_FAILED,
> +__ATOMIC_RELAXED);
>               return;
>       }
> 
> @@ -2700,7 +2700,7 @@ dequeue_event_callback(uint16_t dev_id,
>       tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
>                       ((double)total_time / (double)rte_get_tsc_hz());
> 
> -     rte_atomic16_add(&tp->nb_dequeued, deq);
> +     __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
>  }
> 
>  static int
> @@ -2738,11 +2738,10 @@ throughput_intr_lcore_ldpc_dec(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     rte_atomic16_clear(&tp->processing_status);
> -     rte_atomic16_clear(&tp->nb_dequeued);
> +     __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
> +     __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
>                               num_to_process);
> @@ -2790,17 +2789,15 @@ throughput_intr_lcore_ldpc_dec(void *arg)
>                        * the number of operations is not a multiple of
>                        * burst size.
>                        */
> -                     rte_atomic16_set(&tp->burst_sz, num_to_enq);
> +                     __atomic_store_n(&tp->burst_sz, num_to_enq,
> __ATOMIC_RELAXED);
> 
>                       /* Wait until processing of previous batch is
>                        * completed
>                        */
> -                     while (rte_atomic16_read(&tp->nb_dequeued) !=
> -                                     (int16_t) enqueued)
> -                             rte_pause();
> +                     rte_wait_until_equal_16(&tp->nb_dequeued,
> enqueued,
> +__ATOMIC_RELAXED);
>               }
>               if (j != TEST_REPETITIONS - 1)
> -                     rte_atomic16_clear(&tp->nb_dequeued);
> +                     __atomic_store_n(&tp->nb_dequeued, 0,
> __ATOMIC_RELAXED);
>       }
> 
>       return TEST_SUCCESS;
> @@ -2835,11 +2832,10 @@ throughput_intr_lcore_dec(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     rte_atomic16_clear(&tp->processing_status);
> -     rte_atomic16_clear(&tp->nb_dequeued);
> +     __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
> +     __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
>                               num_to_process);
> @@ -2880,17 +2876,15 @@ throughput_intr_lcore_dec(void *arg)
>                        * the number of operations is not a multiple of
>                        * burst size.
>                        */
> -                     rte_atomic16_set(&tp->burst_sz, num_to_enq);
> +                     __atomic_store_n(&tp->burst_sz, num_to_enq,
> __ATOMIC_RELAXED);
> 
>                       /* Wait until processing of previous batch is
>                        * completed
>                        */
> -                     while (rte_atomic16_read(&tp->nb_dequeued) !=
> -                                     (int16_t) enqueued)
> -                             rte_pause();
> +                     rte_wait_until_equal_16(&tp->nb_dequeued,
> enqueued,
> +__ATOMIC_RELAXED);
>               }
>               if (j != TEST_REPETITIONS - 1)
> -                     rte_atomic16_clear(&tp->nb_dequeued);
> +                     __atomic_store_n(&tp->nb_dequeued, 0,
> __ATOMIC_RELAXED);
>       }
> 
>       return TEST_SUCCESS;
> @@ -2925,11 +2919,10 @@ throughput_intr_lcore_enc(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     rte_atomic16_clear(&tp->processing_status);
> -     rte_atomic16_clear(&tp->nb_dequeued);
> +     __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
> +     __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
>                       num_to_process);
> @@ -2969,17 +2962,15 @@ throughput_intr_lcore_enc(void *arg)
>                        * the number of operations is not a multiple of
>                        * burst size.
>                        */
> -                     rte_atomic16_set(&tp->burst_sz, num_to_enq);
> +                     __atomic_store_n(&tp->burst_sz, num_to_enq,
> __ATOMIC_RELAXED);
> 
>                       /* Wait until processing of previous batch is
>                        * completed
>                        */
> -                     while (rte_atomic16_read(&tp->nb_dequeued) !=
> -                                     (int16_t) enqueued)
> -                             rte_pause();
> +                     rte_wait_until_equal_16(&tp->nb_dequeued,
> enqueued,
> +__ATOMIC_RELAXED);
>               }
>               if (j != TEST_REPETITIONS - 1)
> -                     rte_atomic16_clear(&tp->nb_dequeued);
> +                     __atomic_store_n(&tp->nb_dequeued, 0,
> __ATOMIC_RELAXED);
>       }
> 
>       return TEST_SUCCESS;
> @@ -3015,11 +3006,10 @@ throughput_intr_lcore_ldpc_enc(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     rte_atomic16_clear(&tp->processing_status);
> -     rte_atomic16_clear(&tp->nb_dequeued);
> +     __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
> +     __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
>                       num_to_process);
> @@ -3061,17 +3051,15 @@ throughput_intr_lcore_ldpc_enc(void *arg)
>                        * the number of operations is not a multiple of
>                        * burst size.
>                        */
> -                     rte_atomic16_set(&tp->burst_sz, num_to_enq);
> +                     __atomic_store_n(&tp->burst_sz, num_to_enq,
> __ATOMIC_RELAXED);
> 
>                       /* Wait until processing of previous batch is
>                        * completed
>                        */
> -                     while (rte_atomic16_read(&tp->nb_dequeued) !=
> -                                     (int16_t) enqueued)
> -                             rte_pause();
> +                     rte_wait_until_equal_16(&tp->nb_dequeued,
> enqueued,
> +__ATOMIC_RELAXED);
>               }
>               if (j != TEST_REPETITIONS - 1)
> -                     rte_atomic16_clear(&tp->nb_dequeued);
> +                     __atomic_store_n(&tp->nb_dequeued, 0,
> __ATOMIC_RELAXED);
>       }
> 
>       return TEST_SUCCESS;
> @@ -3105,8 +3093,7 @@ throughput_pmd_lcore_dec(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq,
> num_ops);
>       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
> @@ -3209,8 +3196,7 @@ bler_pmd_lcore_ldpc_dec(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq,
> num_ops);
>       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
> @@ -3339,8 +3325,7 @@ throughput_pmd_lcore_ldpc_dec(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq,
> num_ops);
>       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
> @@ -3456,8 +3441,7 @@ throughput_pmd_lcore_enc(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
>                       num_ops);
> @@ -3547,8 +3531,7 @@ throughput_pmd_lcore_ldpc_enc(void *arg)
> 
>       bufs = &tp->op_params-
> >q_bufs[GET_SOCKET(info.socket_id)][queue_id];
> 
> -     while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
> -             rte_pause();
> +     rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
> +__ATOMIC_RELAXED);
> 
>       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
>                       num_ops);
> @@ -3731,7 +3714,7 @@ bler_test(struct active_device *ad,
>       else
>               return TEST_SKIPPED;
> 
> -     rte_atomic16_set(&op_params->sync, SYNC_WAIT);
> +     __atomic_store_n(&op_params->sync, SYNC_WAIT,
> __ATOMIC_RELAXED);
> 
>       /* Main core is set at first entry */
>       t_params[0].dev_id = ad->dev_id;
> @@ -3754,7 +3737,7 @@ bler_test(struct active_device *ad,
>                               &t_params[used_cores++], lcore_id);
>       }
> 
> -     rte_atomic16_set(&op_params->sync, SYNC_START);
> +     __atomic_store_n(&op_params->sync, SYNC_START,
> __ATOMIC_RELAXED);
>       ret = bler_function(&t_params[0]);
> 
>       /* Main core is always used */
> @@ -3849,7 +3832,7 @@ throughput_test(struct active_device *ad,
>                       throughput_function = throughput_pmd_lcore_enc;
>       }
> 
> -     rte_atomic16_set(&op_params->sync, SYNC_WAIT);
> +     __atomic_store_n(&op_params->sync, SYNC_WAIT,
> __ATOMIC_RELAXED);
> 
>       /* Main core is set at first entry */
>       t_params[0].dev_id = ad->dev_id;
> @@ -3872,7 +3855,7 @@ throughput_test(struct active_device *ad,
>                               &t_params[used_cores++], lcore_id);
>       }
> 
> -     rte_atomic16_set(&op_params->sync, SYNC_START);
> +     __atomic_store_n(&op_params->sync, SYNC_START,
> __ATOMIC_RELAXED);
>       ret = throughput_function(&t_params[0]);
> 
>       /* Main core is always used */
> @@ -3902,29 +3885,29 @@ throughput_test(struct active_device *ad,
>        * Wait for main lcore operations.
>        */
>       tp = &t_params[0];
> -     while ((rte_atomic16_read(&tp->nb_dequeued) <
> -                     op_params->num_to_process) &&
> -                     (rte_atomic16_read(&tp->processing_status) !=
> -                     TEST_FAILED))
> +     while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
> +             op_params->num_to_process) &&
> +             (__atomic_load_n(&tp->processing_status,
> __ATOMIC_RELAXED) !=
> +             TEST_FAILED))
>               rte_pause();
> 
>       tp->ops_per_sec /= TEST_REPETITIONS;
>       tp->mbps /= TEST_REPETITIONS;
> -     ret |= (int)rte_atomic16_read(&tp->processing_status);
> +     ret |= (int)__atomic_load_n(&tp->processing_status,
> __ATOMIC_RELAXED);
> 
>       /* Wait for worker lcores operations */
>       for (used_cores = 1; used_cores < num_lcores; used_cores++) {
>               tp = &t_params[used_cores];
> 
> -             while ((rte_atomic16_read(&tp->nb_dequeued) <
> -                             op_params->num_to_process) &&
> -                             (rte_atomic16_read(&tp->processing_status)
> !=
> -                             TEST_FAILED))
> +             while ((__atomic_load_n(&tp->nb_dequeued,
> __ATOMIC_RELAXED) <
> +                     op_params->num_to_process) &&
> +                     (__atomic_load_n(&tp->processing_status,
> __ATOMIC_RELAXED) !=
> +                     TEST_FAILED))
>                       rte_pause();
> 
>               tp->ops_per_sec /= TEST_REPETITIONS;
>               tp->mbps /= TEST_REPETITIONS;
> -             ret |= (int)rte_atomic16_read(&tp->processing_status);
> +             ret |= (int)__atomic_load_n(&tp->processing_status,
> +__ATOMIC_RELAXED);
>       }
> 
>       /* Print throughput if test passed */
> --
> 2.17.1

Reply via email to