<snip>
> Subject: [PATCH v1 11/12] app/bbdev: use compiler atomics for thread sync

There are multiple fields being synchronized. How about the following subject instead:
    app/bbdev: use compiler atomics for synchronizing shared data
> > Convert rte_atomic usages to compiler atomic built-ins for thread params > sync in bbdev cases. > > Signed-off-by: Joyce Kong <joyce.k...@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> Otherwise, looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > --- > app/test-bbdev/test_bbdev_perf.c | 135 ++++++++++++++----------------- > 1 file changed, 59 insertions(+), 76 deletions(-) > > diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test- > bbdev/test_bbdev_perf.c > index 469597b8b3..dc62e16216 100644 > --- a/app/test-bbdev/test_bbdev_perf.c > +++ b/app/test-bbdev/test_bbdev_perf.c > @@ -133,7 +133,7 @@ struct test_op_params { > uint16_t num_to_process; > uint16_t num_lcores; > int vector_mask; > - rte_atomic16_t sync; > + uint16_t sync; > struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; > }; > > @@ -148,9 +148,9 @@ struct thread_params { > uint8_t iter_count; > double iter_average; > double bler; > - rte_atomic16_t nb_dequeued; > - rte_atomic16_t processing_status; > - rte_atomic16_t burst_sz; > + uint16_t nb_dequeued; > + int16_t processing_status; > + uint16_t burst_sz; > struct test_op_params *op_params; > struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; > struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; @@ -2594,46 > +2594,46 @@ dequeue_event_callback(uint16_t dev_id, > } > > if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { > - rte_atomic16_set(&tp->processing_status, TEST_FAILED); > + __atomic_store_n(&tp->processing_status, TEST_FAILED, > +__ATOMIC_RELAXED); > printf( > "Dequeue interrupt handler called for incorrect > event!\n"); > return; > } > > - burst_sz = rte_atomic16_read(&tp->burst_sz); > + burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED); > num_ops = tp->op_params->num_to_process; > > if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) > deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, > &tp->dec_ops[ > - rte_atomic16_read(&tp- > >nb_dequeued)], > + __atomic_load_n(&tp->nb_dequeued, > 
__ATOMIC_RELAXED)], > burst_sz); > else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) > deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, > &tp->dec_ops[ > - rte_atomic16_read(&tp- > >nb_dequeued)], > + __atomic_load_n(&tp->nb_dequeued, > __ATOMIC_RELAXED)], > burst_sz); > else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) > deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, > &tp->enc_ops[ > - rte_atomic16_read(&tp- > >nb_dequeued)], > + __atomic_load_n(&tp->nb_dequeued, > __ATOMIC_RELAXED)], > burst_sz); > else /*RTE_BBDEV_OP_TURBO_ENC*/ > deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, > &tp->enc_ops[ > - rte_atomic16_read(&tp- > >nb_dequeued)], > + __atomic_load_n(&tp->nb_dequeued, > __ATOMIC_RELAXED)], > burst_sz); > > if (deq < burst_sz) { > printf( > "After receiving the interrupt all operations should be > dequeued. Expected: %u, got: %u\n", > burst_sz, deq); > - rte_atomic16_set(&tp->processing_status, TEST_FAILED); > + __atomic_store_n(&tp->processing_status, TEST_FAILED, > +__ATOMIC_RELAXED); > return; > } > > - if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { > - rte_atomic16_add(&tp->nb_dequeued, deq); > + if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq > < num_ops) { > + __atomic_fetch_add(&tp->nb_dequeued, deq, > __ATOMIC_RELAXED); > return; > } > > @@ -2670,7 +2670,7 @@ dequeue_event_callback(uint16_t dev_id, > > if (ret) { > printf("Buffers validation failed\n"); > - rte_atomic16_set(&tp->processing_status, TEST_FAILED); > + __atomic_store_n(&tp->processing_status, TEST_FAILED, > +__ATOMIC_RELAXED); > } > > switch (test_vector.op_type) { > @@ -2691,7 +2691,7 @@ dequeue_event_callback(uint16_t dev_id, > break; > default: > printf("Unknown op type: %d\n", test_vector.op_type); > - rte_atomic16_set(&tp->processing_status, TEST_FAILED); > + __atomic_store_n(&tp->processing_status, TEST_FAILED, > +__ATOMIC_RELAXED); > return; > } > > @@ -2700,7 +2700,7 @@ dequeue_event_callback(uint16_t dev_id, > tp->mbps += 
(((double)(num_ops * tb_len_bits)) / 1000000.0) / > ((double)total_time / (double)rte_get_tsc_hz()); > > - rte_atomic16_add(&tp->nb_dequeued, deq); > + __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); > } > > static int > @@ -2738,11 +2738,10 @@ throughput_intr_lcore_ldpc_dec(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - rte_atomic16_clear(&tp->processing_status); > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); > + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, > num_to_process); > @@ -2790,17 +2789,15 @@ throughput_intr_lcore_ldpc_dec(void *arg) > * the number of operations is not a multiple of > * burst size. > */ > - rte_atomic16_set(&tp->burst_sz, num_to_enq); > + __atomic_store_n(&tp->burst_sz, num_to_enq, > __ATOMIC_RELAXED); > > /* Wait until processing of previous batch is > * completed > */ > - while (rte_atomic16_read(&tp->nb_dequeued) != > - (int16_t) enqueued) > - rte_pause(); > + rte_wait_until_equal_16(&tp->nb_dequeued, > enqueued, > +__ATOMIC_RELAXED); > } > if (j != TEST_REPETITIONS - 1) > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->nb_dequeued, 0, > __ATOMIC_RELAXED); > } > > return TEST_SUCCESS; > @@ -2835,11 +2832,10 @@ throughput_intr_lcore_dec(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - rte_atomic16_clear(&tp->processing_status); > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); > + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + 
rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, > num_to_process); > @@ -2880,17 +2876,15 @@ throughput_intr_lcore_dec(void *arg) > * the number of operations is not a multiple of > * burst size. > */ > - rte_atomic16_set(&tp->burst_sz, num_to_enq); > + __atomic_store_n(&tp->burst_sz, num_to_enq, > __ATOMIC_RELAXED); > > /* Wait until processing of previous batch is > * completed > */ > - while (rte_atomic16_read(&tp->nb_dequeued) != > - (int16_t) enqueued) > - rte_pause(); > + rte_wait_until_equal_16(&tp->nb_dequeued, > enqueued, > +__ATOMIC_RELAXED); > } > if (j != TEST_REPETITIONS - 1) > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->nb_dequeued, 0, > __ATOMIC_RELAXED); > } > > return TEST_SUCCESS; > @@ -2925,11 +2919,10 @@ throughput_intr_lcore_enc(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - rte_atomic16_clear(&tp->processing_status); > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); > + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, > num_to_process); > @@ -2969,17 +2962,15 @@ throughput_intr_lcore_enc(void *arg) > * the number of operations is not a multiple of > * burst size. 
> */ > - rte_atomic16_set(&tp->burst_sz, num_to_enq); > + __atomic_store_n(&tp->burst_sz, num_to_enq, > __ATOMIC_RELAXED); > > /* Wait until processing of previous batch is > * completed > */ > - while (rte_atomic16_read(&tp->nb_dequeued) != > - (int16_t) enqueued) > - rte_pause(); > + rte_wait_until_equal_16(&tp->nb_dequeued, > enqueued, > +__ATOMIC_RELAXED); > } > if (j != TEST_REPETITIONS - 1) > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->nb_dequeued, 0, > __ATOMIC_RELAXED); > } > > return TEST_SUCCESS; > @@ -3015,11 +3006,10 @@ throughput_intr_lcore_ldpc_enc(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - rte_atomic16_clear(&tp->processing_status); > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); > + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, > num_to_process); > @@ -3061,17 +3051,15 @@ throughput_intr_lcore_ldpc_enc(void *arg) > * the number of operations is not a multiple of > * burst size. 
> */ > - rte_atomic16_set(&tp->burst_sz, num_to_enq); > + __atomic_store_n(&tp->burst_sz, num_to_enq, > __ATOMIC_RELAXED); > > /* Wait until processing of previous batch is > * completed > */ > - while (rte_atomic16_read(&tp->nb_dequeued) != > - (int16_t) enqueued) > - rte_pause(); > + rte_wait_until_equal_16(&tp->nb_dequeued, > enqueued, > +__ATOMIC_RELAXED); > } > if (j != TEST_REPETITIONS - 1) > - rte_atomic16_clear(&tp->nb_dequeued); > + __atomic_store_n(&tp->nb_dequeued, 0, > __ATOMIC_RELAXED); > } > > return TEST_SUCCESS; > @@ -3105,8 +3093,7 @@ throughput_pmd_lcore_dec(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, > num_ops); > TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); > @@ -3209,8 +3196,7 @@ bler_pmd_lcore_ldpc_dec(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, > num_ops); > TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); > @@ -3339,8 +3325,7 @@ throughput_pmd_lcore_ldpc_dec(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, > num_ops); > TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); > @@ -3456,8 +3441,7 @@ throughput_pmd_lcore_enc(void *arg) > > bufs = &tp->op_params- > 
>q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, > num_ops); > @@ -3547,8 +3531,7 @@ throughput_pmd_lcore_ldpc_enc(void *arg) > > bufs = &tp->op_params- > >q_bufs[GET_SOCKET(info.socket_id)][queue_id]; > > - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) > - rte_pause(); > + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, > +__ATOMIC_RELAXED); > > ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, > num_ops); > @@ -3731,7 +3714,7 @@ bler_test(struct active_device *ad, > else > return TEST_SKIPPED; > > - rte_atomic16_set(&op_params->sync, SYNC_WAIT); > + __atomic_store_n(&op_params->sync, SYNC_WAIT, > __ATOMIC_RELAXED); > > /* Main core is set at first entry */ > t_params[0].dev_id = ad->dev_id; > @@ -3754,7 +3737,7 @@ bler_test(struct active_device *ad, > &t_params[used_cores++], lcore_id); > } > > - rte_atomic16_set(&op_params->sync, SYNC_START); > + __atomic_store_n(&op_params->sync, SYNC_START, > __ATOMIC_RELAXED); > ret = bler_function(&t_params[0]); > > /* Main core is always used */ > @@ -3849,7 +3832,7 @@ throughput_test(struct active_device *ad, > throughput_function = throughput_pmd_lcore_enc; > } > > - rte_atomic16_set(&op_params->sync, SYNC_WAIT); > + __atomic_store_n(&op_params->sync, SYNC_WAIT, > __ATOMIC_RELAXED); > > /* Main core is set at first entry */ > t_params[0].dev_id = ad->dev_id; > @@ -3872,7 +3855,7 @@ throughput_test(struct active_device *ad, > &t_params[used_cores++], lcore_id); > } > > - rte_atomic16_set(&op_params->sync, SYNC_START); > + __atomic_store_n(&op_params->sync, SYNC_START, > __ATOMIC_RELAXED); > ret = throughput_function(&t_params[0]); > > /* Main core is always used */ > @@ -3902,29 +3885,29 @@ throughput_test(struct active_device *ad, > * Wait for main lcore 
operations. > */ > tp = &t_params[0]; > - while ((rte_atomic16_read(&tp->nb_dequeued) < > - op_params->num_to_process) && > - (rte_atomic16_read(&tp->processing_status) != > - TEST_FAILED)) > + while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < > + op_params->num_to_process) && > + (__atomic_load_n(&tp->processing_status, > __ATOMIC_RELAXED) != > + TEST_FAILED)) > rte_pause(); > > tp->ops_per_sec /= TEST_REPETITIONS; > tp->mbps /= TEST_REPETITIONS; > - ret |= (int)rte_atomic16_read(&tp->processing_status); > + ret |= (int)__atomic_load_n(&tp->processing_status, > __ATOMIC_RELAXED); > > /* Wait for worker lcores operations */ > for (used_cores = 1; used_cores < num_lcores; used_cores++) { > tp = &t_params[used_cores]; > > - while ((rte_atomic16_read(&tp->nb_dequeued) < > - op_params->num_to_process) && > - (rte_atomic16_read(&tp->processing_status) > != > - TEST_FAILED)) > + while ((__atomic_load_n(&tp->nb_dequeued, > __ATOMIC_RELAXED) < > + op_params->num_to_process) && > + (__atomic_load_n(&tp->processing_status, > __ATOMIC_RELAXED) != > + TEST_FAILED)) > rte_pause(); > > tp->ops_per_sec /= TEST_REPETITIONS; > tp->mbps /= TEST_REPETITIONS; > - ret |= (int)rte_atomic16_read(&tp->processing_status); > + ret |= (int)__atomic_load_n(&tp->processing_status, > +__ATOMIC_RELAXED); > } > > /* Print throughput if test passed */ > -- > 2.17.1