On 10/26/20 9:50 AM, Chautru, Nicolas wrote: >> -----Original Message----- >> From: Tom Rix <t...@redhat.com> >> Sent: Monday, October 26, 2020 6:32 AM >> To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org; >> akhil.go...@nxp.com >> Cc: david.march...@redhat.com >> Subject: Re: [PATCH v5 3/7] app/bbdev: include explicit HARQ preloading >> >> >> On 10/23/20 4:42 PM, Nicolas Chautru wrote: >>> Run preloading explicitly for unit tests. Load each code block by >>> reusing existing input op then restore for the actual test. >>> >>> Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com> >>> Acked-by: Liu Tianjiao <tianjiao....@intel.com> >>> --- >>> app/test-bbdev/main.h | 1 + >>> app/test-bbdev/test_bbdev_perf.c | 51 >>> +++++++++++++++++++++------------------- >>> 2 files changed, 28 insertions(+), 24 deletions(-) >>> >>> diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index >>> fb3dec8..dc10a50 100644 >>> --- a/app/test-bbdev/main.h >>> +++ b/app/test-bbdev/main.h >>> @@ -17,6 +17,7 @@ >>> #define TEST_SKIPPED 1 >>> >>> #define MAX_BURST 512U >>> +#define MAX_OPS 1024U >> This #define is not consistently used. >> >> ex/ see retrieve_harq_ddr, the old 1024 is still being used. > Thanks I missed it. I will change this now. > >>> #define DEFAULT_BURST 32U >>> #define DEFAULT_OPS 64U >>> #define DEFAULT_ITER 6U >>> diff --git a/app/test-bbdev/test_bbdev_perf.c >>> b/app/test-bbdev/test_bbdev_perf.c >>> index b62848e..f30cbdb 100644 >>> --- a/app/test-bbdev/test_bbdev_perf.c >>> +++ b/app/test-bbdev/test_bbdev_perf.c >>> @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct >> active_device *ad, >>> bool preload) >>> { >>> uint16_t j; >>> - int ret; >>> - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; >>> - struct rte_bbdev_op_data save_hc_in, save_hc_out; >>> - struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; >>> + int deq; >>> + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * >> MAX_OPS; >>> + struct rte_bbdev_op_data save_hc_in[MAX_OPS], >> save_hc_out[MAX_OPS]; >>> + struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; >>> uint32_t flags = ops[0]->ldpc_dec.op_flags; >>> bool mem_in = flags & >> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; >>> bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; >>> bool mem_out = flags & >> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; >>> bool hc_out = flags & >> RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; >>> bool h_comp = flags & >> RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; >>> - for (j = 0; j < n; ++j) { >>> - if ((mem_in || hc_in) && preload) { >>> - save_hc_in = ops[j]- >>> ldpc_dec.harq_combined_input; >>> - save_hc_out = ops[j]- >>> ldpc_dec.harq_combined_output; >>> + if ((mem_in || hc_in) && preload) { >>> + for (j = 0; j < n; ++j) { >>> + save_hc_in[j] = ops[j]- >>> ldpc_dec.harq_combined_input; >>> + save_hc_out[j] = ops[j]- >>> ldpc_dec.harq_combined_output; >>> ops[j]->ldpc_dec.op_flags = >>> >> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + >> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; >> >> flags are usually handled with bit operators, not arithmetic. >> >> this seems to be a general issue. > This is keeping same coding style as rest of file. So keeping as is. Ugh. Add to a cleanup list. > >>> @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct >> active_device *ad, >>> ops[j]->ldpc_dec.harq_combined_output.offset = >>> harq_offset; >>> ops[j]->ldpc_dec.harq_combined_input.offset = 0; >>> - rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, >>> - &ops[j], 1); >>> - ret = 0; >>> - while (ret == 0) >>> - ret = rte_bbdev_dequeue_ldpc_dec_ops( >>> - dev_id, queue_id, &ops_deq[j], 1); >>> + harq_offset += HARQ_INCR; >>> + } >>> + rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, >> &ops[0], n); >> Add check the return is 'n' >>> + deq = 0; >>> + while (deq != n) >>> + deq += rte_bbdev_dequeue_ldpc_dec_ops( >>> + dev_id, queue_id, &ops_deq[deq], >>> + n - deq); >> Add check the return >= 0 > This cannot be <0. uint16_t
ok Tom > >> Tom >> >>> + /* Restore the operations */ >>> + for (j = 0; j < n; ++j) { >>> ops[j]->ldpc_dec.op_flags = flags; >>> - ops[j]->ldpc_dec.harq_combined_input = >> save_hc_in; >>> - ops[j]->ldpc_dec.harq_combined_output = >> save_hc_out; >>> + ops[j]->ldpc_dec.harq_combined_input = >> save_hc_in[j]; >>> + ops[j]->ldpc_dec.harq_combined_output = >> save_hc_out[j]; >>> } >>> + } >>> + harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; >>> + for (j = 0; j < n; ++j) { >>> /* Adjust HARQ offset when we reach external DDR */ >>> if (mem_in || hc_in) >>> ops[j]->ldpc_dec.harq_combined_input.offset >>> @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct >> active_device *ad, >>> mbuf_reset( >>> ops_enq[j]- >>> ldpc_dec.harq_combined_output.data); >>> } >>> - if (extDdr) { >>> - bool preload = i == (TEST_REPETITIONS - 1); >>> + if (extDdr) >>> preload_harq_ddr(tp->dev_id, queue_id, ops_enq, >>> - num_ops, preload); >>> - } >>> + num_ops, true); >>> start_time = rte_rdtsc_precise(); >>> >>> for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11 >> +3367,9 @@ >>> typedef int (test_case_function)(struct active_device *ad, >>> mbuf_reset( >>> ops_enq[j]- >>> ldpc_dec.harq_combined_output.data); >>> } >>> - if (extDdr) { >>> - bool preload = i == (TEST_REPETITIONS - 1); >>> + if (extDdr) >>> preload_harq_ddr(tp->dev_id, queue_id, ops_enq, >>> - num_ops, preload); >>> - } >>> + num_ops, true); >>> start_time = rte_rdtsc_precise(); >>> >>> for (enq = 0, deq = 0; enq < num_ops;) {