On 10/4/23 23:11, Chautru, Nicolas wrote:
Hi Maxime,-----Original Message----- From: Maxime Coquelin <maxime.coque...@redhat.com> Sent: Tuesday, October 3, 2023 7:28 AM To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org Cc: hemant.agra...@nxp.com; david.march...@redhat.com; Vargas, Hernan <hernan.var...@intel.com> Subject: Re: [PATCH v3 08/12] baseband/acc: add FEC capabilities for the VRB2 variant On 9/29/23 18:35, Nicolas Chautru wrote:New implementation for some of the FEC features specific to the VRB2 variant. Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com> --- drivers/baseband/acc/rte_vrb_pmd.c | 567++++++++++++++++++++++++++++-1 file changed, 548 insertions(+), 19 deletions(-) diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c index 48e779ce77..93add82947 100644 --- a/drivers/baseband/acc/rte_vrb_pmd.c +++ b/drivers/baseband/acc/rte_vrb_pmd.c @@ -1235,6 +1235,94 @@ vrb_dev_info_get(struct rte_bbdev *dev, structrte_bbdev_driver_info *dev_info)}; static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = { + { + .type = RTE_BBDEV_OP_TURBO_DEC, + .cap.turbo_dec = { + .capability_flags = +RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |+ RTE_BBDEV_TURBO_CRC_TYPE_24B | +RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |+ RTE_BBDEV_TURBO_EQUALIZER | +RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |+RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |+RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |+ RTE_BBDEV_TURBO_SOFT_OUTPUT | +RTE_BBDEV_TURBO_EARLY_TERMINATION |+RTE_BBDEV_TURBO_DEC_INTERRUPTS |+RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |+RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |+ RTE_BBDEV_TURBO_MAP_DEC | +RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |+RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,+ .max_llr_modulus = INT8_MAX, + .num_buffers_src = +RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,+ .num_buffers_hard_out = +RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,+ .num_buffers_soft_out = +RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,+ } + }, + { + .type = RTE_BBDEV_OP_TURBO_ENC, + .cap.turbo_enc = { + .capability_flags = +RTE_BBDEV_TURBO_CRC_24B_ATTACH 
|+RTE_BBDEV_TURBO_RV_INDEX_BYPASS |+ RTE_BBDEV_TURBO_RATE_MATCH | +RTE_BBDEV_TURBO_ENC_INTERRUPTS |+RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,+ .num_buffers_src = +RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,+ .num_buffers_dst = +RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,+ } + }, + { + .type = RTE_BBDEV_OP_LDPC_ENC, + .cap.ldpc_enc = { + .capability_flags = + RTE_BBDEV_LDPC_RATE_MATCH | + RTE_BBDEV_LDPC_CRC_24B_ATTACH|+RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |+ RTE_BBDEV_LDPC_ENC_INTERRUPTS|+RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |+RTE_BBDEV_LDPC_ENC_CONCATENATION,+ .num_buffers_src = +RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,+ .num_buffers_dst = +RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,+ } + }, + { + .type = RTE_BBDEV_OP_LDPC_DEC, + .cap.ldpc_dec = { + .capability_flags = + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | + RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE|+RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |+ RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE|+ RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS | + RTE_BBDEV_LDPC_DEC_SCATTER_GATHER | +RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |+RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |+ RTE_BBDEV_LDPC_LLR_COMPRESSION | + RTE_BBDEV_LDPC_SOFT_OUT_ENABLE | + RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS | +RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |+ RTE_BBDEV_LDPC_DEC_INTERRUPTS, + .llr_size = 8, + .llr_decimals = 2, + .num_buffers_src = +RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,+ .num_buffers_hard_out = +RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,+ .num_buffers_soft_out = 0, + } + }, RTE_BBDEV_END_OF_CAPABILITIES_LIST() }; @@ -1774,6 +1862,141 @@ vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op*op,return 0; } +/* Fill in a frame control word for LDPC decoding. 
*/ static inline +void vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld +*fcw, + union acc_harq_layout_data *harq_layout) { + uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset; + uint32_t harq_index; + uint32_t l;

This is so similar to vrb1_fcw_ld_fill() that it does not make sense to duplicate so much code. Do you confirm there are no other differences than the SOFT_OUT stuff, and that reusing vrb2_fcw_ld_fill on VRB1 would just work, as the op_flags are checked (and they should not be set if the capability is not advertised)?

There are quite a lot of differences in the fundamental underlying IP: the IP decoder is different, with different tuning points, and the SO and HARQ support are different. Still, I believe we can support both in the same function without it being too much of a problem moving forward. Doing this in v4.
Thanks,
+ fcw->qm = op->ldpc_dec.q_m; + fcw->nfiller = op->ldpc_dec.n_filler; + fcw->BG = (op->ldpc_dec.basegraph - 1); + fcw->Zc = op->ldpc_dec.z_c; + fcw->ncb = op->ldpc_dec.n_cb; + fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph, + op->ldpc_dec.rv_index); + if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) + fcw->rm_e = op->ldpc_dec.cb_params.e; + else + fcw->rm_e = (op->ldpc_dec.tb_params.r < + op->ldpc_dec.tb_params.cab) ? + op->ldpc_dec.tb_params.ea : + op->ldpc_dec.tb_params.eb; + + if (unlikely(check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) && + (op->ldpc_dec.harq_combined_input.length == 0))) { + rte_bbdev_log(WARNING, "Null HARQ input size provided"); + /* Disable HARQ input in that case to carry forward. */ + op->ldpc_dec.op_flags ^=RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;+ } + if (unlikely(fcw->rm_e == 0)) { + rte_bbdev_log(WARNING, "Null E input provided"); + fcw->rm_e = 2; + } + + fcw->hcin_en = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); + fcw->hcout_en = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); + fcw->crc_select = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK); + fcw->so_en = check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);+ fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags, +RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);+ fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);+ fcw->bypass_dec = 0; + fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS); + if (op->ldpc_dec.q_m == 1) { + fcw->bypass_intlv = 1; + fcw->qm = 2; + } + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {+ fcw->hcin_decomp_mode = 1; + fcw->hcout_comp_mode = 1; + } else if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {+ fcw->hcin_decomp_mode = 4; + fcw->hcout_comp_mode = 4; + } else { + fcw->hcin_decomp_mode = 0; + 
fcw->hcout_comp_mode = 0; + } + + fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_LLR_COMPRESSION); + harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset); + if (fcw->hcin_en > 0) { + harq_in_length = op->ldpc_dec.harq_combined_input.length; + if (fcw->hcin_decomp_mode == 1) + harq_in_length = harq_in_length * 8 / 6; + else if (fcw->hcin_decomp_mode == 4) + harq_in_length = harq_in_length * 2; + harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb + - op->ldpc_dec.n_filler); + harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64); + fcw->hcin_size0 = harq_in_length; + fcw->hcin_offset = 0; + fcw->hcin_size1 = 0; + } else { + fcw->hcin_size0 = 0; + fcw->hcin_offset = 0; + fcw->hcin_size1 = 0; + } + + fcw->itmax = op->ldpc_dec.iter_max; + fcw->so_it = op->ldpc_dec.iter_max; + fcw->itstop = check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);+ fcw->cnu_algo = ACC_ALGO_MSA; + fcw->synd_precoder = fcw->itstop; + + fcw->minsum_offset = 1; + fcw->dec_llrclip = 2; + + /* + * These are all implicitly set + * fcw->synd_post = 0; + * fcw->dec_convllr = 0; + * fcw->hcout_convllr = 0; + * fcw->hcout_size1 = 0; + * fcw->hcout_offset = 0; + * fcw->negstop_th = 0; + * fcw->negstop_it = 0; + * fcw->negstop_en = 0; + * fcw->gain_i = 1; + * fcw->gain_h = 1; + */ + if (fcw->hcout_en > 0) { + parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8) + * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; + k0_p = (fcw->k0 > parity_offset) ? 
+ fcw->k0 - op->ldpc_dec.n_filler : fcw->k0; + ncb_p = fcw->ncb - op->ldpc_dec.n_filler; + l = k0_p + fcw->rm_e; + harq_out_length = (uint16_t) fcw->hcin_size0; + harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l),ncb_p);+ harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64); + fcw->hcout_size0 = harq_out_length; + fcw->hcout_size1 = 0; + fcw->hcout_offset = 0; + harq_layout[harq_index].offset = fcw->hcout_offset; + harq_layout[harq_index].size0 = fcw->hcout_size0; + } else { + fcw->hcout_size0 = 0; + fcw->hcout_size1 = 0; + fcw->hcout_offset = 0; + } + + fcw->tb_crc_select = 0; + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))+ fcw->tb_crc_select = 2; + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))+ fcw->tb_crc_select = 1; +} + static inline void vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op, struct acc_dma_req_desc *desc, @@ -1817,6 +2040,139 @@ vrb_dma_desc_ld_update(structrte_bbdev_dec_op *op,desc->op_addr = op; } +static inline int +vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, + struct acc_dma_req_desc *desc, + struct rte_mbuf **input, struct rte_mbuf *h_output, + uint32_t *in_offset, uint32_t *h_out_offset, + uint32_t *h_out_length, uint32_t *mbuf_total_left, + uint32_t *seg_total_left, struct acc_fcw_ld *fcw) +{Same here. I compared with vrb1_dma_desc_ld_fill(), and I don't see why we need two functions. The only differences are either backed by capability checks, and vrb1 already sets fcw->hcin_decomp_mode, so this code should work as-is on vrb1 if I'm not mistaken.Yes fair enough, doing this in v3.
Thanks.
+ struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + int next_triplet = 1; /* FCW already done. */ + uint32_t input_length; + uint16_t output_length, crc24_overlap = 0; + uint16_t sys_cols, K, h_p_size, h_np_size; + + acc_header_init(desc); + + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))+ crc24_overlap = 24; + + /* Compute some LDPC BG lengths. */ + input_length = fcw->rm_e; + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_LLR_COMPRESSION))+ input_length = (input_length * 3 + 3) / 4; + sys_cols = (dec->basegraph == 1) ? 22 : 10; + K = sys_cols * dec->z_c; + output_length = K - dec->n_filler - crc24_overlap; + + if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <input_length))) {+ rte_bbdev_log(ERR, + "Mismatch between mbuf length and includedCB sizes: mbuf len %u, cb len %u",+ *mbuf_total_left, input_length); + return -1; + } + + next_triplet = acc_dma_fill_blk_type_in(desc, input, + in_offset, input_length, + seg_total_left, next_triplet, + check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)); + + if (unlikely(next_triplet < 0)) { + rte_bbdev_log(ERR, + "Mismatch between data to process and mbufdata length in bbdev_op: %p",+ op); + return -1; + } + + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {+ if (op->ldpc_dec.harq_combined_input.data == 0) { + rte_bbdev_log(ERR, "HARQ input is not defined"); + return -1; + } + h_p_size = fcw->hcin_size0 + fcw->hcin_size1; + if (fcw->hcin_decomp_mode == 1) + h_p_size = (h_p_size * 3 + 3) / 4; + else if (fcw->hcin_decomp_mode == 4) + h_p_size = h_p_size / 2; + if (op->ldpc_dec.harq_combined_input.data == 0) { + rte_bbdev_log(ERR, "HARQ input is not defined"); + return -1; + } + acc_dma_fill_blk_type( + desc, + op->ldpc_dec.harq_combined_input.data, + op->ldpc_dec.harq_combined_input.offset, + h_p_size, + next_triplet, + ACC_DMA_BLKID_IN_HARQ); + next_triplet++; + } + + desc->data_ptrs[next_triplet - 1].last = 1; + desc->m2dlen = 
next_triplet; + *mbuf_total_left -= input_length; + + next_triplet = acc_dma_fill_blk_type(desc, h_output, + *h_out_offset, output_length >> 3, next_triplet, + ACC_DMA_BLKID_OUT_HARD); + + if (check_bit(op->ldpc_dec.op_flags,RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {+ if (op->ldpc_dec.soft_output.data == 0) { + rte_bbdev_log(ERR, "Soft output is not defined"); + return -1; + } + dec->soft_output.length = fcw->rm_e; + acc_dma_fill_blk_type(desc, dec->soft_output.data, dec- soft_output.offset, + fcw->rm_e, next_triplet,ACC_DMA_BLKID_OUT_SOFT);+ next_triplet++; + } + + if (check_bit(op->ldpc_dec.op_flags, +RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {+ if (op->ldpc_dec.harq_combined_output.data == 0) { + rte_bbdev_log(ERR, "HARQ output is not defined"); + return -1; + } + + /* Pruned size of the HARQ */ + h_p_size = fcw->hcout_size0 + fcw->hcout_size1; + /* Non-Pruned size of the HARQ */ + h_np_size = fcw->hcout_offset > 0 ? + fcw->hcout_offset + fcw->hcout_size1 : + h_p_size; + if (fcw->hcin_decomp_mode == 1) { + h_np_size = (h_np_size * 3 + 3) / 4; + h_p_size = (h_p_size * 3 + 3) / 4; + } else if (fcw->hcin_decomp_mode == 4) { + h_np_size = h_np_size / 2; + h_p_size = h_p_size / 2; + } + dec->harq_combined_output.length = h_np_size; + acc_dma_fill_blk_type( + desc, + dec->harq_combined_output.data, + dec->harq_combined_output.offset, + h_p_size, + next_triplet, + ACC_DMA_BLKID_OUT_HARQ); + + next_triplet++; + } + + *h_out_length = output_length >> 3; + dec->hard_output.length += *h_out_length; + *h_out_offset += *h_out_length; + desc->data_ptrs[next_triplet - 1].last = 1; + desc->d2mlen = next_triplet - desc->m2dlen; + + desc->op_addr = op; + + return 0; +} + /* Enqueue one encode operations for device in CB mode. */ static inline int enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op*op,@@ -1877,6 +2233,7 @@ enqueue_ldpc_enc_n_op_cb(struct acc_queue *q,struct rte_bbdev_enc_op **ops,/** This could be done at polling. 
*/ acc_header_init(&desc->req); desc->req.numCBs = num; + desc->req.dltb = 0; in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len; out_length = (enc->cb_params.e + 7) >> 3; @@ -2102,6 +2459,105 @@ vrb1_enqueue_ldpc_enc_one_op_tb(structacc_queue *q, struct rte_bbdev_enc_op *opreturn return_descs; } +/* Fill in a frame control word for LDPC encoding. */ +static inline void +vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le*fcw)+{ + fcw->qm = op->ldpc_enc.q_m; + fcw->nfiller = op->ldpc_enc.n_filler; + fcw->BG = (op->ldpc_enc.basegraph - 1); + fcw->Zc = op->ldpc_enc.z_c; + fcw->ncb = op->ldpc_enc.n_cb; + fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph, + op->ldpc_enc.rv_index); + fcw->rm_e = op->ldpc_enc.tb_params.ea; + fcw->rm_e_b = op->ldpc_enc.tb_params.eb; + fcw->crc_select = check_bit(op->ldpc_enc.op_flags, + RTE_BBDEV_LDPC_CRC_24B_ATTACH); + fcw->bypass_intlv = 0; + if (op->ldpc_enc.tb_params.c > 1) { + fcw->mcb_count = 0; + fcw->C = op->ldpc_enc.tb_params.c; + fcw->Cab = op->ldpc_enc.tb_params.cab; + } else { + fcw->mcb_count = 1; + fcw->C = 0; + } +} + +/* Enqueue one encode operations for device in TB mode. + * returns the number of descs used. 
+ */ +static inline int +vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, structrte_bbdev_enc_op *op,+ uint16_t enq_descs) +{ + union acc_dma_desc *desc = NULL; + uint32_t in_offset, out_offset, out_length, seg_total_left; + struct rte_mbuf *input, *output_head, *output; + + uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q- sw_ring_wrap_mask); + desc = q->ring_addr + desc_idx;Use acc_desc()?thanks+ vrb2_fcw_letb_fill(op, &desc->req.fcw_le); + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + int next_triplet = 1; /* FCW already done */ + uint32_t in_length_in_bytes; + uint16_t K, in_length_in_bits; + + input = enc->input.data; + output_head = output = enc->output.data; + in_offset = enc->input.offset; + out_offset = enc->output.offset; + seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset; + + acc_header_init(&desc->req); + K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; + in_length_in_bits = K - enc->n_filler; + if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) || + (enc->op_flags &RTE_BBDEV_LDPC_CRC_24B_ATTACH))+ in_length_in_bits -= 24; + in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c; + + next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input,&in_offset,+ in_length_in_bytes, &seg_total_left, next_triplet, + check_bit(enc->op_flags,RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));+ if (unlikely(next_triplet < 0)) { + rte_bbdev_log(ERR, + "Mismatch between data to process and mbufdata length in bbdev_op: %p",+ op); + return -1; + } + desc->req.data_ptrs[next_triplet - 1].last = 1; + desc->req.m2dlen = next_triplet; + + /* Set output length */ + /* Integer round up division by 8 */ + out_length = (enc->tb_params.ea * enc->tb_params.cab + + enc->tb_params.eb * (enc->tb_params.c - enc- tb_params.cab) + 7) >> 3; + + next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset, + out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC); + enc->output.length = out_length; + out_offset += out_length; + 
desc->req.data_ptrs[next_triplet - 1].last = 1; + desc->req.data_ptrs[next_triplet - 1].dma_ext = 0; + desc->req.d2mlen = next_triplet - desc->req.m2dlen; + desc->req.numCBs = enc->tb_params.c; + if (desc->req.numCBs > 1) + desc->req.dltb = 1; + desc->req.op_addr = op; + + if (out_length < ACC_MAX_E_MBUF) + mbuf_append(output_head, output, out_length); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc- req.fcw_le)); + rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); +#endif + /* One CB (one op) was successfully prepared to enqueue */ + return 1;This function is quite different from the VRB1 variant. Is the underlying hardware completely different, or just a different implementation?The underlying HW is different in this mode of operation, notably as it supports RTE_BBDEV_LDPC_ENC_CONCATENATION hence more of true TB implementation. Kept separate on purpose.
Ack, makes sense here.
+} + /** Enqueue one decode operations for device in CB mode. */ static inline int enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op*op,@@ -2215,10 +2671,16 @@ vrb_enqueue_ldpc_dec_one_op_cb(structacc_queue *q, struct rte_bbdev_dec_op *op,else seg_total_left = fcw->rm_e; - ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output, - &in_offset, &h_out_offset, - &h_out_length, &mbuf_total_left, - &seg_total_left, fcw); + if (q->d->device_variant == VRB1_VARIANT) + ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,h_output,+ &in_offset, &h_out_offset, + &h_out_length, &mbuf_total_left, + &seg_total_left, fcw); + else + ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,h_output,+ &in_offset, &h_out_offset, + &h_out_length, &mbuf_total_left, + &seg_total_left, fcw); if (unlikely(ret < 0)) return ret; } @@ -2308,11 +2770,18 @@ vrb_enqueue_ldpc_dec_one_op_tb(structacc_queue *q, struct rte_bbdev_dec_op *op,rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld,ACC_FCW_LD_BLEN);desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len; - ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, - h_output, &in_offset, &h_out_offset, - &h_out_length, - &mbuf_total_left, &seg_total_left, - &desc->req.fcw_ld); + if (q->d->device_variant == VRB1_VARIANT) + ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, + h_output, &in_offset, &h_out_offset, + &h_out_length, + &mbuf_total_left, &seg_total_left, + &desc->req.fcw_ld); + else + ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input, + h_output, &in_offset, &h_out_offset, + &h_out_length, + &mbuf_total_left, &seg_total_left, + &desc->req.fcw_ld); if (unlikely(ret < 0)) return ret; @@ -2576,14 +3045,22 @@ vrb_enqueue_ldpc_enc_tb(structrte_bbdev_queue_data *q_data,int descs_used; for (i = 0; i < num; ++i) { - cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc); - /* Check if there are available space for further processing. 
*/ - if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { - acc_enqueue_ring_full(q_data); - break; + if (q->d->device_variant == VRB1_VARIANT) { + cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]- ldpc_enc); + /* Check if there are available space for furtherprocessing. */+ if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { + acc_enqueue_ring_full(q_data); + break; + } + descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q,ops[i],+ enqueued_descs, cbs_in_tb); + } else { + if (unlikely(avail < 1)) { + acc_enqueue_ring_full(q_data); + break; + } + descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q,ops[i], enqueued_descs);} - - descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],enqueued_descs, cbs_in_tb);if (descs_used < 0) { acc_enqueue_invalid(q_data); break; @@ -2865,6 +3342,52 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue*q, struct rte_bbdev_enc_op **ref_op,return desc->req.numCBs; } +/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */ +static inline int +vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, structrte_bbdev_enc_op **ref_op,+ uint16_t *dequeued_ops, uint32_t *aq_dequeued, + uint16_t *dequeued_descs) +{ + union acc_dma_desc *desc, atom_desc; + union acc_dma_rsp_desc rsp; + struct rte_bbdev_enc_op *op; + int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q- sw_ring_wrap_mask); + + desc = q->ring_addr + desc_idx; + atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,__ATOMIC_RELAXED);+ + /* Check fdone bit. */ + if (!(atom_desc.rsp.val & ACC_FDONE)) + return -1; + + rsp.val = atom_desc.rsp.val; + rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val); + + /* Dequeue. */ + op = desc->req.op_addr; + + /* Clearing status, it will be set based on response. 
*/ + op->status = 0; + op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR; + op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR; + op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR; + op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR; + + if (desc->req.last_desc_in_batch) { + (*aq_dequeued)++; + desc->req.last_desc_in_batch = 0; + } + desc->rsp.val = ACC_DMA_DESC_TYPE; + desc->rsp.add_info_0 = 0; /* Reserved bits. */ + desc->rsp.add_info_1 = 0; /* Reserved bits. */ + + /* One op was successfully dequeued */ + ref_op[0] = op; + (*dequeued_descs)++; + (*dequeued_ops)++; + return 1; +} + /* Dequeue one LDPC encode operations from device in TB mode. * That operation may cover multiple descriptors. */ @@ -3189,9 +3712,14 @@ vrb_dequeue_ldpc_enc(structrte_bbdev_queue_data *q_data,for (i = 0; i < avail; i++) { if (cbm == RTE_BBDEV_TRANSPORT_BLOCK) - ret = vrb_dequeue_enc_one_op_tb(q,&ops[dequeued_ops],- &dequeued_ops, &aq_dequeued, - &dequeued_descs, num); + if (q->d->device_variant == VRB1_VARIANT) + ret = vrb_dequeue_enc_one_op_tb(q,&ops[dequeued_ops],+ &dequeued_ops,&aq_dequeued,+ &dequeued_descs, num); + else + ret = vrb2_dequeue_ldpc_enc_one_op_tb(q,&ops[dequeued_ops],+ &dequeued_ops,&aq_dequeued,+ &dequeued_descs); else ret = vrb_dequeue_enc_one_op_cb(q,&ops[dequeued_ops],&dequeued_ops, &aq_dequeued, @@ -3536,6 +4064,7 @@ vrb_bbdev_init(struct rte_bbdev *dev, structrte_pci_driver *drv)} else { d->device_variant = VRB2_VARIANT; d->queue_offset = vrb2_queue_offset; + d->fcw_ld_fill = vrb2_fcw_ld_fill; d->num_qgroups = VRB2_NUM_QGRPS; d->num_aqs = VRB2_NUM_AQS; if (d->pf_device)It looks like most (60%+) of the code in this patch could be removed if duplication was avoided. Thanks, Maxime