On 10/4/23 23:11, Chautru, Nicolas wrote:
Hi Maxime,

-----Original Message-----
From: Maxime Coquelin <maxime.coque...@redhat.com>
Sent: Tuesday, October 3, 2023 7:28 AM
To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org
Cc: hemant.agra...@nxp.com; david.march...@redhat.com; Vargas, Hernan
<hernan.var...@intel.com>
Subject: Re: [PATCH v3 08/12] baseband/acc: add FEC capabilities for the VRB2
variant



On 9/29/23 18:35, Nicolas Chautru wrote:
New implementation for some of the FEC features specific to the VRB2
variant.

Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com>
---
   drivers/baseband/acc/rte_vrb_pmd.c | 567
++++++++++++++++++++++++++++-
   1 file changed, 548 insertions(+), 19 deletions(-)

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c
b/drivers/baseband/acc/rte_vrb_pmd.c
index 48e779ce77..93add82947 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -1235,6 +1235,94 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct
rte_bbdev_driver_info *dev_info)
        };

        static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = {
+               {
+                       .type = RTE_BBDEV_OP_TURBO_DEC,
+                       .cap.turbo_dec = {
+                               .capability_flags =
+
        RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+                                       RTE_BBDEV_TURBO_CRC_TYPE_24B |
+
        RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
+                                       RTE_BBDEV_TURBO_EQUALIZER |
+
        RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
+
        RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+
        RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
+                                       RTE_BBDEV_TURBO_SOFT_OUTPUT |
+
        RTE_BBDEV_TURBO_EARLY_TERMINATION |
+
        RTE_BBDEV_TURBO_DEC_INTERRUPTS |
+
        RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+
        RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
+                                       RTE_BBDEV_TURBO_MAP_DEC |
+
        RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+
        RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+                               .max_llr_modulus = INT8_MAX,
+                               .num_buffers_src =
+
        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+                               .num_buffers_hard_out =
+
        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+                               .num_buffers_soft_out =
+
        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+                       }
+               },
+               {
+                       .type = RTE_BBDEV_OP_TURBO_ENC,
+                       .cap.turbo_enc = {
+                               .capability_flags =
+
        RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+
        RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+                                       RTE_BBDEV_TURBO_RATE_MATCH |
+
        RTE_BBDEV_TURBO_ENC_INTERRUPTS |
+
        RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+                               .num_buffers_src =
+
        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+                               .num_buffers_dst =
+
        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+                       }
+               },
+               {
+                       .type   = RTE_BBDEV_OP_LDPC_ENC,
+                       .cap.ldpc_enc = {
+                               .capability_flags =
+                                       RTE_BBDEV_LDPC_RATE_MATCH |
+                                       RTE_BBDEV_LDPC_CRC_24B_ATTACH
|
+
        RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
+                                       RTE_BBDEV_LDPC_ENC_INTERRUPTS
|
+
        RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |
+
        RTE_BBDEV_LDPC_ENC_CONCATENATION,
+                               .num_buffers_src =
+
        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                               .num_buffers_dst =
+
        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       }
+               },
+               {
+                       .type   = RTE_BBDEV_OP_LDPC_DEC,
+                       .cap.ldpc_dec = {
+                       .capability_flags =
+                               RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
+                               RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
+                               RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
+                               RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
+                               RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE
|
+
        RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
+                               RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE
|
+                               RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
+                               RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
+
        RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
+
        RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
+                               RTE_BBDEV_LDPC_LLR_COMPRESSION |
+                               RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
+                               RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
+
        RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
+                               RTE_BBDEV_LDPC_DEC_INTERRUPTS,
+                       .llr_size = 8,
+                       .llr_decimals = 2,
+                       .num_buffers_src =
+
        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       .num_buffers_hard_out =
+
        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       .num_buffers_soft_out = 0,
+                       }
+               },
                RTE_BBDEV_END_OF_CAPABILITIES_LIST()
        };

@@ -1774,6 +1862,141 @@ vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op
*op,
        return 0;
   }

+/* Fill in a frame control word for LDPC decoding. */ static inline
+void vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld
+*fcw,
+               union acc_harq_layout_data *harq_layout) {
+       uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
+       uint32_t harq_index;
+       uint32_t l;


This is so similar with vrb1_fcw_ld_fill() that it does not make sense
to duplicate so much code.

Do you confirm there are no other difference than the SOFT_OUT stuff,
and reusing vrb2_fcw_ld_fill on VRB1 would just work as the op_flags are
checked (and they should not be set if capability is not advertized)?

There are quite of lot of difference to the fundamental underlying IP, the  IP 
decoder is different with different tuning point, the SO and HARQ support are 
different.
Still I believe we can support both in the same function without being a too 
much a problem moving forward. Doing this in v4.

Thanks,





+       fcw->qm = op->ldpc_dec.q_m;
+       fcw->nfiller = op->ldpc_dec.n_filler;
+       fcw->BG = (op->ldpc_dec.basegraph - 1);
+       fcw->Zc = op->ldpc_dec.z_c;
+       fcw->ncb = op->ldpc_dec.n_cb;
+       fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
+                       op->ldpc_dec.rv_index);
+       if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
+               fcw->rm_e = op->ldpc_dec.cb_params.e;
+       else
+               fcw->rm_e = (op->ldpc_dec.tb_params.r <
+                               op->ldpc_dec.tb_params.cab) ?
+                                               op->ldpc_dec.tb_params.ea :
+                                               op->ldpc_dec.tb_params.eb;
+
+       if (unlikely(check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
+                       (op->ldpc_dec.harq_combined_input.length == 0))) {
+               rte_bbdev_log(WARNING, "Null HARQ input size provided");
+               /* Disable HARQ input in that case to carry forward. */
+               op->ldpc_dec.op_flags ^=
RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
+       }
+       if (unlikely(fcw->rm_e == 0)) {
+               rte_bbdev_log(WARNING, "Null E input provided");
+               fcw->rm_e = 2;
+       }
+
+       fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
+       fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
+       fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
+       fcw->so_en = check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
+       fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
+
        RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
+       fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
+       fcw->bypass_dec = 0;
+       fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
+       if (op->ldpc_dec.q_m == 1) {
+               fcw->bypass_intlv = 1;
+               fcw->qm = 2;
+       }
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {
+               fcw->hcin_decomp_mode = 1;
+               fcw->hcout_comp_mode = 1;
+       } else if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {
+               fcw->hcin_decomp_mode = 4;
+               fcw->hcout_comp_mode = 4;
+       } else {
+               fcw->hcin_decomp_mode = 0;
+               fcw->hcout_comp_mode = 0;
+       }
+
+       fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_LLR_COMPRESSION);
+       harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
+       if (fcw->hcin_en > 0) {
+               harq_in_length = op->ldpc_dec.harq_combined_input.length;
+               if (fcw->hcin_decomp_mode == 1)
+                       harq_in_length = harq_in_length * 8 / 6;
+               else if (fcw->hcin_decomp_mode == 4)
+                       harq_in_length = harq_in_length * 2;
+               harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
+                               - op->ldpc_dec.n_filler);
+               harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64);
+               fcw->hcin_size0 = harq_in_length;
+               fcw->hcin_offset = 0;
+               fcw->hcin_size1 = 0;
+       } else {
+               fcw->hcin_size0 = 0;
+               fcw->hcin_offset = 0;
+               fcw->hcin_size1 = 0;
+       }
+
+       fcw->itmax = op->ldpc_dec.iter_max;
+       fcw->so_it = op->ldpc_dec.iter_max;
+       fcw->itstop = check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
+       fcw->cnu_algo = ACC_ALGO_MSA;
+       fcw->synd_precoder = fcw->itstop;
+
+       fcw->minsum_offset = 1;
+       fcw->dec_llrclip   = 2;
+
+       /*
+        * These are all implicitly set
+        * fcw->synd_post = 0;
+        * fcw->dec_convllr = 0;
+        * fcw->hcout_convllr = 0;
+        * fcw->hcout_size1 = 0;
+        * fcw->hcout_offset = 0;
+        * fcw->negstop_th = 0;
+        * fcw->negstop_it = 0;
+        * fcw->negstop_en = 0;
+        * fcw->gain_i = 1;
+        * fcw->gain_h = 1;
+        */
+       if (fcw->hcout_en > 0) {
+               parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
+                       * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
+               k0_p = (fcw->k0 > parity_offset) ?
+                               fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
+               ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
+               l = k0_p + fcw->rm_e;
+               harq_out_length = (uint16_t) fcw->hcin_size0;
+               harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l),
ncb_p);
+               harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
+               fcw->hcout_size0 = harq_out_length;
+               fcw->hcout_size1 = 0;
+               fcw->hcout_offset = 0;
+               harq_layout[harq_index].offset = fcw->hcout_offset;
+               harq_layout[harq_index].size0 = fcw->hcout_size0;
+       } else {
+               fcw->hcout_size0 = 0;
+               fcw->hcout_size1 = 0;
+               fcw->hcout_offset = 0;
+       }
+
+       fcw->tb_crc_select = 0;
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
+               fcw->tb_crc_select = 2;
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))
+               fcw->tb_crc_select = 1;
+}
+
   static inline void
   vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
                struct acc_dma_req_desc *desc,
@@ -1817,6 +2040,139 @@ vrb_dma_desc_ld_update(struct
rte_bbdev_dec_op *op,
        desc->op_addr = op;
   }

+static inline int
+vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
+               struct acc_dma_req_desc *desc,
+               struct rte_mbuf **input, struct rte_mbuf *h_output,
+               uint32_t *in_offset, uint32_t *h_out_offset,
+               uint32_t *h_out_length, uint32_t *mbuf_total_left,
+               uint32_t *seg_total_left, struct acc_fcw_ld *fcw)
+{
Same here.

I compared with vrb1_dma_desc_ld_fill(), and I don't see why we need two
functions.

The only differences are either backed by capability checks, and vrb1
already sets fcw->hcin_decomp_mode, so this code should work as-is on
vrb1 if I'm not mistaken.

Yes fair enough, doing this in v3.

Thanks.



+       struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+       int next_triplet = 1; /* FCW already done. */
+       uint32_t input_length;
+       uint16_t output_length, crc24_overlap = 0;
+       uint16_t sys_cols, K, h_p_size, h_np_size;
+
+       acc_header_init(desc);
+
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
+               crc24_overlap = 24;
+
+       /* Compute some LDPC BG lengths. */
+       input_length = fcw->rm_e;
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_LLR_COMPRESSION))
+               input_length = (input_length * 3 + 3) / 4;
+       sys_cols = (dec->basegraph == 1) ? 22 : 10;
+       K = sys_cols * dec->z_c;
+       output_length = K - dec->n_filler - crc24_overlap;
+
+       if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
input_length))) {
+               rte_bbdev_log(ERR,
+                               "Mismatch between mbuf length and included
CB sizes: mbuf len %u, cb len %u",
+                               *mbuf_total_left, input_length);
+               return -1;
+       }
+
+       next_triplet = acc_dma_fill_blk_type_in(desc, input,
+                       in_offset, input_length,
+                       seg_total_left, next_triplet,
+                       check_bit(op->ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
+
+       if (unlikely(next_triplet < 0)) {
+               rte_bbdev_log(ERR,
+                               "Mismatch between data to process and mbuf
data length in bbdev_op: %p",
+                               op);
+               return -1;
+       }
+
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
+               if (op->ldpc_dec.harq_combined_input.data == 0) {
+                       rte_bbdev_log(ERR, "HARQ input is not defined");
+                       return -1;
+               }
+               h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
+               if (fcw->hcin_decomp_mode == 1)
+                       h_p_size = (h_p_size * 3 + 3) / 4;
+               else if (fcw->hcin_decomp_mode == 4)
+                       h_p_size = h_p_size / 2;
+               if (op->ldpc_dec.harq_combined_input.data == 0) {
+                       rte_bbdev_log(ERR, "HARQ input is not defined");
+                       return -1;
+               }
+               acc_dma_fill_blk_type(
+                               desc,
+                               op->ldpc_dec.harq_combined_input.data,
+                               op->ldpc_dec.harq_combined_input.offset,
+                               h_p_size,
+                               next_triplet,
+                               ACC_DMA_BLKID_IN_HARQ);
+               next_triplet++;
+       }
+
+       desc->data_ptrs[next_triplet - 1].last = 1;
+       desc->m2dlen = next_triplet;
+       *mbuf_total_left -= input_length;
+
+       next_triplet = acc_dma_fill_blk_type(desc, h_output,
+                       *h_out_offset, output_length >> 3, next_triplet,
+                       ACC_DMA_BLKID_OUT_HARD);
+
+       if (check_bit(op->ldpc_dec.op_flags,
RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
+               if (op->ldpc_dec.soft_output.data == 0) {
+                       rte_bbdev_log(ERR, "Soft output is not defined");
+                       return -1;
+               }
+               dec->soft_output.length = fcw->rm_e;
+               acc_dma_fill_blk_type(desc, dec->soft_output.data, dec-
soft_output.offset,
+                               fcw->rm_e, next_triplet,
ACC_DMA_BLKID_OUT_SOFT);
+               next_triplet++;
+       }
+
+       if (check_bit(op->ldpc_dec.op_flags,
+
        RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
+               if (op->ldpc_dec.harq_combined_output.data == 0) {
+                       rte_bbdev_log(ERR, "HARQ output is not defined");
+                       return -1;
+               }
+
+               /* Pruned size of the HARQ */
+               h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
+               /* Non-Pruned size of the HARQ */
+               h_np_size = fcw->hcout_offset > 0 ?
+                               fcw->hcout_offset + fcw->hcout_size1 :
+                               h_p_size;
+               if (fcw->hcin_decomp_mode == 1) {
+                       h_np_size = (h_np_size * 3 + 3) / 4;
+                       h_p_size = (h_p_size * 3 + 3) / 4;
+               } else if (fcw->hcin_decomp_mode == 4) {
+                       h_np_size = h_np_size / 2;
+                       h_p_size = h_p_size / 2;
+               }
+               dec->harq_combined_output.length = h_np_size;
+               acc_dma_fill_blk_type(
+                               desc,
+                               dec->harq_combined_output.data,
+                               dec->harq_combined_output.offset,
+                               h_p_size,
+                               next_triplet,
+                               ACC_DMA_BLKID_OUT_HARQ);
+
+               next_triplet++;
+       }
+
+       *h_out_length = output_length >> 3;
+       dec->hard_output.length += *h_out_length;
+       *h_out_offset += *h_out_length;
+       desc->data_ptrs[next_triplet - 1].last = 1;
+       desc->d2mlen = next_triplet - desc->m2dlen;
+
+       desc->op_addr = op;
+
+       return 0;
+}
+
   /* Enqueue one encode operations for device in CB mode. */
   static inline int
   enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op
*op,
@@ -1877,6 +2233,7 @@ enqueue_ldpc_enc_n_op_cb(struct acc_queue *q,
struct rte_bbdev_enc_op **ops,
        /** This could be done at polling. */
        acc_header_init(&desc->req);
        desc->req.numCBs = num;
+       desc->req.dltb = 0;

        in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
        out_length = (enc->cb_params.e + 7) >> 3;
@@ -2102,6 +2459,105 @@ vrb1_enqueue_ldpc_enc_one_op_tb(struct
acc_queue *q, struct rte_bbdev_enc_op *op
        return return_descs;
   }

+/* Fill in a frame control word for LDPC encoding. */
+static inline void
+vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le
*fcw)
+{
+       fcw->qm = op->ldpc_enc.q_m;
+       fcw->nfiller = op->ldpc_enc.n_filler;
+       fcw->BG = (op->ldpc_enc.basegraph - 1);
+       fcw->Zc = op->ldpc_enc.z_c;
+       fcw->ncb = op->ldpc_enc.n_cb;
+       fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
+                       op->ldpc_enc.rv_index);
+       fcw->rm_e = op->ldpc_enc.tb_params.ea;
+       fcw->rm_e_b = op->ldpc_enc.tb_params.eb;
+       fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
+                       RTE_BBDEV_LDPC_CRC_24B_ATTACH);
+       fcw->bypass_intlv = 0;
+       if (op->ldpc_enc.tb_params.c > 1) {
+               fcw->mcb_count = 0;
+               fcw->C = op->ldpc_enc.tb_params.c;
+               fcw->Cab = op->ldpc_enc.tb_params.cab;
+       } else {
+               fcw->mcb_count = 1;
+               fcw->C = 0;
+       }
+}
+
+/* Enqueue one encode operations for device in TB mode.
+ * returns the number of descs used.
+ */
+static inline int
+vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
rte_bbdev_enc_op *op,
+               uint16_t enq_descs)
+{
+       union acc_dma_desc *desc = NULL;
+       uint32_t in_offset, out_offset, out_length, seg_total_left;
+       struct rte_mbuf *input, *output_head, *output;
+
+       uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q-
sw_ring_wrap_mask);
+       desc = q->ring_addr + desc_idx;

Use acc_desc()?

thanks


+       vrb2_fcw_letb_fill(op, &desc->req.fcw_le);
+       struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
+       int next_triplet = 1; /* FCW already done */
+       uint32_t in_length_in_bytes;
+       uint16_t K, in_length_in_bits;
+
+       input = enc->input.data;
+       output_head = output = enc->output.data;
+       in_offset = enc->input.offset;
+       out_offset = enc->output.offset;
+       seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset;
+
+       acc_header_init(&desc->req);
+       K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
+       in_length_in_bits = K - enc->n_filler;
+       if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
+                       (enc->op_flags &
RTE_BBDEV_LDPC_CRC_24B_ATTACH))
+               in_length_in_bits -= 24;
+       in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c;
+
+       next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input,
&in_offset,
+                       in_length_in_bytes, &seg_total_left, next_triplet,
+                       check_bit(enc->op_flags,
RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));
+       if (unlikely(next_triplet < 0)) {
+               rte_bbdev_log(ERR,
+                               "Mismatch between data to process and mbuf
data length in bbdev_op: %p",
+                               op);
+               return -1;
+       }
+       desc->req.data_ptrs[next_triplet - 1].last = 1;
+       desc->req.m2dlen = next_triplet;
+
+       /* Set output length */
+       /* Integer round up division by 8 */
+       out_length = (enc->tb_params.ea * enc->tb_params.cab +
+                       enc->tb_params.eb * (enc->tb_params.c - enc-
tb_params.cab)  + 7) >> 3;
+
+       next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset,
+                       out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
+       enc->output.length = out_length;
+       out_offset += out_length;
+       desc->req.data_ptrs[next_triplet - 1].last = 1;
+       desc->req.data_ptrs[next_triplet - 1].dma_ext = 0;
+       desc->req.d2mlen = next_triplet - desc->req.m2dlen;
+       desc->req.numCBs = enc->tb_params.c;
+       if (desc->req.numCBs > 1)
+               desc->req.dltb = 1;
+       desc->req.op_addr = op;
+
+       if (out_length < ACC_MAX_E_MBUF)
+               mbuf_append(output_head, output, out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+       rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc-
req.fcw_le));
+       rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+       /* One CB (one op) was successfully prepared to enqueue */
+       return 1;

This function is quite different from the VRB1 variant.
Is the underlying hardware completely different, or just a different
implementation?

The underlying HW is different in this mode of operation, notably as it
supports RTE_BBDEV_LDPC_ENC_CONCATENATION hence more of true TB
implementation.
Kept separate on purpose.

Ack, makes sense here.


+}
+
   /** Enqueue one decode operations for device in CB mode. */
   static inline int
   enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op
*op,
@@ -2215,10 +2671,16 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct
acc_queue *q, struct rte_bbdev_dec_op *op,
                else
                        seg_total_left = fcw->rm_e;

-               ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
-                               &in_offset, &h_out_offset,
-                               &h_out_length, &mbuf_total_left,
-                               &seg_total_left, fcw);
+               if (q->d->device_variant == VRB1_VARIANT)
+                       ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
h_output,
+                                       &in_offset, &h_out_offset,
+                                       &h_out_length, &mbuf_total_left,
+                                       &seg_total_left, fcw);
+               else
+                       ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
h_output,
+                                       &in_offset, &h_out_offset,
+                                       &h_out_length, &mbuf_total_left,
+                                       &seg_total_left, fcw);
                if (unlikely(ret < 0))
                        return ret;
        }
@@ -2308,11 +2770,18 @@ vrb_enqueue_ldpc_dec_one_op_tb(struct
acc_queue *q, struct rte_bbdev_dec_op *op,
                rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld,
ACC_FCW_LD_BLEN);
                desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;

-               ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
-                               h_output, &in_offset, &h_out_offset,
-                               &h_out_length,
-                               &mbuf_total_left, &seg_total_left,
-                               &desc->req.fcw_ld);
+               if (q->d->device_variant == VRB1_VARIANT)
+                       ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
+                                       h_output, &in_offset, &h_out_offset,
+                                       &h_out_length,
+                                       &mbuf_total_left, &seg_total_left,
+                                       &desc->req.fcw_ld);
+               else
+                       ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
+                                       h_output, &in_offset, &h_out_offset,
+                                       &h_out_length,
+                                       &mbuf_total_left, &seg_total_left,
+                                       &desc->req.fcw_ld);

                if (unlikely(ret < 0))
                        return ret;
@@ -2576,14 +3045,22 @@ vrb_enqueue_ldpc_enc_tb(struct
rte_bbdev_queue_data *q_data,
        int descs_used;

        for (i = 0; i < num; ++i) {
-               cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
-               /* Check if there are available space for further processing. */
-               if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
-                       acc_enqueue_ring_full(q_data);
-                       break;
+               if (q->d->device_variant == VRB1_VARIANT) {
+                       cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]-
ldpc_enc);
+                       /* Check if there are available space for further
processing. */
+                       if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 
0))) {
+                               acc_enqueue_ring_full(q_data);
+                               break;
+                       }
+                       descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q,
ops[i],
+                                       enqueued_descs, cbs_in_tb);
+               } else {
+                       if (unlikely(avail < 1)) {
+                               acc_enqueue_ring_full(q_data);
+                               break;
+                       }
+                       descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q,
ops[i], enqueued_descs);
                }
-
-               descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],
enqueued_descs, cbs_in_tb);
                if (descs_used < 0) {
                        acc_enqueue_invalid(q_data);
                        break;
@@ -2865,6 +3342,52 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue
*q, struct rte_bbdev_enc_op **ref_op,
        return desc->req.numCBs;
   }

+/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */
+static inline int
+vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
rte_bbdev_enc_op **ref_op,
+               uint16_t *dequeued_ops, uint32_t *aq_dequeued,
+               uint16_t *dequeued_descs)
+{
+       union acc_dma_desc *desc, atom_desc;
+       union acc_dma_rsp_desc rsp;
+       struct rte_bbdev_enc_op *op;
+       int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q-
sw_ring_wrap_mask);
+
+       desc = q->ring_addr + desc_idx;
+       atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
__ATOMIC_RELAXED);
+
+       /* Check fdone bit. */
+       if (!(atom_desc.rsp.val & ACC_FDONE))
+               return -1;
+
+       rsp.val = atom_desc.rsp.val;
+       rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
+
+       /* Dequeue. */
+       op = desc->req.op_addr;
+
+       /* Clearing status, it will be set based on response. */
+       op->status = 0;
+       op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
+       op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
+       op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
+       op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
+
+       if (desc->req.last_desc_in_batch) {
+               (*aq_dequeued)++;
+               desc->req.last_desc_in_batch = 0;
+       }
+       desc->rsp.val = ACC_DMA_DESC_TYPE;
+       desc->rsp.add_info_0 = 0; /* Reserved bits. */
+       desc->rsp.add_info_1 = 0; /* Reserved bits. */
+
+       /* One op was successfully dequeued */
+       ref_op[0] = op;
+       (*dequeued_descs)++;
+       (*dequeued_ops)++;
+       return 1;
+}
+
   /* Dequeue one LDPC encode operations from device in TB mode.
    * That operation may cover multiple descriptors.
    */
@@ -3189,9 +3712,14 @@ vrb_dequeue_ldpc_enc(struct
rte_bbdev_queue_data *q_data,

        for (i = 0; i < avail; i++) {
                if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
-                       ret = vrb_dequeue_enc_one_op_tb(q,
&ops[dequeued_ops],
-                                       &dequeued_ops, &aq_dequeued,
-                                       &dequeued_descs, num);
+                       if (q->d->device_variant == VRB1_VARIANT)
+                               ret = vrb_dequeue_enc_one_op_tb(q,
&ops[dequeued_ops],
+                                               &dequeued_ops,
&aq_dequeued,
+                                               &dequeued_descs, num);
+                       else
+                               ret = vrb2_dequeue_ldpc_enc_one_op_tb(q,
&ops[dequeued_ops],
+                                               &dequeued_ops,
&aq_dequeued,
+                                               &dequeued_descs);
                else
                        ret = vrb_dequeue_enc_one_op_cb(q,
&ops[dequeued_ops],
                                        &dequeued_ops, &aq_dequeued,
@@ -3536,6 +4064,7 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct
rte_pci_driver *drv)
        } else {
                d->device_variant = VRB2_VARIANT;
                d->queue_offset = vrb2_queue_offset;
+               d->fcw_ld_fill = vrb2_fcw_ld_fill;
                d->num_qgroups = VRB2_NUM_QGRPS;
                d->num_aqs = VRB2_NUM_AQS;
                if (d->pf_device)


It looks like most (60%+) of the code in this patch could be removed if
duplication was avoided.

Thanks,
Maxime


Reply via email to