On 10/4/23 23:18, Chautru, Nicolas wrote:
Hi Maxime,

-----Original Message-----
From: Maxime Coquelin <maxime.coque...@redhat.com>
Sent: Wednesday, October 4, 2023 12:11 AM
To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org
Cc: hemant.agra...@nxp.com; david.march...@redhat.com; Vargas, Hernan
<hernan.var...@intel.com>
Subject: Re: [PATCH v3 09/12] baseband/acc: add FFT support to VRB2 variant



On 10/3/23 20:20, Chautru, Nicolas wrote:
Hi Maxime,

-----Original Message-----
From: Maxime Coquelin <maxime.coque...@redhat.com>
Sent: Tuesday, October 3, 2023 7:37 AM
To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org
Cc: hemant.agra...@nxp.com; david.march...@redhat.com; Vargas,
Hernan
<hernan.var...@intel.com>
Subject: Re: [PATCH v3 09/12] baseband/acc: add FFT support to VRB2
variant



On 9/29/23 18:35, Nicolas Chautru wrote:
Support for the FFT the processing specific to the
VRB2 variant.

Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com>
---
    drivers/baseband/acc/rte_vrb_pmd.c | 132
++++++++++++++++++++++++++++-
    1 file changed, 128 insertions(+), 4 deletions(-)

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c
b/drivers/baseband/acc/rte_vrb_pmd.c
index 93add82947..ce4b90d8e7 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -903,6 +903,9 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t
queue_id,
                        ACC_FCW_LD_BLEN : (conf->op_type ==
RTE_BBDEV_OP_FFT ?
                        ACC_FCW_FFT_BLEN : ACC_FCW_MLDTS_BLEN))));

+       if ((q->d->device_variant == VRB2_VARIANT) && (conf->op_type ==
RTE_BBDEV_OP_FFT))
+               fcw_len = ACC_FCW_FFT_BLEN_3;
+
        for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
                desc = q->ring_addr + desc_idx;
                desc->req.word0 = ACC_DMA_DESC_TYPE; @@ -1323,6
+1326,24 @@
vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info
*dev_info)
                        .num_buffers_soft_out = 0,
                        }
                },
+               {
+                       .type   = RTE_BBDEV_OP_FFT,
+                       .cap.fft = {
+                               .capability_flags =
+
        RTE_BBDEV_FFT_WINDOWING |
+
        RTE_BBDEV_FFT_CS_ADJUSTMENT |
+
        RTE_BBDEV_FFT_DFT_BYPASS |
+
        RTE_BBDEV_FFT_IDFT_BYPASS |
+                                               RTE_BBDEV_FFT_FP16_INPUT
|
+
        RTE_BBDEV_FFT_FP16_OUTPUT |
+
        RTE_BBDEV_FFT_POWER_MEAS |
+
        RTE_BBDEV_FFT_WINDOWING_BYPASS,
+                               .num_buffers_src =
+                                               1,
+                               .num_buffers_dst =
+                                               1,
+                       }
+               },
                RTE_BBDEV_END_OF_CAPABILITIES_LIST()
        };

@@ -3849,6 +3870,47 @@ vrb1_fcw_fft_fill(struct rte_bbdev_fft_op
*op,
struct acc_fcw_fft *fcw)
                fcw->bypass = 0;
    }

+/* Fill in a frame control word for FFT processing. */ static
+inline void vrb2_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct
+acc_fcw_fft_3 *fcw) {
+       fcw->in_frame_size = op->fft.input_sequence_size;
+       fcw->leading_pad_size = op->fft.input_leading_padding;
+       fcw->out_frame_size = op->fft.output_sequence_size;
+       fcw->leading_depad_size = op->fft.output_leading_depadding;
+       fcw->cs_window_sel = op->fft.window_index[0] +
+                       (op->fft.window_index[1] << 8) +
+                       (op->fft.window_index[2] << 16) +
+                       (op->fft.window_index[3] << 24);
+       fcw->cs_window_sel2 = op->fft.window_index[4] +
+                       (op->fft.window_index[5] << 8);
+       fcw->cs_enable_bmap = op->fft.cs_bitmap;
+       fcw->num_antennas = op->fft.num_antennas_log2;
+       fcw->idft_size = op->fft.idft_log2;
+       fcw->dft_size = op->fft.dft_log2;
+       fcw->cs_offset = op->fft.cs_time_adjustment;
+       fcw->idft_shift = op->fft.idft_shift;
+       fcw->dft_shift = op->fft.dft_shift;
+       fcw->cs_multiplier = op->fft.ncs_reciprocal;
+       fcw->power_shift = op->fft.power_shift; > +    fcw->exp_adj = op-
fft.fp16_exp_adjust;
+       fcw->fp16_in = check_bit(op->fft.op_flags,
RTE_BBDEV_FFT_FP16_INPUT);
+       fcw->fp16_out = check_bit(op->fft.op_flags,
RTE_BBDEV_FFT_FP16_OUTPUT);
+       fcw->power_en = check_bit(op->fft.op_flags,
RTE_BBDEV_FFT_POWER_MEAS);
+       if (check_bit(op->fft.op_flags,
+                       RTE_BBDEV_FFT_IDFT_BYPASS)) {
+               if (check_bit(op->fft.op_flags,
+                               RTE_BBDEV_FFT_WINDOWING_BYPASS))
+                       fcw->bypass = 2;
+               else
+                       fcw->bypass = 1;
+       } else if (check_bit(op->fft.op_flags,
+                       RTE_BBDEV_FFT_DFT_BYPASS))
+               fcw->bypass = 3;
+       else
+               fcw->bypass = 0;

The only difference I see with VRB1 are backed by corresponding
op_flags (POWER & FP16), is that correct? If so, it does not make
sense to me to have a specific function for VRB2.

There are more changes but these are only formally enabled in the next
stepping hence some of the related code is not included yet. More generally
the FCW and IP is different from VRB1 implementation.

Currently, the code is almost identical so vrb1 implementation should be
reused. If some later changes makes the two implementations diverge, then we
can consider having a dedicated function for VRB2 at that time.

If I may, I believe this is best as-is notably for patches and support.
The functions are fairly small (not much code overlap quantitatively) and the 
underlying IP is different
(with more differences we can enable over time). I don’t think it would help 
anyone really to try to make them
coexist for a small period of time.
Does that sound fair?

I disagree, as I explained the code currently is almost identical, so
just share the code.

You will diverge, if *really* necessary, when it will make more sense to
have two separate functions. For now it is not the case in my opinion.

Thanks,
Maxime





+}
+
    static inline int
    vrb1_dma_desc_fft_fill(struct rte_bbdev_fft_op *op,
                struct acc_dma_req_desc *desc,
@@ -3882,6 +3944,58 @@ vrb1_dma_desc_fft_fill(struct
rte_bbdev_fft_op
*op,
        return 0;
    }

+static inline int
+vrb2_dma_desc_fft_fill(struct rte_bbdev_fft_op *op,
+               struct acc_dma_req_desc *desc,
+               struct rte_mbuf *input, struct rte_mbuf *output, struct
rte_mbuf *win_input,
+               struct rte_mbuf *pwr, uint32_t *in_offset, uint32_t
*out_offset,
+               uint32_t *win_offset, uint32_t *pwr_offset) {
+       bool pwr_en = check_bit(op->fft.op_flags,
RTE_BBDEV_FFT_POWER_MEAS);
+       bool win_en = check_bit(op->fft.op_flags,
RTE_BBDEV_FFT_DEWINDOWING);
+       int num_cs = 0, i, bd_idx = 1;
+
+       /* FCW already done */
+       acc_header_init(desc);
+
+       RTE_SET_USED(win_input);
+       RTE_SET_USED(win_offset);
+
+       desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(input,
*in_offset);
+       desc->data_ptrs[bd_idx].blen = op->fft.input_sequence_size *
ACC_IQ_SIZE;
+       desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_IN;
+       desc->data_ptrs[bd_idx].last = 1;
+       desc->data_ptrs[bd_idx].dma_ext = 0;
+       bd_idx++;
+
+       desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(output,
*out_offset);
+       desc->data_ptrs[bd_idx].blen = op->fft.output_sequence_size *
ACC_IQ_SIZE;
+       desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_HARD;
+       desc->data_ptrs[bd_idx].last = pwr_en ? 0 : 1;
+       desc->data_ptrs[bd_idx].dma_ext = 0;
+       desc->m2dlen = win_en ? 3 : 2;
+       desc->d2mlen = pwr_en ? 2 : 1;
+       desc->ib_ant_offset = op->fft.input_sequence_size;
+       desc->num_ant = op->fft.num_antennas_log2 - 3;
+
+       for (i = 0; i < RTE_BBDEV_MAX_CS; i++)
+               if (check_bit(op->fft.cs_bitmap, 1 << i))
+                       num_cs++;
+       desc->num_cs = num_cs;
+
+       if (pwr_en && pwr) {
+               bd_idx++;
+               desc->data_ptrs[bd_idx].address =
rte_pktmbuf_iova_offset(pwr, *pwr_offset);
+               desc->data_ptrs[bd_idx].blen = num_cs * (1 << op-
fft.num_antennas_log2) * 4;
+               desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_SOFT;
+               desc->data_ptrs[bd_idx].last = 1;
+               desc->data_ptrs[bd_idx].dma_ext = 0;
+       }
+       desc->ob_cyc_offset = op->fft.output_sequence_size;
+       desc->ob_ant_offset = op->fft.output_sequence_size * num_cs;
+       desc->op_addr = op;
+       return 0;
+}

    /** Enqueue one FFT operation for device. */
    static inline int
@@ -3889,22 +4003,32 @@ vrb_enqueue_fft_one_op(struct acc_queue
*q,
struct rte_bbdev_fft_op *op,
                uint16_t total_enqueued_cbs)
    {
        union acc_dma_desc *desc;
-       struct rte_mbuf *input, *output;
-       uint32_t in_offset, out_offset;
+       struct rte_mbuf *input, *output, *pwr, *win;
+       uint32_t in_offset, out_offset, pwr_offset, win_offset;
        struct acc_fcw_fft *fcw;

        desc = acc_desc(q, total_enqueued_cbs);
        input = op->fft.base_input.data;
        output = op->fft.base_output.data;
+       pwr = op->fft.power_meas_output.data;
+       win = op->fft.dewindowing_input.data;
        in_offset = op->fft.base_input.offset;
        out_offset = op->fft.base_output.offset;
+       pwr_offset = op->fft.power_meas_output.offset;
+       win_offset = op->fft.dewindowing_input.offset;

        fcw = (struct acc_fcw_fft *) (q->fcw_ring +
                        ((q->sw_ring_head + total_enqueued_cbs) & q-
sw_ring_wrap_mask)
                        * ACC_MAX_FCW_SIZE);

-       vrb1_fcw_fft_fill(op, fcw);
-       vrb1_dma_desc_fft_fill(op, &desc->req, input, output, &in_offset,
&out_offset);
+       if (q->d->device_variant == VRB1_VARIANT) {
+               vrb1_fcw_fft_fill(op, fcw);
+               vrb1_dma_desc_fft_fill(op, &desc->req, input, output,
&in_offset, &out_offset);
+       } else {
+               vrb2_fcw_fft_fill(op, (struct acc_fcw_fft_3 *) fcw);
+               vrb2_dma_desc_fft_fill(op, &desc->req, input, output, win,
pwr,
+                               &in_offset, &out_offset, &win_offset,
&pwr_offset);
+       }
    #ifdef RTE_LIBRTE_BBDEV_DEBUG
        rte_memdump(stderr, "FCW", &desc->req.fcw_fft,
                        sizeof(desc->req.fcw_fft));



Reply via email to