On 10/3/24 22:49, Hernan Vargas wrote:
Refactor to manage queue memory per operation more flexibly for VRB
devices.

Signed-off-by: Hernan Vargas <hernan.var...@intel.com>
---
  drivers/baseband/acc/acc_common.h  |   5 +
  drivers/baseband/acc/rte_vrb_pmd.c | 214 ++++++++++++++++++++---------
  2 files changed, 157 insertions(+), 62 deletions(-)

diff --git a/drivers/baseband/acc/acc_common.h 
b/drivers/baseband/acc/acc_common.h
index b1f81e73e68d..adbac0dcca70 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -149,6 +149,8 @@
  #define VRB2_VF_ID_SHIFT     6
#define ACC_MAX_FFT_WIN 16
+#define ACC_MAX_RING_BUFFER  64
+#define VRB2_MAX_Q_PER_OP 256
extern int acc_common_logtype; @@ -581,6 +583,9 @@ struct acc_device {
        void *sw_rings_base;  /* Base addr of un-aligned memory for sw rings */
        void *sw_rings;  /* 64MBs of 64MB aligned memory for sw rings */
        rte_iova_t sw_rings_iova;  /* IOVA address of sw_rings */
+       void *sw_rings_array[ACC_MAX_RING_BUFFER];  /* Array of aligned memory 
for sw rings. */
+       rte_iova_t sw_rings_iova_array[ACC_MAX_RING_BUFFER];  /* Array of 
sw_rings IOVA. */
+       uint32_t queue_index[ACC_MAX_RING_BUFFER]; /* Tracking queue index per 
ring buffer. */
        /* Virtual address of the info memory routed to the this function under
         * operation, whether it is PF or VF.
         * HW may DMA information data at this location asynchronously
diff --git a/drivers/baseband/acc/rte_vrb_pmd.c 
b/drivers/baseband/acc/rte_vrb_pmd.c
index bae01e563826..2c62a5b3e329 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -281,7 +281,7 @@ fetch_acc_config(struct rte_bbdev *dev)
                /* Check the depth of the AQs. */
                reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
                reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
-               for (acc = 0; acc < NUM_ACC; acc++) {
+               for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
                        qtopFromAcc(&q_top, acc, acc_conf);
                        if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
                                q_top->aq_depth_log2 =
@@ -290,7 +290,7 @@ fetch_acc_config(struct rte_bbdev *dev)
                                q_top->aq_depth_log2 = (reg_len1 >> 
((q_top->first_qgroup_index -
                                                ACC_NUM_QGRPS_PER_WORD) * 4)) & 
0xF;
                }
-       } else {
+       } else if (d->device_variant == VRB2_VARIANT) {
                reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
                reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
                reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
@@ -308,7 +308,7 @@ fetch_acc_config(struct rte_bbdev *dev)
                                        idx = (reg2 >> ((qg % 
ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
                                else
                                        idx = (reg3 >> ((qg % 
ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
-                               if (idx < VRB_NUM_ACCS) {
+                               if (idx < VRB2_NUM_ACCS) {
                                        acc = qman_func_id[idx];
                                        updateQtop(acc, qg, acc_conf, d);
                                }
@@ -321,7 +321,7 @@ fetch_acc_config(struct rte_bbdev *dev)
                reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
                reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);
- for (acc = 0; acc < NUM_ACC; acc++) {
+               for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
                        qtopFromAcc(&q_top, acc, acc_conf);
                        if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD 
== 0)
                                q_top->aq_depth_log2 = (reg_len0 >> 
((q_top->first_qgroup_index %

This function could be much heavily refactored.
If we look at was is actuallt performed, VRB1 and VRB2 logic is the
same, just a couple of value differs (they could be set at probe time).

I might propose something in the future.

@@ -543,6 +543,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t 
num_queues, int socket_id)
  {
        uint32_t phys_low, phys_high, value;
        struct acc_device *d = dev->data->dev_private;
+       uint16_t queues_per_op, i;
        int ret;
if (d->pf_device && !d->acc_conf.pf_mode_en) {
@@ -564,27 +565,37 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t 
num_queues, int socket_id)
                return -ENODEV;
        }
- alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
+       if (d->device_variant == VRB1_VARIANT) {
+               alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
- /* If minimal memory space approach failed, then allocate
-        * the 2 * 64MB block for the sw rings.
-        */
-       if (d->sw_rings == NULL)
-               alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
+               /* If minimal memory space approach failed, then allocate
+                * the 2 * 64MB block for the sw rings.
+                */
+               if (d->sw_rings == NULL)
+                       alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
- if (d->sw_rings == NULL) {
-               rte_bbdev_log(NOTICE,
-                               "Failure allocating sw_rings memory");
-               return -ENOMEM;
+               if (d->sw_rings == NULL) {
+                       rte_bbdev_log(NOTICE, "Failure allocating sw_rings 
memory");
+                       return -ENOMEM;
+               }
+       } else if (d->device_variant == VRB2_VARIANT) {
+               queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues);
+               for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
+                       alloc_sw_rings_min_mem(dev, d, queues_per_op, 
socket_id);
+                       if (d->sw_rings == NULL) {
+                               rte_bbdev_log(NOTICE, "Failure allocating sw_rings 
memory %d", i);
+                               return -ENOMEM;
+                       }
+                       /* Moves the pointer to the relevant array. */
+                       d->sw_rings_array[i] = d->sw_rings;
+                       d->sw_rings_iova_array[i] = d->sw_rings_iova;
+                       d->sw_rings = NULL;
+                       d->sw_rings_base = NULL;
+                       d->sw_rings_iova = 0;
+                       d->queue_index[i] = 0;
+               }
        }
- /* Configure device with the base address for DMA descriptor rings.
-        * Same descriptor rings used for UL and DL DMA Engines.
-        * Note : Assuming only VF0 bundle is used for PF mode.
-        */
-       phys_high = (uint32_t)(d->sw_rings_iova >> 32);
-       phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
-
        /* Read the populated cfg from device registers. */
        fetch_acc_config(dev);
@@ -599,20 +610,60 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
        if (d->pf_device)
                acc_reg_write(d, VRB1_PfDmaAxiControl, 1);
- acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
-       if (d->device_variant == VRB2_VARIANT) {
-               acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high);
-               acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low);
+       if (d->device_variant == VRB1_VARIANT) {
+               /* Configure device with the base address for DMA descriptor 
rings.
+                * Same descriptor rings used for UL and DL DMA Engines.
+                * Note : Assuming only VF0 bundle is used for PF mode.
+                */
+               phys_high = (uint32_t)(d->sw_rings_iova >> 32);
+               phys_low  = (uint32_t)(d->sw_rings_iova & 
~(ACC_SIZE_64MBYTE-1));
+               acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
+       } else if (d->device_variant == VRB2_VARIANT) {
+               /* Configure device with the base address for DMA descriptor 
rings.
+                * Different ring buffer used for each operation type.
+                * Note : Assuming only VF0 bundle is used for PF mode.
+                */
+               acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
+               acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
+               acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
+               acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
+               acc_reg_write(d, d->reg_addr->dma_ring_fft_hi,
+                               (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] 
>> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_fft_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
+               acc_reg_write(d, d->reg_addr->dma_ring_mld_hi,
+                               (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] 
>> 32));
+               acc_reg_write(d, d->reg_addr->dma_ring_mld_lo,
+                               
(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS]
+                               & ~(ACC_SIZE_64MBYTE - 1)));
        }
+
        /*
         * Configure Ring Size to the max queue ring size
         * (used for wrapping purpose).
@@ -636,19 +687,21 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t 
num_queues, int socket_id)
phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
        phys_low  = (uint32_t)(d->tail_ptr_iova);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
-       acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
-       if (d->device_variant == VRB2_VARIANT) {
-               acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
-               acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
+       {
+               acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
+               acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
+               if (d->device_variant == VRB2_VARIANT) {
+                       acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, 
phys_high);
+                       acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, 
phys_low);
+               }
        }
ret = allocate_info_ring(dev);
@@ -684,8 +737,13 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t 
num_queues, int socket_id)
        rte_free(d->tail_ptrs);
        d->tail_ptrs = NULL;
  free_sw_rings:
-       rte_free(d->sw_rings_base);
-       d->sw_rings = NULL;
+       if (d->device_variant == VRB1_VARIANT) {
+               rte_free(d->sw_rings_base);
+               d->sw_rings = NULL;

It was not caught initially, but it looks akward to free sw_rings_base, and then set sw_rings to NULL.
sw_rings_based should also be set to NULL.

+       } else if (d->device_variant == VRB2_VARIANT) {
+               for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++)
+                       rte_free(d->sw_rings_array[i]);

Same here, you should set sw_rings_array[i] to NULL to avoid double free
later.

+       }
return ret;
  }
@@ -809,17 +867,34 @@ vrb_intr_enable(struct rte_bbdev *dev)
  static int
  vrb_dev_close(struct rte_bbdev *dev)
  {
+       int i;
        struct acc_device *d = dev->data->dev_private;
+
        vrb_check_ir(d);
-       if (d->sw_rings_base != NULL) {
-               rte_free(d->tail_ptrs);
-               rte_free(d->info_ring);
-               rte_free(d->sw_rings_base);
-               rte_free(d->harq_layout);
-               d->tail_ptrs = NULL;
-               d->info_ring = NULL;
-               d->sw_rings_base = NULL;
-               d->harq_layout = NULL;
+       if (d->device_variant == VRB1_VARIANT) {
+               if (d->sw_rings_base != NULL) {
+                       rte_free(d->tail_ptrs);
+                       rte_free(d->info_ring);
+                       rte_free(d->sw_rings_base);
+                       rte_free(d->harq_layout);
+                       d->tail_ptrs = NULL;
+                       d->info_ring = NULL;
+                       d->sw_rings_base = NULL;
+                       d->harq_layout = NULL;
+               }

Actually, it would be cleanner to perform the free operations
systematically, no need to check whether sw_rings_base is NULL.

+       } else if (d->device_variant == VRB2_VARIANT) {
+               if (d->sw_rings_array[1] != NULL) {

Why 1 and not 0? And as mentionned above this can be done
unconditionnally.

+                       rte_free(d->tail_ptrs);
+                       rte_free(d->info_ring);
+                       rte_free(d->harq_layout);
+                       d->tail_ptrs = NULL;
+                       d->info_ring = NULL;
+                       d->harq_layout = NULL;
+                       for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
+                               rte_free(d->sw_rings_array[i]);
+                               d->sw_rings_array[i] = NULL;
+                       }
+               }
        }
        /* Ensure all in flight HW transactions are completed. */
        usleep(ACC_LONG_WAIT);
@@ -890,8 +965,16 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
        }
q->d = d;
-       q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
-       q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
+       if (d->device_variant == VRB1_VARIANT) {
+               q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * 
queue_id));
+               q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * 
queue_id);
+       } else if (d->device_variant == VRB2_VARIANT) {
+               q->ring_addr = RTE_PTR_ADD(d->sw_rings_array[conf->op_type],
+                               (d->sw_ring_size * 
d->queue_index[conf->op_type]));
+               q->ring_addr_iova = d->sw_rings_iova_array[conf->op_type] +
+                               (d->sw_ring_size * 
d->queue_index[conf->op_type]);
+               d->queue_index[conf->op_type]++;
+       }
/* Prepare the Ring with default descriptor format. */
        union acc_dma_desc *desc = NULL;
@@ -1347,8 +1430,14 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct 
rte_bbdev_driver_info *dev_info)
        dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 
d->acc_conf.q_fft.num_qgroups;
        dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = 
d->acc_conf.q_mld.num_qgroups;
        dev_info->max_num_queues = 0;
-       for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++)
+       for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) {
+               if (unlikely(dev_info->num_queues[i] > VRB2_MAX_Q_PER_OP)) {
+                       rte_bbdev_log(ERR, "Unexpected number of queues %d exposed 
for op %d",
+                                       dev_info->num_queues[i], i);
+                       dev_info->num_queues[i] = VRB2_MAX_Q_PER_OP;
+               }
                dev_info->max_num_queues += dev_info->num_queues[i];
+       }
        dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH;
        dev_info->hardware_accelerated = true;
        dev_info->max_dl_queue_priority =
@@ -4239,7 +4328,8 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct 
rte_pci_driver *drv)
                        d->reg_addr = &vrb1_pf_reg_addr;
                else
                        d->reg_addr = &vrb1_vf_reg_addr;
-       } else {
+       } else if ((pci_dev->id.device_id == RTE_VRB2_PF_DEVICE_ID) ||
+                       (pci_dev->id.device_id == RTE_VRB2_VF_DEVICE_ID)) {
                d->device_variant = VRB2_VARIANT;
                d->queue_offset = vrb2_queue_offset;
                d->num_qgroups = VRB2_NUM_QGRPS;

Reply via email to