-----Original Message-----
From: Maxime Coquelin <maxime.coque...@redhat.com>
Sent: Friday, October 4, 2024 5:08 AM
To: Vargas, Hernan <hernan.var...@intel.com>; dev@dpdk.org;
gak...@marvell.com; t...@redhat.com
Cc: Chautru, Nicolas <nicolas.chau...@intel.com>; Zhang, Qi Z
<qi.z.zh...@intel.com>
Subject: Re: [PATCH v2 02/10] baseband/acc: queue allocation refactor
On 10/3/24 22:49, Hernan Vargas wrote:
Refactor to manage queue memory per operation more flexibly for VRB
devices.
Signed-off-by: Hernan Vargas <hernan.var...@intel.com>
---
drivers/baseband/acc/acc_common.h | 5 +
drivers/baseband/acc/rte_vrb_pmd.c | 214 ++++++++++++++++++++---------
2 files changed, 157 insertions(+), 62 deletions(-)
diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
index b1f81e73e68d..adbac0dcca70 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -149,6 +149,8 @@
#define VRB2_VF_ID_SHIFT 6
#define ACC_MAX_FFT_WIN 16
+#define ACC_MAX_RING_BUFFER 64
+#define VRB2_MAX_Q_PER_OP 256
extern int acc_common_logtype;
@@ -581,6 +583,9 @@ struct acc_device {
void *sw_rings_base; /* Base addr of un-aligned memory for sw rings */
void *sw_rings; /* 64MBs of 64MB aligned memory for sw rings */
rte_iova_t sw_rings_iova; /* IOVA address of sw_rings */
+ void *sw_rings_array[ACC_MAX_RING_BUFFER]; /* Array of aligned memory for sw rings. */
+ rte_iova_t sw_rings_iova_array[ACC_MAX_RING_BUFFER]; /* Array of sw_rings IOVA. */
+ uint32_t queue_index[ACC_MAX_RING_BUFFER]; /* Tracking queue index per ring buffer. */
/* Virtual address of the info memory routed to the this function under
* operation, whether it is PF or VF.
* HW may DMA information data at this location asynchronously
diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index bae01e563826..2c62a5b3e329 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -281,7 +281,7 @@ fetch_acc_config(struct rte_bbdev *dev)
/* Check the depth of the AQs. */
reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
- for (acc = 0; acc < NUM_ACC; acc++) {
+ for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
qtopFromAcc(&q_top, acc, acc_conf);
if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
q_top->aq_depth_log2 =
@@ -290,7 +290,7 @@ fetch_acc_config(struct rte_bbdev *dev)
q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index -
ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
}
- } else {
+ } else if (d->device_variant == VRB2_VARIANT) {
reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
@@ -308,7 +308,7 @@ fetch_acc_config(struct rte_bbdev *dev)
idx = (reg2 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
else
idx = (reg3 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
- if (idx < VRB_NUM_ACCS) {
+ if (idx < VRB2_NUM_ACCS) {
acc = qman_func_id[idx];
updateQtop(acc, qg, acc_conf, d);
}
@@ -321,7 +321,7 @@ fetch_acc_config(struct rte_bbdev *dev)
reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);
- for (acc = 0; acc < NUM_ACC; acc++) {
+ for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
qtopFromAcc(&q_top, acc, acc_conf);
if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0)
q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index %
This function could be refactored much more heavily.
If we look at what is actually performed, the VRB1 and VRB2 logic is the same, just a
couple of values differ (they could be set at probe time).
I might propose something in the future.
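
For instance, a rough sketch of only the first part of the function, keeping the
VRB1 register layout; the num_accs field is hypothetical and would be filled at
probe time together with the other per-variant values:

static void
vrb_fetch_aq_depths(struct acc_device *d, struct rte_acc_conf *acc_conf)
{
	struct rte_acc_queue_topology *q_top = NULL;
	uint32_t reg_len0, reg_len1;
	int acc;

	/* Check the depth of the AQs. */
	reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
	reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
	/* d->num_accs would hold VRB1_NUM_ACCS or VRB2_NUM_ACCS, set at probe. */
	for (acc = 0; acc < d->num_accs; acc++) {
		qtopFromAcc(&q_top, acc, acc_conf);
		if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
			q_top->aq_depth_log2 = (reg_len0 >>
					(q_top->first_qgroup_index * 4)) & 0xF;
		else
			q_top->aq_depth_log2 = (reg_len1 >>
					((q_top->first_qgroup_index -
					ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
	}
}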
@@ -543,6 +543,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
{
uint32_t phys_low, phys_high, value;
struct acc_device *d = dev->data->dev_private;
+ uint16_t queues_per_op, i;
int ret;
if (d->pf_device && !d->acc_conf.pf_mode_en) {
@@ -564,27 +565,37 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
return -ENODEV;
}
- alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
+ if (d->device_variant == VRB1_VARIANT) {
+ alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
- /* If minimal memory space approach failed, then allocate
- * the 2 * 64MB block for the sw rings.
- */
- if (d->sw_rings == NULL)
- alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
+ /* If minimal memory space approach failed, then allocate
+ * the 2 * 64MB block for the sw rings.
+ */
+ if (d->sw_rings == NULL)
+ alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
- if (d->sw_rings == NULL) {
- rte_bbdev_log(NOTICE,
- "Failure allocating sw_rings memory");
- return -ENOMEM;
+ if (d->sw_rings == NULL) {
+ rte_bbdev_log(NOTICE, "Failure allocating sw_rings
memory");
+ return -ENOMEM;
+ }
+ } else if (d->device_variant == VRB2_VARIANT) {
+ queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues);
+ for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
+ alloc_sw_rings_min_mem(dev, d, queues_per_op, socket_id);
+ if (d->sw_rings == NULL) {
+ rte_bbdev_log(NOTICE, "Failure allocating
sw_rings memory %d", i);
+ return -ENOMEM;
+ }
+ /* Moves the pointer to the relevant array. */
+ d->sw_rings_array[i] = d->sw_rings;
+ d->sw_rings_iova_array[i] = d->sw_rings_iova;
+ d->sw_rings = NULL;
+ d->sw_rings_base = NULL;
+ d->sw_rings_iova = 0;
+ d->queue_index[i] = 0;
+ }
}
- /* Configure device with the base address for DMA descriptor rings.
- * Same descriptor rings used for UL and DL DMA Engines.
- * Note : Assuming only VF0 bundle is used for PF mode.
- */
- phys_high = (uint32_t)(d->sw_rings_iova >> 32);
- phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
-
/* Read the populated cfg from device registers. */
fetch_acc_config(dev);
@@ -599,20 +610,60 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
if (d->pf_device)
acc_reg_write(d, VRB1_PfDmaAxiControl, 1);
- acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
- if (d->device_variant == VRB2_VARIANT) {
- acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high);
- acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low);
+ if (d->device_variant == VRB1_VARIANT) {
+ /* Configure device with the base address for DMA descriptor rings.
+ * Same descriptor rings used for UL and DL DMA Engines.
+ * Note : Assuming only VF0 bundle is used for PF mode.
+ */
+ phys_high = (uint32_t)(d->sw_rings_iova >> 32);
+ phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
+ acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
+ } else if (d->device_variant == VRB2_VARIANT) {
+ /* Configure device with the base address for DMA descriptor rings.
+ * Different ring buffer used for each operation type.
+ * Note : Assuming only VF0 bundle is used for PF mode.
+ */
+ acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC]
+ & ~(ACC_SIZE_64MBYTE - 1)));
+ acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC]
+ & ~(ACC_SIZE_64MBYTE - 1)));
+ acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC]
+ & ~(ACC_SIZE_64MBYTE - 1)));
+ acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC]
+ & ~(ACC_SIZE_64MBYTE - 1)));
+ acc_reg_write(d, d->reg_addr->dma_ring_fft_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_fft_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT]
+ & ~(ACC_SIZE_64MBYTE - 1)));
+ acc_reg_write(d, d->reg_addr->dma_ring_mld_hi,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] >> 32));
+ acc_reg_write(d, d->reg_addr->dma_ring_mld_lo,
+ (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS]
+ & ~(ACC_SIZE_64MBYTE - 1)));
}
+
/*
* Configure Ring Size to the max queue ring size
* (used for wrapping purpose).
@@ -636,19 +687,21 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
phys_low = (uint32_t)(d->tail_ptr_iova);
- acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
- acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
- if (d->device_variant == VRB2_VARIANT) {
- acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
- acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
+ {
+ acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
+ if (d->device_variant == VRB2_VARIANT) {
+ acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
+ acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
+ }
}
ret = allocate_info_ring(dev);
@@ -684,8 +737,13 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
rte_free(d->tail_ptrs);
d->tail_ptrs = NULL;
free_sw_rings:
- rte_free(d->sw_rings_base);
- d->sw_rings = NULL;
+ if (d->device_variant == VRB1_VARIANT) {
+ rte_free(d->sw_rings_base);
+ d->sw_rings = NULL;
It was not caught initially, but it looks awkward to free sw_rings_base and then
set sw_rings to NULL.
sw_rings_base should also be set to NULL.
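
Something like this (just a sketch of the VRB1 branch with both pointers cleared):

	if (d->device_variant == VRB1_VARIANT) {
		rte_free(d->sw_rings_base);
		/* Clear both pointers so the error path cannot reuse or re-free them. */
		d->sw_rings_base = NULL;
		d->sw_rings = NULL;
	}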
+ } else if (d->device_variant == VRB2_VARIANT) {
+ for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++)
+ rte_free(d->sw_rings_array[i]);
Same here, you should set sw_rings_array[i] to NULL to avoid a double free later.
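
For example (sketch only):

	} else if (d->device_variant == VRB2_VARIANT) {
		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
			rte_free(d->sw_rings_array[i]);
			/* Clear the slot so a later cleanup cannot free it twice. */
			d->sw_rings_array[i] = NULL;
		}
	}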