Hi Tom, > From: Tom Rix <t...@redhat.com> > On 9/30/20 3:54 PM, Chautru, Nicolas wrote: > > Hi Tom, > > > >> From: Tom Rix <t...@redhat.com> > >> On 9/28/20 5:29 PM, Nicolas Chautru wrote: > >>> Add configure function to configure the PF from within the > >>> bbdev-test itself without external application configuration the device. > >>> > >>> Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com> > >>> Acked-by: Liu Tianjiao <tianjiao....@intel.com> > >>> --- > >>> app/test-bbdev/test_bbdev_perf.c | 72 +++ > >>> doc/guides/rel_notes/release_20_11.rst | 5 + > >>> drivers/baseband/acc100/meson.build | 2 + > >>> drivers/baseband/acc100/rte_acc100_cfg.h | 17 + > >>> drivers/baseband/acc100/rte_acc100_pmd.c | 505 > >> +++++++++++++++++++++ > >>> .../acc100/rte_pmd_bbdev_acc100_version.map | 7 + > >>> 6 files changed, 608 insertions(+) > >>> > >>> diff --git a/app/test-bbdev/test_bbdev_perf.c > >>> b/app/test-bbdev/test_bbdev_perf.c > >>> index 45c0d62..32f23ff 100644 > >>> --- a/app/test-bbdev/test_bbdev_perf.c > >>> +++ b/app/test-bbdev/test_bbdev_perf.c > >>> @@ -52,6 +52,18 @@ > >>> #define FLR_5G_TIMEOUT 610 > >>> #endif > >>> > >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100 > >>> +#include <rte_acc100_cfg.h> > >>> +#define ACC100PF_DRIVER_NAME ("intel_acc100_pf") > >>> +#define ACC100VF_DRIVER_NAME ("intel_acc100_vf") > >>> +#define ACC100_QMGR_NUM_AQS 16 > >>> +#define ACC100_QMGR_NUM_QGS 2 > >>> +#define ACC100_QMGR_AQ_DEPTH 5 > >>> +#define ACC100_QMGR_INVALID_IDX -1 > >>> +#define ACC100_QMGR_RR 1 > >>> +#define ACC100_QOS_GBR 0 > >>> +#endif > >>> + > >>> #define OPS_CACHE_SIZE 256U > >>> #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ > >>> > >>> @@ -653,6 +665,66 @@ typedef int (test_case_function)(struct > >> active_device *ad, > >>> info->dev_name); > >>> } > >>> #endif > >>> +#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100 > >> seems like this function would break if one of the other bbdev's were > >> #defined. > > No these are independent. 
By default they are all defined. > ok > > > > > >>> + if ((get_init_device() == true) && > >>> + (!strcmp(info->drv.driver_name, > >> ACC100PF_DRIVER_NAME))) { > >>> + struct acc100_conf conf; > >>> + unsigned int i; > >>> + > >>> + printf("Configure ACC100 FEC Driver %s with default > >> values\n", > >>> + info->drv.driver_name); > >>> + > >>> + /* clear default configuration before initialization */ > >>> + memset(&conf, 0, sizeof(struct acc100_conf)); > >>> + > >>> + /* Always set in PF mode for built-in configuration */ > >>> + conf.pf_mode_en = true; > >>> + for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) { > >>> + conf.arb_dl_4g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_dl_4g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_dl_4g[i].round_robin_weight = > >> ACC100_QMGR_RR; > >>> + conf.arb_ul_4g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_ul_4g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_ul_4g[i].round_robin_weight = > >> ACC100_QMGR_RR; > >>> + conf.arb_dl_5g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_dl_5g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_dl_5g[i].round_robin_weight = > >> ACC100_QMGR_RR; > >>> + conf.arb_ul_5g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_ul_5g[i].gbr_threshold1 = > >> ACC100_QOS_GBR; > >>> + conf.arb_ul_5g[i].round_robin_weight = > >> ACC100_QMGR_RR; > >>> + } > >>> + > >>> + conf.input_pos_llr_1_bit = true; > >>> + conf.output_pos_llr_1_bit = true; > >>> + conf.num_vf_bundles = 1; /**< Number of VF bundles to > >> setup */ > >>> + > >>> + conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS; > >>> + conf.q_ul_4g.first_qgroup_index = > >> ACC100_QMGR_INVALID_IDX; > >>> + conf.q_ul_4g.num_aqs_per_groups = > >> ACC100_QMGR_NUM_AQS; > >>> + conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; > >>> + conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS; > >>> + conf.q_dl_4g.first_qgroup_index = > >> ACC100_QMGR_INVALID_IDX; > >>> + 
conf.q_dl_4g.num_aqs_per_groups = > >> ACC100_QMGR_NUM_AQS; > >>> + conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; > >>> + conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS; > >>> + conf.q_ul_5g.first_qgroup_index = > >> ACC100_QMGR_INVALID_IDX; > >>> + conf.q_ul_5g.num_aqs_per_groups = > >> ACC100_QMGR_NUM_AQS; > >>> + conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; > >>> + conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS; > >>> + conf.q_dl_5g.first_qgroup_index = > >> ACC100_QMGR_INVALID_IDX; > >>> + conf.q_dl_5g.num_aqs_per_groups = > >> ACC100_QMGR_NUM_AQS; > >>> + conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; > >>> + > >>> + /* setup PF with configuration information */ > >>> + ret = acc100_configure(info->dev_name, &conf); > >>> + TEST_ASSERT_SUCCESS(ret, > >>> + "Failed to configure ACC100 PF for bbdev > >> %s", > >>> + info->dev_name); > >>> + /* Let's refresh this now this is configured */ > >>> + } > >>> + rte_bbdev_info_get(dev_id, info); > >> The other bbdev's do not call rte_bbdev_info_get, can this be removed ? > > Actually it should be added outside for all versions > > post-configuraion. Thanks > > > >>> +#endif > >>> + > >>> nb_queues = RTE_MIN(rte_lcore_count(), info- > drv.max_num_queues); > >>> nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES); > >>> > >>> diff --git a/doc/guides/rel_notes/release_20_11.rst > >>> b/doc/guides/rel_notes/release_20_11.rst > >>> index 73ac08f..c8d0586 100644 > >>> --- a/doc/guides/rel_notes/release_20_11.rst > >>> +++ b/doc/guides/rel_notes/release_20_11.rst > >>> @@ -55,6 +55,11 @@ New Features > >>> Also, make sure to start the actual text at the margin. > >>> ======================================================= > >>> > >>> +* **Added Intel ACC100 bbdev PMD.** > >>> + > >>> + Added a new ``acc100`` bbdev driver for the Intel\ |reg| ACC100 > >>> + accelerator also known as Mount Bryce. See the > >>> + :doc:`../bbdevs/acc100` BBDEV guide for more details on this new > driver. 
> >>> > >>> Removed Items > >>> ------------- > >>> diff --git a/drivers/baseband/acc100/meson.build > >>> b/drivers/baseband/acc100/meson.build > >>> index 8afafc2..7ac44dc 100644 > >>> --- a/drivers/baseband/acc100/meson.build > >>> +++ b/drivers/baseband/acc100/meson.build > >>> @@ -4,3 +4,5 @@ > >>> deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci'] > >>> > >>> sources = files('rte_acc100_pmd.c') > >>> + > >>> +install_headers('rte_acc100_cfg.h') > >>> diff --git a/drivers/baseband/acc100/rte_acc100_cfg.h > >>> b/drivers/baseband/acc100/rte_acc100_cfg.h > >>> index 73bbe36..7f523bc 100644 > >>> --- a/drivers/baseband/acc100/rte_acc100_cfg.h > >>> +++ b/drivers/baseband/acc100/rte_acc100_cfg.h > >>> @@ -89,6 +89,23 @@ struct acc100_conf { > >>> struct rte_arbitration_t arb_dl_5g[RTE_ACC100_NUM_VFS]; }; > >>> > >>> +/** > >>> + * Configure a ACC100 device > >>> + * > >>> + * @param dev_name > >>> + * The name of the device. This is the short form of PCI BDF, e.g. > 00:01.0. > >>> + * It can also be retrieved for a bbdev device from the dev_name field > in > >> the > >>> + * rte_bbdev_info structure returned by rte_bbdev_info_get(). > >>> + * @param conf > >>> + * Configuration to apply to ACC100 HW. > >>> + * > >>> + * @return > >>> + * Zero on success, negative value on failure. 
> >>> + */ > >>> +__rte_experimental > >>> +int > >>> +acc100_configure(const char *dev_name, struct acc100_conf *conf); > >>> + > >>> #ifdef __cplusplus > >>> } > >>> #endif > >>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c > >>> b/drivers/baseband/acc100/rte_acc100_pmd.c > >>> index 3589814..b50dd32 100644 > >>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c > >>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c > >>> @@ -85,6 +85,26 @@ > >>> > >>> enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC}; > >>> > >>> +/* Return the accelerator enum for a Queue Group Index */ static > >>> +inline int accFromQgid(int qg_idx, const struct acc100_conf > >>> +*acc100_conf) { > >>> + int accQg[ACC100_NUM_QGRPS]; > >>> + int NumQGroupsPerFn[NUM_ACC]; > >>> + int acc, qgIdx, qgIndex = 0; > >>> + for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++) > >>> + accQg[qgIdx] = 0; > >>> + NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups; > >>> + NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups; > >>> + NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups; > >>> + NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups; > >>> + for (acc = UL_4G; acc < NUM_ACC; acc++) > >>> + for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++) > >>> + accQg[qgIndex++] = acc; > >> This looks inefficient, is there a way this could be calculated > >> without filling arrays to > >> > >> access 1 value ? > > That is not time critical, and the same common code is run each time. 
> ok > > > >>> + acc = accQg[qg_idx]; > >>> + return acc; > >>> +} > >>> + > >>> /* Return the queue topology for a Queue Group Index */ static > >>> inline void qtopFromAcc(struct rte_q_topology_t **qtop, int > >>> acc_enum, @@ -113,6 +133,30 @@ > >>> *qtop = p_qtop; > >>> } > >>> > >>> +/* Return the AQ depth for a Queue Group Index */ static inline int > >>> +aqDepth(int qg_idx, struct acc100_conf *acc100_conf) { > >>> + struct rte_q_topology_t *q_top = NULL; > >>> + int acc_enum = accFromQgid(qg_idx, acc100_conf); > >>> + qtopFromAcc(&q_top, acc_enum, acc100_conf); > >>> + if (unlikely(q_top == NULL)) > >>> + return 0; > >> This error is not handled well be the callers. > >> > >> aqNum is similar. > > This fails in a consistent basis, by having not queue available and handling > this as the default case. > ok > > > >>> + return q_top->aq_depth_log2; > >>> +} > >>> + > >>> +/* Return the AQ depth for a Queue Group Index */ static inline int > >>> +aqNum(int qg_idx, struct acc100_conf *acc100_conf) { > >>> + struct rte_q_topology_t *q_top = NULL; > >>> + int acc_enum = accFromQgid(qg_idx, acc100_conf); > >>> + qtopFromAcc(&q_top, acc_enum, acc100_conf); > >>> + if (unlikely(q_top == NULL)) > >>> + return 0; > >>> + return q_top->num_aqs_per_groups; > >>> +} > >>> + > >>> static void > >>> initQTop(struct acc100_conf *acc100_conf) { @@ -4177,3 +4221,464 > >>> @@ static int acc100_pci_remove(struct rte_pci_device *pci_dev) > >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, > >>> pci_id_acc100_pf_map); > >> RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, > >>> acc100_pci_vf_driver); > >>> RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, > >>> pci_id_acc100_vf_map); > >>> + > >>> +/* > >>> + * Implementation to fix the power on status of some 5GUL engines > >>> + * This requires DMA permission if ported outside DPDK > >> This sounds like a workaround, can more detail be added here ? 
> > There are comments through the code I believe: > > - /* Detect engines in undefined state */ > > - /* Force each engine which is in unspecified state */ > > - /* Reset LDPC Cores */ > > - /* Check engine power-on status again */ Do you believe this is not > explicit enough? Power-on status may be in an undefined state, hence these > engines are activated with a dummy payload to make sure they are in a > predictable state once configuration is done. > > Yes, not explicit enough. They do not say it is a workaround so someone else > would not know that > > this is needed or likely needs adjusting in the future. Maybe change > > /* Check engine power-on status again */ to > > /* > > * Power-on status may be in an undefined state. > > * Activate this engine with a dummy payload to make sure the state is > defined. > > */ >
OK I will add a bit more in comments. Thanks > Tom > > >>> + */ > >>> +static void > >>> +poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d, > >>> + struct acc100_conf *conf) > >>> +{ > >>> + int i, template_idx, qg_idx; > >>> + uint32_t address, status, payload; > >>> + printf("Need to clear power-on 5GUL status in internal memory\n"); > >>> + /* Reset LDPC Cores */ > >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++) > >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg + > >>> + ACC100_ENGINE_OFFSET * i, > >> ACC100_RESET_HI); > >>> + usleep(LONG_WAIT); > >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++) > >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg + > >>> + ACC100_ENGINE_OFFSET * i, > >> ACC100_RESET_LO); > >>> + usleep(LONG_WAIT); > >>> + /* Prepare dummy workload */ > >>> + alloc_2x64mb_sw_rings_mem(bbdev, d, 0); > >>> + /* Set base addresses */ > >>> + uint32_t phys_high = (uint32_t)(d->sw_rings_phys >> 32); > >>> + uint32_t phys_low = (uint32_t)(d->sw_rings_phys & > >>> + ~(ACC100_SIZE_64MBYTE-1)); > >>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, > >> phys_high); > >>> + acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low); > >>> + > >>> + /* Descriptor for a dummy 5GUL code block processing*/ > >>> + union acc100_dma_desc *desc = NULL; > >>> + desc = d->sw_rings; > >>> + desc->req.data_ptrs[0].address = d->sw_rings_phys + > >>> + ACC100_DESC_FCW_OFFSET; > >>> + desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN; > >>> + desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW; > >>> + desc->req.data_ptrs[0].last = 0; > >>> + desc->req.data_ptrs[0].dma_ext = 0; > >>> + desc->req.data_ptrs[1].address = d->sw_rings_phys + 512; > >>> + desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN; > >>> + desc->req.data_ptrs[1].last = 1; > >>> + desc->req.data_ptrs[1].dma_ext = 0; > >>> + desc->req.data_ptrs[1].blen = 44; > >>> + desc->req.data_ptrs[2].address = d->sw_rings_phys + 1024; > >>> + desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC; > 
>>> + desc->req.data_ptrs[2].last = 1; > >>> + desc->req.data_ptrs[2].dma_ext = 0; > >>> + desc->req.data_ptrs[2].blen = 5; > >>> + /* Dummy FCW */ > >>> + desc->req.fcw_ld.FCWversion = ACC100_FCW_VER; > >>> + desc->req.fcw_ld.qm = 1; > >>> + desc->req.fcw_ld.nfiller = 30; > >>> + desc->req.fcw_ld.BG = 2 - 1; > >>> + desc->req.fcw_ld.Zc = 7; > >>> + desc->req.fcw_ld.ncb = 350; > >>> + desc->req.fcw_ld.rm_e = 4; > >>> + desc->req.fcw_ld.itmax = 10; > >>> + desc->req.fcw_ld.gain_i = 1; > >>> + desc->req.fcw_ld.gain_h = 1; > >>> + > >>> + int engines_to_restart[SIG_UL_5G_LAST + 1] = {0}; > >>> + int num_failed_engine = 0; > >>> + /* Detect engines in undefined state */ > >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST; > >>> + template_idx++) { > >>> + /* Check engine power-on status */ > >>> + address = HwPfFecUl5gIbDebugReg + > >>> + ACC100_ENGINE_OFFSET * template_idx; > >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF; > >>> + if (status == 0) { > >>> + engines_to_restart[num_failed_engine] = > >> template_idx; > >>> + num_failed_engine++; > >>> + } > >>> + } > >>> + > >>> + int numQqsAcc = conf->q_ul_5g.num_qgroups; > >>> + int numQgs = conf->q_ul_5g.num_qgroups; > >>> + payload = 0; > >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); > >> qg_idx++) > >>> + payload |= (1 << qg_idx); > >>> + /* Force each engine which is in unspecified state */ > >>> + for (i = 0; i < num_failed_engine; i++) { > >>> + int failed_engine = engines_to_restart[i]; > >>> + printf("Force engine %d\n", failed_engine); > >>> + for (template_idx = SIG_UL_5G; template_idx <= > >> SIG_UL_5G_LAST; > >>> + template_idx++) { > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD * template_idx; > >>> + if (template_idx == failed_engine) > >>> + acc100_reg_write(d, address, payload); > >>> + else > >>> + acc100_reg_write(d, address, 0); > >>> + } > >>> + /* Reset descriptor header */ > >>> + desc->req.word0 = ACC100_DMA_DESC_TYPE; > >>> + 
desc->req.word1 = 0; > >>> + desc->req.word2 = 0; > >>> + desc->req.word3 = 0; > >>> + desc->req.numCBs = 1; > >>> + desc->req.m2dlen = 2; > >>> + desc->req.d2mlen = 1; > >>> + /* Enqueue the code block for processing */ > >>> + union acc100_enqueue_reg_fmt enq_req; > >>> + enq_req.val = 0; > >>> + enq_req.addr_offset = ACC100_DESC_OFFSET; > >>> + enq_req.num_elem = 1; > >>> + enq_req.req_elem_addr = 0; > >>> + rte_wmb(); > >>> + acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, > >> enq_req.val); > >>> + usleep(LONG_WAIT * 100); > >>> + if (desc->req.word0 != 2) > >>> + printf("DMA Response %#"PRIx32"\n", desc- > >>> req.word0); > >>> + } > >>> + > >>> + /* Reset LDPC Cores */ > >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++) > >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg + > >>> + ACC100_ENGINE_OFFSET * i, > >> ACC100_RESET_HI); > >>> + usleep(LONG_WAIT); > >>> + for (i = 0; i < ACC100_ENGINES_MAX; i++) > >>> + acc100_reg_write(d, HWPfFecUl5gCntrlReg + > >>> + ACC100_ENGINE_OFFSET * i, > >> ACC100_RESET_LO); > >>> + usleep(LONG_WAIT); > >>> + acc100_reg_write(d, HWPfHi5GHardResetReg, > >> ACC100_RESET_HARD); > >>> + usleep(LONG_WAIT); > >>> + int numEngines = 0; > >>> + /* Check engine power-on status again */ > >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST; > >>> + template_idx++) { > >>> + address = HwPfFecUl5gIbDebugReg + > >>> + ACC100_ENGINE_OFFSET * template_idx; > >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF; > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD * template_idx; > >>> + if (status == 1) { > >>> + acc100_reg_write(d, address, payload); > >>> + numEngines++; > >>> + } else > >>> + acc100_reg_write(d, address, 0); > >>> + } > >>> + printf("Number of 5GUL engines %d\n", numEngines); > >>> + > >>> + if (d->sw_rings_base != NULL) > >>> + rte_free(d->sw_rings_base); > >>> + usleep(LONG_WAIT); > >>> +} > >>> + > >>> +/* Initial configuration of a ACC100 device prior to running > >>> +configure() */ int 
acc100_configure(const char *dev_name, struct > >>> +acc100_conf *conf) { > >>> + rte_bbdev_log(INFO, "acc100_configure"); > >>> + uint32_t payload, address, status; > >> maybe value or data would be a better variable name than payload. > >> > >> would mean changing acc100_reg_write > > transparent to me, but can change given DPDK uses term value. > > > > > >>> + int qg_idx, template_idx, vf_idx, acc, i; > >>> + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); > >>> + > >>> + /* Compile time checks */ > >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256); > >>> + RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256); > >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24); > >>> + RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32); > >>> + > >>> + if (bbdev == NULL) { > >>> + rte_bbdev_log(ERR, > >>> + "Invalid dev_name (%s), or device is not yet initialised", > >>> + dev_name); > >>> + return -ENODEV; > >>> + } > >>> + struct acc100_device *d = bbdev->data->dev_private; > >>> + > >>> + /* Store configuration */ > >>> + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf)); > >>> + > >>> + /* PCIe Bridge configuration */ > >>> + acc100_reg_write(d, HwPfPcieGpexBridgeControl, > >> ACC100_CFG_PCI_BRIDGE); > >>> + for (i = 1; i < 17; i++) > >> 17 is a magic number, use a #define > >> > >> this is a general issue. > > These are only used once but still agreed. 
> > > >>> + acc100_reg_write(d, > >>> + > >> HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh > >>> + + i * 16, 0); > >>> + > >>> + /* PCIe Link Trainiing and Status State Machine */ > >>> + acc100_reg_write(d, HwPfPcieGpexLtssmStateCntrl, 0xDFC00000); > >>> + > >>> + /* Prevent blocking AXI read on BRESP for AXI Write */ > >>> + address = HwPfPcieGpexAxiPioControl; > >>> + payload = ACC100_CFG_PCI_AXI; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* 5GDL PLL phase shift */ > >>> + acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1); > >>> + > >>> + /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */ > >>> + address = HWPfDmaAxiControl; > >>> + payload = 1; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* DDR Configuration */ > >>> + address = HWPfDdrBcTim6; > >>> + payload = acc100_reg_read(d, address); > >>> + payload &= 0xFFFFFFFB; /* Bit 2 */ #ifdef ACC100_DDR_ECC_ENABLE > >>> + payload |= 0x4; > >>> +#endif > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfDdrPhyDqsCountNum; > >>> +#ifdef ACC100_DDR_ECC_ENABLE > >>> + payload = 9; > >>> +#else > >>> + payload = 8; > >>> +#endif > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Set default descriptor signature */ > >>> + address = HWPfDmaDescriptorSignatuture; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Enable the Error Detection in DMA */ > >>> + payload = ACC100_CFG_DMA_ERROR; > >>> + address = HWPfDmaErrorDetectionEn; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* AXI Cache configuration */ > >>> + payload = ACC100_CFG_AXI_CACHE; > >>> + address = HWPfDmaAxcacheReg; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Default DMA Configuration (Qmgr Enabled) */ > >>> + address = HWPfDmaConfig0Reg; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfDmaQmanen; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, 
payload); > >>> + > >>> + /* Default RLIM/ALEN configuration */ > >>> + address = HWPfDmaConfig1Reg; > >>> + payload = (1 << 31) + (23 << 8) + (1 << 6) + 7; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Configure DMA Qmanager addresses */ > >>> + address = HWPfDmaQmgrAddrReg; > >>> + payload = HWPfQmgrEgressQueuesTemplate; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* ===== Qmgr Configuration ===== */ > >>> + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 > >> for UL */ > >>> + int totalQgs = conf->q_ul_4g.num_qgroups + > >>> + conf->q_ul_5g.num_qgroups + > >>> + conf->q_dl_4g.num_qgroups + > >>> + conf->q_dl_5g.num_qgroups; > >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { > >>> + address = HWPfQmgrDepthLog2Grp + > >>> + BYTES_IN_WORD * qg_idx; > >>> + payload = aqDepth(qg_idx, conf); > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfQmgrTholdGrp + > >>> + BYTES_IN_WORD * qg_idx; > >>> + payload = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1)); > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + > >>> + /* Template Priority in incremental order */ > >>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL; > >>> + template_idx++) { > >>> + address = HWPfQmgrGrpTmplateReg0Indx + > >>> + BYTES_IN_WORD * (template_idx % 8); > >>> + payload = TMPL_PRI_0; > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfQmgrGrpTmplateReg1Indx + > >>> + BYTES_IN_WORD * (template_idx % 8); > >>> + payload = TMPL_PRI_1; > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfQmgrGrpTmplateReg2indx + > >>> + BYTES_IN_WORD * (template_idx % 8); > >>> + payload = TMPL_PRI_2; > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfQmgrGrpTmplateReg3Indx + > >>> + BYTES_IN_WORD * (template_idx % 8); > >>> + payload = TMPL_PRI_3; > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + > >>> + address = HWPfQmgrGrpPriority; > >>> + payload = 
ACC100_CFG_QMGR_HI_P; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Template Configuration */ > >>> + for (template_idx = 0; template_idx < ACC100_NUM_TMPL; > >> template_idx++) { > >>> + payload = 0; > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD * template_idx; > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + /* 4GUL */ > >>> + int numQgs = conf->q_ul_4g.num_qgroups; > >>> + int numQqsAcc = 0; > >>> + payload = 0; > >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); > >> qg_idx++) > >>> + payload |= (1 << qg_idx); > >>> + for (template_idx = SIG_UL_4G; template_idx <= SIG_UL_4G_LAST; > >>> + template_idx++) { > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD*template_idx; > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + /* 5GUL */ > >>> + numQqsAcc += numQgs; > >>> + numQgs = conf->q_ul_5g.num_qgroups; > >>> + payload = 0; > >>> + int numEngines = 0; > >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); > >> qg_idx++) > >>> + payload |= (1 << qg_idx); > >>> + for (template_idx = SIG_UL_5G; template_idx <= SIG_UL_5G_LAST; > >>> + template_idx++) { > >>> + /* Check engine power-on status */ > >>> + address = HwPfFecUl5gIbDebugReg + > >>> + ACC100_ENGINE_OFFSET * template_idx; > >>> + status = (acc100_reg_read(d, address) >> 4) & 0xF; > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD * template_idx; > >>> + if (status == 1) { > >>> + acc100_reg_write(d, address, payload); > >>> + numEngines++; > >>> + } else > >>> + acc100_reg_write(d, address, 0); > >>> + #if RTE_ACC100_SINGLE_FEC == 1 > >> #if should be at start of line > > ok > > > >>> + payload = 0; > >>> + #endif > >>> + } > >>> + printf("Number of 5GUL engines %d\n", numEngines); > >>> + /* 4GDL */ > >>> + numQqsAcc += numQgs; > >>> + numQgs = conf->q_dl_4g.num_qgroups; > >>> + payload = 0; > >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); > >> qg_idx++) > >>> + 
payload |= (1 << qg_idx); > >>> + for (template_idx = SIG_DL_4G; template_idx <= SIG_DL_4G_LAST; > >>> + template_idx++) { > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD*template_idx; > >>> + acc100_reg_write(d, address, payload); > >>> + #if RTE_ACC100_SINGLE_FEC == 1 > >>> + payload = 0; > >>> + #endif > >>> + } > >>> + /* 5GDL */ > >>> + numQqsAcc += numQgs; > >>> + numQgs = conf->q_dl_5g.num_qgroups; > >>> + payload = 0; > >>> + for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); > >> qg_idx++) > >>> + payload |= (1 << qg_idx); > >>> + for (template_idx = SIG_DL_5G; template_idx <= SIG_DL_5G_LAST; > >>> + template_idx++) { > >>> + address = HWPfQmgrGrpTmplateReg4Indx > >>> + + BYTES_IN_WORD*template_idx; > >>> + acc100_reg_write(d, address, payload); > >>> + #if RTE_ACC100_SINGLE_FEC == 1 > >>> + payload = 0; > >>> + #endif > >>> + } > >>> + > >>> + /* Queue Group Function mapping */ > >>> + int qman_func_id[5] = {0, 2, 1, 3, 4}; > >>> + address = HWPfQmgrGrpFunction0; > >>> + payload = 0; > >>> + for (qg_idx = 0; qg_idx < 8; qg_idx++) { > >>> + acc = accFromQgid(qg_idx, conf); > >>> + payload |= qman_func_id[acc]<<(qg_idx * 4); > >>> + } > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* Configuration of the Arbitration QGroup depth to 1 */ > >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { > >>> + address = HWPfQmgrArbQDepthGrp + > >>> + BYTES_IN_WORD * qg_idx; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + > >>> + /* Enabling AQueues through the Queue hierarchy*/ > >>> + for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) { > >>> + for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) { > >>> + payload = 0; > >>> + if (vf_idx < conf->num_vf_bundles && > >>> + qg_idx < totalQgs) > >>> + payload = (1 << aqNum(qg_idx, conf)) - 1; > >>> + address = HWPfQmgrAqEnableVf > >>> + + vf_idx * BYTES_IN_WORD; > >>> + payload += (qg_idx << 16); > >>> + acc100_reg_write(d, address, 
payload); > >>> + } > >>> + } > >>> + > >>> + /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */ > >>> + uint32_t aram_address = 0; > >>> + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { > >>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { > >>> + address = HWPfQmgrVfBaseAddr + vf_idx > >>> + * BYTES_IN_WORD + qg_idx > >>> + * BYTES_IN_WORD * 64; > >>> + payload = aram_address; > >>> + acc100_reg_write(d, address, payload); > >>> + /* Offset ARAM Address for next memory bank > >>> + * - increment of 4B > >>> + */ > >>> + aram_address += aqNum(qg_idx, conf) * > >>> + (1 << aqDepth(qg_idx, conf)); > >>> + } > >>> + } > >>> + > >>> + if (aram_address > WORDS_IN_ARAM_SIZE) { > >>> + rte_bbdev_log(ERR, "ARAM Configuration not fitting %d > >> %d\n", > >>> + aram_address, WORDS_IN_ARAM_SIZE); > >>> + return -EINVAL; > >>> + } > >>> + > >>> + /* ==== HI Configuration ==== */ > >>> + > >>> + /* Prevent Block on Transmit Error */ > >>> + address = HWPfHiBlockTransmitOnErrorEn; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, payload); > >>> + /* Prevents to drop MSI */ > >>> + address = HWPfHiMsiDropEnableReg; > >>> + payload = 0; > >>> + acc100_reg_write(d, address, payload); > >>> + /* Set the PF Mode register */ > >>> + address = HWPfHiPfMode; > >>> + payload = (conf->pf_mode_en) ? 
2 : 0; > >>> + acc100_reg_write(d, address, payload); > >>> + /* Enable Error Detection in HW */ > >>> + address = HWPfDmaErrorDetectionEn; > >>> + payload = 0x3D7; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* QoS overflow init */ > >>> + payload = 1; > >>> + address = HWPfQosmonAEvalOverflow0; > >>> + acc100_reg_write(d, address, payload); > >>> + address = HWPfQosmonBEvalOverflow0; > >>> + acc100_reg_write(d, address, payload); > >>> + > >>> + /* HARQ DDR Configuration */ > >>> + unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now > >> */ > >>> + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { > >>> + address = HWPfDmaVfDdrBaseRw + vf_idx > >>> + * 0x10; > >>> + payload = ((vf_idx * (ddrSizeInMb / 64)) << 16) + > >>> + (ddrSizeInMb - 1); > >>> + acc100_reg_write(d, address, payload); > >>> + } > >>> + usleep(LONG_WAIT); > >> Is sleep needed here ? the reg_write has one. > > This one is needed on top > > > >>> + > >> Since this seems like a workaround, add a comment here. > > fair enough, ok, thanks > > > >> Tom > >> > >>> + if (numEngines < (SIG_UL_5G_LAST + 1)) > >>> + poweron_cleanup(bbdev, d, conf); > >>> + > >>> + rte_bbdev_log_debug("PF Tip configuration complete for %s", > >> dev_name); > >>> + return 0; > >>> +} > >>> diff --git > >>> a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map > >>> b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map > >>> index 4a76d1d..91c234d 100644 > >>> --- a/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map > >>> +++ b/drivers/baseband/acc100/rte_pmd_bbdev_acc100_version.map > >>> @@ -1,3 +1,10 @@ > >>> DPDK_21 { > >>> local: *; > >>> }; > >>> + > >>> +EXPERIMENTAL { > >>> + global: > >>> + > >>> + acc100_configure; > >>> + > >>> +};