On Wed, Apr 5, 2023 at 7:56 PM Sathesh Edara <sed...@marvell.com> wrote: > > This patch adds ISM specific functionality.
See the following commit as a reference, and add new acronyms such as ISM to devtools/words-case.txt: commit 33c942d19260817502b49403f0baaab6113774b2 Author: Ashwin Sekhar T K <asek...@marvell.com> Date: Fri Sep 17 16:28:39 2021 +0530 devtools: add Marvell acronyms for commit checks Update word list with Marvell specific acronyms. CPT -> Cryptographic Accelerator Unit CQ -> Completion Queue LBK -> Loopback Interface Unit LMT -> Large Atomic Store Unit MCAM -> Match Content Addressable Memory NIX -> Network Interface Controller Unit NPA -> Network Pool Allocator NPC -> Network Parser and CAM Unit ROC -> Rest Of Chip RQ -> Receive Queue RVU -> Resource Virtualization Unit SQ -> Send Queue SSO -> Schedule Synchronize Order Unit TIM -> Timer Unit Suggested-by: Ferruh Yigit <ferruh.yi...@intel.com> Signed-off-by: Ashwin Sekhar T K <asek...@marvell.com> Reviewed-by: Jerin Jacob <jer...@marvell.com> > > Signed-off-by: Sathesh Edara <sed...@marvell.com> > --- > drivers/net/octeon_ep/cnxk_ep_vf.c | 35 +++++++++++++++-- > drivers/net/octeon_ep/cnxk_ep_vf.h | 12 ++++++ > drivers/net/octeon_ep/otx2_ep_vf.c | 45 ++++++++++++++++++--- > drivers/net/octeon_ep/otx2_ep_vf.h | 14 +++++++ > drivers/net/octeon_ep/otx_ep_common.h | 16 ++++++++ > drivers/net/octeon_ep/otx_ep_ethdev.c | 36 +++++++++++++++++ > drivers/net/octeon_ep/otx_ep_rxtx.c | 56 +++++++++++++++++++++------ > 7 files changed, 194 insertions(+), 20 deletions(-) > > diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c > b/drivers/net/octeon_ep/cnxk_ep_vf.c > index 1a92887109..a437ae68cb 100644 > --- a/drivers/net/octeon_ep/cnxk_ep_vf.c > +++ b/drivers/net/octeon_ep/cnxk_ep_vf.c > @@ -2,11 +2,12 @@ > * Copyright(C) 2022 Marvell. 
> */ > > +#include <inttypes.h> > #include <errno.h> > > #include <rte_common.h> > #include <rte_cycles.h> > - > +#include <rte_memzone.h> > #include "cnxk_ep_vf.h" > > static void > @@ -85,6 +86,7 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, > uint32_t iq_no) > struct otx_ep_instr_queue *iq = otx_ep->instr_queue[iq_no]; > int loop = OTX_EP_BUSY_LOOP_COUNT; > volatile uint64_t reg_val = 0ull; > + uint64_t ism_addr; > > reg_val = oct_ep_read64(otx_ep->hw_addr + > CNXK_EP_R_IN_CONTROL(iq_no)); > > @@ -132,6 +134,19 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, > uint32_t iq_no) > */ > oct_ep_write64(OTX_EP_CLEAR_SDP_IN_INT_LVLS, > otx_ep->hw_addr + CNXK_EP_R_IN_INT_LEVELS(iq_no)); > + /* Set up IQ ISM registers and structures */ > + ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN > + | CNXK_EP_ISM_MSIX_DIS) > + + CNXK_EP_IQ_ISM_OFFSET(iq_no); > + rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr + > + CNXK_EP_R_IN_CNTS_ISM(iq_no)); > + iq->inst_cnt_ism = > + (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr > + + CNXK_EP_IQ_ISM_OFFSET(iq_no)); > + otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%" PRIX64, iq_no, > + (void *)iq->inst_cnt_ism, ism_addr); > + *iq->inst_cnt_ism = 0; > + iq->inst_cnt_ism_prev = 0; > return 0; > } > > @@ -142,6 +157,7 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, > uint32_t oq_no) > uint64_t oq_ctl = 0ull; > int loop = OTX_EP_BUSY_LOOP_COUNT; > struct otx_ep_droq *droq = otx_ep->droq[oq_no]; > + uint64_t ism_addr; > > /* Wait on IDLE to set to 1, supposed to configure BADDR > * as long as IDLE is 0 > @@ -201,9 +217,22 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, > uint32_t oq_no) > rte_write32((uint32_t)reg_val, droq->pkts_sent_reg); > > otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, > rte_read32(droq->pkts_sent_reg)); > - loop = OTX_EP_BUSY_LOOP_COUNT; > + /* Set up ISM registers and structures */ > + ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN > + | CNXK_EP_ISM_MSIX_DIS) 
> + + CNXK_EP_OQ_ISM_OFFSET(oq_no); > + rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr + > + CNXK_EP_R_OUT_CNTS_ISM(oq_no)); > + droq->pkts_sent_ism = > + (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr > + + CNXK_EP_OQ_ISM_OFFSET(oq_no)); > + otx_ep_err("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64, > + oq_no, (void *)droq->pkts_sent_ism, ism_addr); > + *droq->pkts_sent_ism = 0; > + droq->pkts_sent_ism_prev = 0; > > - while (((rte_read32(droq->pkts_sent_reg)) != 0ull)) { > + loop = OTX_EP_BUSY_LOOP_COUNT; > + while (((rte_read32(droq->pkts_sent_reg)) != 0ull) && loop--) { > reg_val = rte_read32(droq->pkts_sent_reg); > rte_write32((uint32_t)reg_val, droq->pkts_sent_reg); > rte_delay_ms(1); > diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h > b/drivers/net/octeon_ep/cnxk_ep_vf.h > index aaa5774552..072b38ea15 100644 > --- a/drivers/net/octeon_ep/cnxk_ep_vf.h > +++ b/drivers/net/octeon_ep/cnxk_ep_vf.h > @@ -27,6 +27,7 @@ > #define CNXK_EP_R_IN_INT_LEVELS_START 0x10060 > #define CNXK_EP_R_IN_PKT_CNT_START 0x10080 > #define CNXK_EP_R_IN_BYTE_CNT_START 0x10090 > +#define CNXK_EP_R_IN_CNTS_ISM_START 0x10520 > > #define CNXK_EP_R_IN_CONTROL(ring) \ > (CNXK_EP_R_IN_CONTROL_START + ((ring) * CNXK_EP_RING_OFFSET)) > @@ -55,6 +56,8 @@ > #define CNXK_EP_R_IN_BYTE_CNT(ring) \ > (CNXK_EP_R_IN_BYTE_CNT_START + ((ring) * CNXK_EP_RING_OFFSET)) > > +#define CNXK_EP_R_IN_CNTS_ISM(ring) \ > + (CNXK_EP_R_IN_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET)) > > /** Rings per Virtual Function **/ > #define CNXK_EP_R_IN_CTL_RPVF_MASK (0xF) > @@ -87,6 +90,7 @@ > #define CNXK_EP_R_OUT_ENABLE_START 0x10170 > #define CNXK_EP_R_OUT_PKT_CNT_START 0x10180 > #define CNXK_EP_R_OUT_BYTE_CNT_START 0x10190 > +#define CNXK_EP_R_OUT_CNTS_ISM_START 0x10510 > > #define CNXK_EP_R_OUT_CNTS(ring) \ > (CNXK_EP_R_OUT_CNTS_START + ((ring) * CNXK_EP_RING_OFFSET)) > @@ -118,6 +122,9 @@ > #define CNXK_EP_R_OUT_BYTE_CNT(ring) \ > (CNXK_EP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_EP_RING_OFFSET)) > > +#define 
CNXK_EP_R_OUT_CNTS_ISM(ring) \ > + (CNXK_EP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET)) > + > /*------------------ R_OUT Masks ----------------*/ > #define CNXK_EP_R_OUT_INT_LEVELS_BMODE (1ULL << 63) > #define CNXK_EP_R_OUT_INT_LEVELS_TIMET (32) > @@ -161,4 +168,9 @@ struct cnxk_ep_instr_64B { > uint64_t exhdr[4]; > }; > > +#define CNXK_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4) > +#define CNXK_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue)) > +#define CNXK_EP_ISM_EN (0x1) > +#define CNXK_EP_ISM_MSIX_DIS (0x2) > + > #endif /*_CNXK_EP_VF_H_ */ > diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c > b/drivers/net/octeon_ep/otx2_ep_vf.c > index 3e4895862b..ced3a415a5 100644 > --- a/drivers/net/octeon_ep/otx2_ep_vf.c > +++ b/drivers/net/octeon_ep/otx2_ep_vf.c > @@ -6,6 +6,7 @@ > > #include <rte_common.h> > #include <rte_cycles.h> > +#include <rte_memzone.h> > #include "otx_ep_common.h" > #include "otx2_ep_vf.h" > > @@ -236,6 +237,7 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, > uint32_t iq_no) > { > struct otx_ep_instr_queue *iq = otx_ep->instr_queue[iq_no]; > volatile uint64_t reg_val = 0ull; > + uint64_t ism_addr; > int loop = SDP_VF_BUSY_LOOP_COUNT; > > reg_val = oct_ep_read64(otx_ep->hw_addr + SDP_VF_R_IN_CONTROL(iq_no)); > @@ -282,6 +284,22 @@ otx2_vf_setup_iq_regs(struct otx_ep_device *otx_ep, > uint32_t iq_no) > */ > oct_ep_write64(OTX_EP_CLEAR_SDP_IN_INT_LVLS, > otx_ep->hw_addr + SDP_VF_R_IN_INT_LEVELS(iq_no)); > + > + /* Set up IQ ISM registers and structures */ > + ism_addr = (otx_ep->ism_buffer_mz->iova | OTX2_EP_ISM_EN > + | OTX2_EP_ISM_MSIX_DIS) > + + OTX2_EP_IQ_ISM_OFFSET(iq_no); > + oct_ep_write64(ism_addr, (uint8_t *)otx_ep->hw_addr + > + SDP_VF_R_IN_CNTS_ISM(iq_no)); > + iq->inst_cnt_ism = > + (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr > + + OTX2_EP_IQ_ISM_OFFSET(iq_no)); > + otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%x", iq_no, > + (void *)iq->inst_cnt_ism, > + (unsigned int)ism_addr); > + 
*iq->inst_cnt_ism = 0; > + iq->inst_cnt_ism_prev = 0; > + > return 0; > } > > @@ -290,6 +308,7 @@ otx2_vf_setup_oq_regs(struct otx_ep_device *otx_ep, > uint32_t oq_no) > { > volatile uint64_t reg_val = 0ull; > uint64_t oq_ctl = 0ull; > + uint64_t ism_addr; > int loop = OTX_EP_BUSY_LOOP_COUNT; > struct otx_ep_droq *droq = otx_ep->droq[oq_no]; > > @@ -351,18 +370,32 @@ otx2_vf_setup_oq_regs(struct otx_ep_device *otx_ep, > uint32_t oq_no) > > otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, > rte_read32(droq->pkts_sent_reg)); > > - loop = OTX_EP_BUSY_LOOP_COUNT; > + /* Set up ISM registers and structures */ > + ism_addr = (otx_ep->ism_buffer_mz->iova | OTX2_EP_ISM_EN > + | OTX2_EP_ISM_MSIX_DIS) > + + OTX2_EP_OQ_ISM_OFFSET(oq_no); > + oct_ep_write64(ism_addr, (uint8_t *)otx_ep->hw_addr + > + SDP_VF_R_OUT_CNTS_ISM(oq_no)); > + droq->pkts_sent_ism = > + (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr > + + OTX2_EP_OQ_ISM_OFFSET(oq_no)); > + otx_ep_err("SDP_R[%d] OQ ISM virt: %p, dma: 0x%x", oq_no, > + (void *)droq->pkts_sent_ism, > + (unsigned int)ism_addr); > + *droq->pkts_sent_ism = 0; > + droq->pkts_sent_ism_prev = 0; > + > + loop = SDP_VF_BUSY_LOOP_COUNT; > while (((rte_read32(droq->pkts_sent_reg)) != 0ull) && loop--) { > reg_val = rte_read32(droq->pkts_sent_reg); > rte_write32((uint32_t)reg_val, droq->pkts_sent_reg); > rte_delay_ms(1); > } > - > - if (loop < 0) { > - otx_ep_err("Packets sent register value is not cleared\n"); > + if (loop < 0) > return -EIO; > - } > - otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, > rte_read32(droq->pkts_sent_reg)); > + otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, > + rte_read32(droq->pkts_sent_reg)); > + > return 0; > } > > diff --git a/drivers/net/octeon_ep/otx2_ep_vf.h > b/drivers/net/octeon_ep/otx2_ep_vf.h > index 36c0b25dea..7c799475ab 100644 > --- a/drivers/net/octeon_ep/otx2_ep_vf.h > +++ b/drivers/net/octeon_ep/otx2_ep_vf.h > @@ -42,6 +42,7 @@ > #define SDP_VF_R_IN_INT_LEVELS_START (0x10060) > #define SDP_VF_R_IN_PKT_CNT_START (0x10080) > 
#define SDP_VF_R_IN_BYTE_CNT_START (0x10090) > +#define SDP_VF_R_IN_CNTS_ISM_START (0x10520) > > #define SDP_VF_R_IN_CONTROL(ring) \ > (SDP_VF_R_IN_CONTROL_START + ((ring) * SDP_VF_RING_OFFSET)) > @@ -70,6 +71,9 @@ > #define SDP_VF_R_IN_BYTE_CNT(ring) \ > (SDP_VF_R_IN_BYTE_CNT_START + ((ring) * SDP_VF_RING_OFFSET)) > > +#define SDP_VF_R_IN_CNTS_ISM(ring) \ > + (SDP_VF_R_IN_CNTS_ISM_START + (SDP_VF_RING_OFFSET * (ring))) > + > /* SDP VF OQ Registers */ > #define SDP_VF_R_OUT_CNTS_START (0x10100) > #define SDP_VF_R_OUT_INT_LEVELS_START (0x10110) > @@ -80,6 +84,7 @@ > #define SDP_VF_R_OUT_ENABLE_START (0x10160) > #define SDP_VF_R_OUT_PKT_CNT_START (0x10180) > #define SDP_VF_R_OUT_BYTE_CNT_START (0x10190) > +#define SDP_VF_R_OUT_CNTS_ISM_START (0x10510) > > #define SDP_VF_R_OUT_CONTROL(ring) \ > (SDP_VF_R_OUT_CONTROL_START + ((ring) * SDP_VF_RING_OFFSET)) > @@ -108,6 +113,9 @@ > #define SDP_VF_R_OUT_BYTE_CNT(ring) \ > (SDP_VF_R_OUT_BYTE_CNT_START + ((ring) * SDP_VF_RING_OFFSET)) > > +#define SDP_VF_R_OUT_CNTS_ISM(ring) \ > + (SDP_VF_R_OUT_CNTS_ISM_START + (SDP_VF_RING_OFFSET * (ring))) > + > /* SDP VF IQ Masks */ > #define SDP_VF_R_IN_CTL_RPVF_MASK (0xF) > #define SDP_VF_R_IN_CTL_RPVF_POS (48) > @@ -143,6 +151,12 @@ struct otx2_ep_instr_64B { > uint64_t exhdr[4]; > }; > > +#define OTX2_EP_IQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue) + 4) > +#define OTX2_EP_OQ_ISM_OFFSET(queue) (RTE_CACHE_LINE_SIZE * (queue)) > +#define OTX2_EP_ISM_EN (0x1) > +#define OTX2_EP_ISM_MSIX_DIS (0x2) > +#define OTX2_EP_MAX_RX_PKT_LEN (16384) > + > union out_int_lvl_t { > uint64_t d64; > struct { > diff --git a/drivers/net/octeon_ep/otx_ep_common.h > b/drivers/net/octeon_ep/otx_ep_common.h > index a3260d5243..76528ed49d 100644 > --- a/drivers/net/octeon_ep/otx_ep_common.h > +++ b/drivers/net/octeon_ep/otx_ep_common.h > @@ -185,6 +185,9 @@ struct otx_ep_instr_queue { > */ > uint32_t flush_index; > > + /* Free-running/wrapping instruction counter for IQ. 
*/ > + uint32_t inst_cnt; > + > /* This keeps track of the instructions pending in this queue. */ > uint64_t instr_pending; > > @@ -211,6 +214,12 @@ struct otx_ep_instr_queue { > > /* Memory zone */ > const struct rte_memzone *iq_mz; > + > + /* Location in memory updated by SDP ISM */ > + uint32_t *inst_cnt_ism; > + > + /* track inst count locally to consolidate HW counter updates */ > + uint32_t inst_cnt_ism_prev; > }; > > /** Descriptor format. > @@ -355,6 +364,10 @@ struct otx_ep_droq { > const struct rte_memzone *desc_ring_mz; > > const struct rte_memzone *info_mz; > + > + /* Pointer to host memory copy of output packet count, set by ISM */ > + uint32_t *pkts_sent_ism; > + uint32_t pkts_sent_ism_prev; > }; > #define OTX_EP_DROQ_SIZE (sizeof(struct otx_ep_droq)) > > @@ -459,6 +472,9 @@ struct otx_ep_device { > uint64_t rx_offloads; > > uint64_t tx_offloads; > + > + /* DMA buffer for SDP ISM messages */ > + const struct rte_memzone *ism_buffer_mz; > }; > > int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no, > diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c > b/drivers/net/octeon_ep/otx_ep_ethdev.c > index 5677a2d6a6..30a7a450fb 100644 > --- a/drivers/net/octeon_ep/otx_ep_ethdev.c > +++ b/drivers/net/octeon_ep/otx_ep_ethdev.c > @@ -2,6 +2,7 @@ > * Copyright(C) 2021 Marvell. > */ > > +#include <inttypes.h> > #include <ethdev_pci.h> > > #include "otx_ep_common.h" > @@ -90,6 +91,32 @@ otx_ep_dev_stop(struct rte_eth_dev *eth_dev) > return 0; > } > > +/* > + * We only need 2 uint32_t locations per IOQ, but separate these so > + * each IOQ has the variables on its own cache line. 
> + */ > +#define OTX_EP_ISM_BUFFER_SIZE (OTX_EP_MAX_IOQS_PER_VF * RTE_CACHE_LINE_SIZE) > +static int > +otx_ep_ism_setup(struct otx_ep_device *otx_epvf) > +{ > + otx_epvf->ism_buffer_mz = > + rte_eth_dma_zone_reserve(otx_epvf->eth_dev, "ism", > + 0, OTX_EP_ISM_BUFFER_SIZE, > + OTX_EP_PCI_RING_ALIGN, 0); > + > + /* Same DMA buffer is shared by OQ and IQ, clear it at start */ > + memset(otx_epvf->ism_buffer_mz->addr, 0, OTX_EP_ISM_BUFFER_SIZE); > + if (otx_epvf->ism_buffer_mz == NULL) { > + otx_ep_err("Failed to allocate ISM buffer\n"); > + return(-1); > + } > + otx_ep_dbg("ISM: virt: 0x%p, dma: 0x%" PRIX64, > + (void *)otx_epvf->ism_buffer_mz->addr, > + otx_epvf->ism_buffer_mz->iova); > + > + return 0; > +} > + > static int > otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf) > { > @@ -110,6 +137,8 @@ otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf) > otx_epvf->chip_id = dev_id; > ret = otx2_ep_vf_setup_device(otx_epvf); > otx_epvf->fn_list.disable_io_queues(otx_epvf); > + if (otx_ep_ism_setup(otx_epvf)) > + ret = -EINVAL; > break; > case PCI_DEVID_CN10KA_EP_NET_VF: > case PCI_DEVID_CN10KB_EP_NET_VF: > @@ -118,6 +147,8 @@ otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf) > otx_epvf->chip_id = dev_id; > ret = cnxk_ep_vf_setup_device(otx_epvf); > otx_epvf->fn_list.disable_io_queues(otx_epvf); > + if (otx_ep_ism_setup(otx_epvf)) > + ret = -EINVAL; > break; > default: > otx_ep_err("Unsupported device\n"); > @@ -434,6 +465,11 @@ otx_ep_dev_close(struct rte_eth_dev *eth_dev) > } > otx_ep_dbg("Num IQs:%d freed\n", otx_epvf->nb_tx_queues); > > + if (rte_eth_dma_zone_free(eth_dev, "ism", 0)) { > + otx_ep_err("Failed to delete ISM buffer\n"); > + return -EINVAL; > + } > + > return 0; > } > > diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c > b/drivers/net/octeon_ep/otx_ep_rxtx.c > index 9712e6cce6..c4153bd583 100644 > --- a/drivers/net/octeon_ep/otx_ep_rxtx.c > +++ b/drivers/net/octeon_ep/otx_ep_rxtx.c > @@ -20,6 +20,7 @@ > #define OTX_EP_INFO_SIZE 
8 > #define OTX_EP_FSZ_FS0 0 > #define DROQ_REFILL_THRESHOLD 16 > +#define OTX2_SDP_REQUEST_ISM (0x1ULL << 63) > > static void > otx_ep_dmazone_free(const struct rte_memzone *mz) > @@ -412,15 +413,32 @@ otx_ep_iqreq_add(struct otx_ep_instr_queue *iq, void > *buf, > static uint32_t > otx_vf_update_read_index(struct otx_ep_instr_queue *iq) > { > - uint32_t new_idx = rte_read32(iq->inst_cnt_reg); > - if (unlikely(new_idx == 0xFFFFFFFFU)) > - rte_write32(new_idx, iq->inst_cnt_reg); > + uint32_t val; > + > + /* > + * Batch subtractions from the HW counter to reduce PCIe traffic > + * This adds an extra local variable, but almost halves the > + * number of PCIe writes. > + */ > + val = *iq->inst_cnt_ism; > + iq->inst_cnt += val - iq->inst_cnt_ism_prev; > + iq->inst_cnt_ism_prev = val; > + > + if (val > (uint32_t)(1 << 31)) { > + /* > + * Only subtract the packet count in the HW counter > + * when count above halfway to saturation. > + */ > + rte_write32(val, iq->inst_cnt_reg); > + *iq->inst_cnt_ism = 0; > + iq->inst_cnt_ism_prev = 0; > + } > + rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg); > + > /* Modulo of the new index with the IQ size will give us > * the new index. > */ > - new_idx &= (iq->nb_desc - 1); > - > - return new_idx; > + return iq->inst_cnt & (iq->nb_desc - 1); > } > > static void > @@ -962,14 +980,30 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep, > static inline uint32_t > otx_ep_check_droq_pkts(struct otx_ep_droq *droq) > { > - volatile uint64_t pkt_count; > uint32_t new_pkts; > + uint32_t val; > + > + /* > + * Batch subtractions from the HW counter to reduce PCIe traffic > + * This adds an extra local variable, but almost halves the > + * number of PCIe writes. 
> + */ > + val = *droq->pkts_sent_ism; > + new_pkts = val - droq->pkts_sent_ism_prev; > + droq->pkts_sent_ism_prev = val; > > - /* Latest available OQ packets */ > - pkt_count = rte_read32(droq->pkts_sent_reg); > - rte_write32(pkt_count, droq->pkts_sent_reg); > - new_pkts = pkt_count; > + if (val > (uint32_t)(1 << 31)) { > + /* > + * Only subtract the packet count in the HW counter > + * when count above halfway to saturation. > + */ > + rte_write32(val, droq->pkts_sent_reg); > + *droq->pkts_sent_ism = 0; > + droq->pkts_sent_ism_prev = 0; > + } > + rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg); > droq->pkts_pending += new_pkts; > + > return new_pkts; > } > > -- > 2.31.1 >