On Wed, Sep 21, 2022 at 07:26:20PM +0530, Shijith Thotton wrote: > Enabled the flag pmd_iova_as_va in cnxk driver build files as they work > with IOVA as VA. Updated cn9k and cn10k soc build configurations to > enable the IOVA as VA build by default. > > Signed-off-by: Shijith Thotton <sthot...@marvell.com> > --- > config/arm/meson.build | 8 +++- > drivers/common/cnxk/meson.build | 1 + > drivers/crypto/cnxk/cn10k_ipsec_la_ops.h | 4 +- > drivers/crypto/cnxk/cn9k_ipsec_la_ops.h | 2 +- > drivers/crypto/cnxk/meson.build | 2 + > drivers/dma/cnxk/meson.build | 1 + > drivers/event/cnxk/meson.build | 1 + > drivers/mempool/cnxk/meson.build | 1 + > drivers/net/cnxk/cn10k_tx.h | 55 +++++++----------------- > drivers/net/cnxk/cn9k_tx.h | 55 +++++++----------------- > drivers/net/cnxk/cnxk_ethdev.h | 1 - > drivers/net/cnxk/meson.build | 1 + > drivers/raw/cnxk_bphy/meson.build | 1 + > drivers/raw/cnxk_gpio/meson.build | 1 + > 14 files changed, 50 insertions(+), 84 deletions(-) > > diff --git a/config/arm/meson.build b/config/arm/meson.build > index 9f1636e0d5..4e95e8b388 100644 > --- a/config/arm/meson.build > +++ b/config/arm/meson.build > @@ -294,7 +294,8 @@ soc_cn10k = { > 'flags': [ > ['RTE_MAX_LCORE', 24], > ['RTE_MAX_NUMA_NODES', 1], > - ['RTE_MEMPOOL_ALIGN', 128] > + ['RTE_MEMPOOL_ALIGN', 128], > + ['RTE_IOVA_AS_VA', 1] > ], > 'part_number': '0xd49', > 'extra_march_features': ['crypto'], > @@ -370,7 +371,10 @@ soc_cn9k = { > 'description': 'Marvell OCTEON 9', > 'implementer': '0x43', > 'part_number': '0xb2', > - 'numa': false > + 'numa': false, > + 'flags': [ > + ['RTE_IOVA_AS_VA', 1] > + ] > }
I think this could go in a separate patch: "disable IOVA as PA for octeontx2/3". The reason is that this patch clearly breaks the API (m->buf_iova field becomes invalid) and the ABI (mbuf fields are moved) for these architectures. This ABI breakage has to be advertised in the release notes. In fact, it should have been advertised before, but I suppose it does not impact general purpose arm distributions, so I guess it is ok. One other thing to highlight: enabling RTE_IOVA_AS_VA means that it disables all drivers that do not have the pmd_iova_as_va flag. Are there use-cases where drivers other than cnxk are used? For instance, is there a PCI bus which is likely to be used by a driver/*? > > soc_stingray = { > diff --git a/drivers/common/cnxk/meson.build b/drivers/common/cnxk/meson.build > index 6f808271d1..d019cfa8d1 100644 > --- a/drivers/common/cnxk/meson.build > +++ b/drivers/common/cnxk/meson.build > @@ -86,3 +86,4 @@ sources += files('cnxk_telemetry_bphy.c', > ) > > deps += ['bus_pci', 'net', 'telemetry'] > +pmd_iova_as_va = true > diff --git a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h > b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h > index 66cfe6ca98..16db14344d 100644 > --- a/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h > +++ b/drivers/crypto/cnxk/cn10k_ipsec_la_ops.h > @@ -85,7 +85,7 @@ process_outb_sa(struct roc_cpt_lf *lf, struct rte_crypto_op > *cop, > > /* Prepare CPT instruction */ > inst->w4.u64 = inst_w4_u64 | rte_pktmbuf_pkt_len(m_src); > - dptr = rte_pktmbuf_iova(m_src); > + dptr = rte_pktmbuf_mtod(m_src, uint64_t); > inst->dptr = dptr; > inst->rptr = dptr; > > @@ -102,7 +102,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct > cn10k_ipsec_sa *sa, > > /* Prepare CPT instruction */ > inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src); > - dptr = rte_pktmbuf_iova(m_src); > + dptr = rte_pktmbuf_mtod(m_src, uint64_t); > inst->dptr = dptr; > inst->rptr = dptr; > > diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h > 
b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h > index e469596756..8b68e4c728 100644 > --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h > +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h > @@ -99,7 +99,7 @@ process_inb_sa(struct rte_crypto_op *cop, struct > cn9k_ipsec_sa *sa, > > /* Prepare CPT instruction */ > inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src); > - inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src); > + inst->dptr = inst->rptr = rte_pktmbuf_mtod(m_src, uint64_t); > inst->w7.u64 = sa->inst.w7; > } > #endif /* __CN9K_IPSEC_LA_OPS_H__ */ > diff --git a/drivers/crypto/cnxk/meson.build b/drivers/crypto/cnxk/meson.build > index 23a1cc3aac..764e7bb99a 100644 > --- a/drivers/crypto/cnxk/meson.build > +++ b/drivers/crypto/cnxk/meson.build > @@ -31,3 +31,5 @@ if get_option('buildtype').contains('debug') > else > cflags += [ '-ULA_IPSEC_DEBUG' ] > endif > + > +pmd_iova_as_va = true > diff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build > index d4be4ee860..ef0e3db109 100644 > --- a/drivers/dma/cnxk/meson.build > +++ b/drivers/dma/cnxk/meson.build > @@ -3,3 +3,4 @@ > > deps += ['bus_pci', 'common_cnxk', 'dmadev'] > sources = files('cnxk_dmadev.c') > +pmd_iova_as_va = true > diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build > index b27bae7b12..650d0d4256 100644 > --- a/drivers/event/cnxk/meson.build > +++ b/drivers/event/cnxk/meson.build > @@ -479,3 +479,4 @@ foreach flag: extra_flags > endforeach > > deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk'] > +pmd_iova_as_va = true > diff --git a/drivers/mempool/cnxk/meson.build > b/drivers/mempool/cnxk/meson.build > index d5d1978569..a328176457 100644 > --- a/drivers/mempool/cnxk/meson.build > +++ b/drivers/mempool/cnxk/meson.build > @@ -17,3 +17,4 @@ sources = files( > ) > > deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_cnxk', 'mempool'] > +pmd_iova_as_va = true > diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h > index 
ea13866b20..2ef62da132 100644 > --- a/drivers/net/cnxk/cn10k_tx.h > +++ b/drivers/net/cnxk/cn10k_tx.h > @@ -1775,14 +1775,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t > *ws, > mbuf2 = (uint64_t *)tx_pkts[2]; > mbuf3 = (uint64_t *)tx_pkts[3]; > > - mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + > - offsetof(struct rte_mbuf, buf_iova)); > /* > * Get mbuf's, olflags, iova, pktlen, dataoff > * dataoff_iovaX.D[0] = iova, > @@ -1790,28 +1782,24 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t > *ws, > * len_olflagsX.D[0] = ol_flags, > * len_olflagsX.D[1](63:32) = mbuf->pkt_len > */ > - dataoff_iova0 = vld1q_u64(mbuf0); > - len_olflags0 = vld1q_u64(mbuf0 + 2); > - dataoff_iova1 = vld1q_u64(mbuf1); > - len_olflags1 = vld1q_u64(mbuf1 + 2); > - dataoff_iova2 = vld1q_u64(mbuf2); > - len_olflags2 = vld1q_u64(mbuf2 + 2); > - dataoff_iova3 = vld1q_u64(mbuf3); > - len_olflags3 = vld1q_u64(mbuf3 + 2); > + dataoff_iova0 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, > vld1q_u64(mbuf0), 1); > + len_olflags0 = vld1q_u64(mbuf0 + 3); > + dataoff_iova1 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, > vld1q_u64(mbuf1), 1); > + len_olflags1 = vld1q_u64(mbuf1 + 3); > + dataoff_iova2 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, > vld1q_u64(mbuf2), 1); > + len_olflags2 = vld1q_u64(mbuf2 + 3); > + dataoff_iova3 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, > vld1q_u64(mbuf3), 1); > + len_olflags3 = vld1q_u64(mbuf3 + 3); > > /* Move mbufs to point pool */ > - mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct 
rte_mbuf, buf_iova)); > - mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > + mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct > rte_mbuf, pool)); > + mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct > rte_mbuf, pool)); > + mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct > rte_mbuf, pool)); > + mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct > rte_mbuf, pool)); > > if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | > NIX_TX_OFFLOAD_L3_L4_CSUM_F)) { > @@ -1861,17 +1849,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t > *ws, > xtmp128 = vzip2q_u64(len_olflags0, len_olflags1); > ytmp128 = vzip2q_u64(len_olflags2, len_olflags3); > > - /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */ > - const uint64x2_t and_mask0 = { > - 0xFFFFFFFFFFFFFFFF, > - 0x000000000000FFFF, > - }; > - > - dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0); > - dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0); > - dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0); > - dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0); > - > /* > * Pick only 16 bits of pktlen preset at bits 63:32 > * and place them at bits 15:0. 
> diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h > index 6ce81f5c96..f5d99ccb5a 100644 > --- a/drivers/net/cnxk/cn9k_tx.h > +++ b/drivers/net/cnxk/cn9k_tx.h > @@ -1005,14 +1005,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct > rte_mbuf **tx_pkts, > mbuf2 = (uint64_t *)tx_pkts[2]; > mbuf3 = (uint64_t *)tx_pkts[3]; > > - mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + > - offsetof(struct rte_mbuf, buf_iova)); > /* > * Get mbuf's, olflags, iova, pktlen, dataoff > * dataoff_iovaX.D[0] = iova, > @@ -1020,28 +1012,24 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct > rte_mbuf **tx_pkts, > * len_olflagsX.D[0] = ol_flags, > * len_olflagsX.D[1](63:32) = mbuf->pkt_len > */ > - dataoff_iova0 = vld1q_u64(mbuf0); > - len_olflags0 = vld1q_u64(mbuf0 + 2); > - dataoff_iova1 = vld1q_u64(mbuf1); > - len_olflags1 = vld1q_u64(mbuf1 + 2); > - dataoff_iova2 = vld1q_u64(mbuf2); > - len_olflags2 = vld1q_u64(mbuf2 + 2); > - dataoff_iova3 = vld1q_u64(mbuf3); > - len_olflags3 = vld1q_u64(mbuf3 + 2); > + dataoff_iova0 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, > vld1q_u64(mbuf0), 1); > + len_olflags0 = vld1q_u64(mbuf0 + 3); > + dataoff_iova1 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, > vld1q_u64(mbuf1), 1); > + len_olflags1 = vld1q_u64(mbuf1 + 3); > + dataoff_iova2 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, > vld1q_u64(mbuf2), 1); > + len_olflags2 = vld1q_u64(mbuf2 + 3); > + dataoff_iova3 = > + vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, > vld1q_u64(mbuf3), 1); > + len_olflags3 = vld1q_u64(mbuf3 + 3); > > /* Move mbufs to point pool */ > - mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > - 
mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > - mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + > - offsetof(struct rte_mbuf, pool) - > - offsetof(struct rte_mbuf, buf_iova)); > + mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct > rte_mbuf, pool)); > + mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct > rte_mbuf, pool)); > + mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct > rte_mbuf, pool)); > + mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct > rte_mbuf, pool)); > > if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | > NIX_TX_OFFLOAD_L3_L4_CSUM_F)) { > @@ -1091,17 +1079,6 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct > rte_mbuf **tx_pkts, > xtmp128 = vzip2q_u64(len_olflags0, len_olflags1); > ytmp128 = vzip2q_u64(len_olflags2, len_olflags3); > > - /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */ > - const uint64x2_t and_mask0 = { > - 0xFFFFFFFFFFFFFFFF, > - 0x000000000000FFFF, > - }; > - > - dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0); > - dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0); > - dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0); > - dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0); > - > /* > * Pick only 16 bits of pktlen preset at bits 63:32 > * and place them at bits 15:0. 
> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h > index 4cb7c9e90c..abf1e4215f 100644 > --- a/drivers/net/cnxk/cnxk_ethdev.h > +++ b/drivers/net/cnxk/cnxk_ethdev.h > @@ -690,7 +690,6 @@ cnxk_pktmbuf_detach(struct rte_mbuf *m) > > m->priv_size = priv_size; > m->buf_addr = (char *)m + mbuf_size; > - m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size; > m->buf_len = (uint16_t)buf_len; > rte_pktmbuf_reset_headroom(m); > m->data_len = 0; > diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build > index f347e98fce..01489b3a36 100644 > --- a/drivers/net/cnxk/meson.build > +++ b/drivers/net/cnxk/meson.build > @@ -194,3 +194,4 @@ foreach flag: extra_flags > endforeach > > headers = files('rte_pmd_cnxk.h') > +pmd_iova_as_va = true > diff --git a/drivers/raw/cnxk_bphy/meson.build > b/drivers/raw/cnxk_bphy/meson.build > index 14147feaf4..781ed63e05 100644 > --- a/drivers/raw/cnxk_bphy/meson.build > +++ b/drivers/raw/cnxk_bphy/meson.build > @@ -10,3 +10,4 @@ sources = files( > 'cnxk_bphy_irq.c', > ) > headers = files('rte_pmd_bphy.h') > +pmd_iova_as_va = true > diff --git a/drivers/raw/cnxk_gpio/meson.build > b/drivers/raw/cnxk_gpio/meson.build > index a75a5b9084..f9aed173b6 100644 > --- a/drivers/raw/cnxk_gpio/meson.build > +++ b/drivers/raw/cnxk_gpio/meson.build > @@ -9,3 +9,4 @@ sources = files( > 'cnxk_gpio_selftest.c', > ) > headers = files('rte_pmd_cnxk_gpio.h') > +pmd_iova_as_va = true > -- > 2.25.1 >