Declare pmd_supports_disable_iova_as_pa for the ice and common/iavf drivers. Remove buf_iova accesses when RTE_IOVA_AS_PA is disabled (set to 0).
The patch simply replaces buf_iova with buf_addr in IOVA as VA mode. Some SIMD instructions in the data path may now be redundant; further optimization is expected. Signed-off-by: Qi Zhang <qi.z.zh...@intel.com> --- v2: - fix title and commit log - use rte_pktmbuf_iova drivers/common/iavf/meson.build | 1 + drivers/net/ice/ice_rxtx_common_avx.h | 24 ++++++++++++++++++++++++ drivers/net/ice/ice_rxtx_vec_avx2.c | 15 +++++---------- drivers/net/ice/ice_rxtx_vec_avx512.c | 20 ++++++++++---------- drivers/net/ice/ice_rxtx_vec_sse.c | 11 +++++++++-- drivers/net/ice/meson.build | 6 +++--- 6 files changed, 52 insertions(+), 25 deletions(-) diff --git a/drivers/common/iavf/meson.build b/drivers/common/iavf/meson.build index 977652223b..af8a4983e0 100644 --- a/drivers/common/iavf/meson.build +++ b/drivers/common/iavf/meson.build @@ -6,3 +6,4 @@ sources = files('iavf_adminq.c', 'iavf_common.c', 'iavf_impl.c') if cc.has_argument('-Wno-pointer-to-int-cast') cflags += '-Wno-pointer-to-int-cast' endif +pmd_supports_disable_iova_as_pa = true diff --git a/drivers/net/ice/ice_rxtx_common_avx.h b/drivers/net/ice/ice_rxtx_common_avx.h index 81e0db5dd3..e69e23997f 100644 --- a/drivers/net/ice/ice_rxtx_common_avx.h +++ b/drivers/net/ice/ice_rxtx_common_avx.h @@ -54,15 +54,23 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) mb0 = rxep[0].mbuf; mb1 = rxep[1].mbuf; +#if RTE_IOVA_AS_PA /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != offsetof(struct rte_mbuf, buf_addr) + 8); +#endif vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +#if RTE_IOVA_AS_PA /* convert pa to dma_addr hdr/data */ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); +#else + /* convert va to dma_addr hdr/data */ + dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0); + dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1); +#endif /* add headroom to pa 
values */ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); @@ -97,9 +105,11 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) mb6 = rxep[6].mbuf; mb7 = rxep[7].mbuf; +#if RTE_IOVA_AS_PA /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != offsetof(struct rte_mbuf, buf_addr) + 8); +#endif vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); @@ -132,9 +142,15 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5), vaddr6_7, 1); +#if RTE_IOVA_AS_PA /* convert pa to dma_addr hdr/data */ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3); dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7); +#else + /* convert va to dma_addr hdr/data */ + dma_addr0_3 = _mm512_unpacklo_epi64(vaddr0_3, vaddr0_3); + dma_addr4_7 = _mm512_unpacklo_epi64(vaddr4_7, vaddr4_7); +#endif /* add headroom to pa values */ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room); @@ -161,9 +177,11 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) mb2 = rxep[2].mbuf; mb3 = rxep[3].mbuf; +#if RTE_IOVA_AS_PA /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != offsetof(struct rte_mbuf, buf_addr) + 8); +#endif vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); @@ -180,9 +198,15 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), vaddr3, 1); +#if RTE_IOVA_AS_PA /* convert pa to dma_addr hdr/data */ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); +#else + /* convert va to dma_addr hdr/data */ + dma_addr0_1 = 
_mm256_unpacklo_epi64(vaddr0_1, vaddr0_1); + dma_addr2_3 = _mm256_unpacklo_epi64(vaddr2_3, vaddr2_3); +#endif /* add headroom to pa values */ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c index 31d6af42fd..fd13ff18f1 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx2.c +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c @@ -821,8 +821,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp, if (offload) ice_txd_enable_offload(pkt, &high_qw); - __m128i descriptor = _mm_set_epi64x(high_qw, - pkt->buf_iova + pkt->data_off); + __m128i descriptor = _mm_set_epi64x(high_qw, rte_pktmbuf_iova(pkt)); _mm_store_si128((__m128i *)txdp, descriptor); } @@ -868,16 +867,12 @@ ice_vtx(volatile struct ice_tx_desc *txdp, __m256i desc2_3 = _mm256_set_epi64x - (hi_qw3, - pkt[3]->buf_iova + pkt[3]->data_off, - hi_qw2, - pkt[2]->buf_iova + pkt[2]->data_off); + (hi_qw3, rte_pktmbuf_iova(pkt[3]), + hi_qw2, rte_pktmbuf_iova(pkt[2])); __m256i desc0_1 = _mm256_set_epi64x - (hi_qw1, - pkt[1]->buf_iova + pkt[1]->data_off, - hi_qw0, - pkt[0]->buf_iova + pkt[0]->data_off); + (hi_qw1, rte_pktmbuf_iova(pkt[1]), + hi_qw0, rte_pktmbuf_iova(pkt[0])); _mm256_store_si256((void *)(txdp + 2), desc2_3); _mm256_store_si256((void *)txdp, desc0_1); } diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c index 5bfd5152df..7e388b7569 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx512.c +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c @@ -56,8 +56,13 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) } } +#if RTE_IOVA_AS_PA const __m512i iova_offsets = _mm512_set1_epi64 (offsetof(struct rte_mbuf, buf_iova)); +#else + const __m512i iova_offsets = _mm512_set1_epi64 + (offsetof(struct rte_mbuf, buf_addr)); +#endif const __m512i headroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM); #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC @@ -1092,8 +1097,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp, if (do_offload) 
ice_txd_enable_offload(pkt, &high_qw); - __m128i descriptor = _mm_set_epi64x(high_qw, - pkt->buf_iova + pkt->data_off); + __m128i descriptor = _mm_set_epi64x(high_qw, rte_pktmbuf_iova(pkt)); _mm_store_si128((__m128i *)txdp, descriptor); } @@ -1132,14 +1136,10 @@ ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkt, __m512i desc0_3 = _mm512_set_epi64 - (hi_qw3, - pkt[3]->buf_iova + pkt[3]->data_off, - hi_qw2, - pkt[2]->buf_iova + pkt[2]->data_off, - hi_qw1, - pkt[1]->buf_iova + pkt[1]->data_off, - hi_qw0, - pkt[0]->buf_iova + pkt[0]->data_off); + (hi_qw3, rte_pktmbuf_iova(pkt[3]), + hi_qw2, rte_pktmbuf_iova(pkt[2]), + hi_qw1, rte_pktmbuf_iova(pkt[1]), + hi_qw0, rte_pktmbuf_iova(pkt[0])); _mm512_storeu_si512((void *)txdp, desc0_3); } diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c index fd94cedde3..72dfd58308 100644 --- a/drivers/net/ice/ice_rxtx_vec_sse.c +++ b/drivers/net/ice/ice_rxtx_vec_sse.c @@ -68,15 +68,23 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) mb0 = rxep[0].mbuf; mb1 = rxep[1].mbuf; +#if RTE_IOVA_AS_PA /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != offsetof(struct rte_mbuf, buf_addr) + 8); +#endif vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +#if RTE_IOVA_AS_PA /* convert pa to dma_addr hdr/data */ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); +#else + /* convert va to dma_addr hdr/data */ + dma_addr0 = _mm_unpacklo_epi64(vaddr0, vaddr0); + dma_addr1 = _mm_unpacklo_epi64(vaddr1, vaddr1); +#endif /* add headroom to pa values */ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); @@ -671,8 +679,7 @@ ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt, ((uint64_t)flags << ICE_TXD_QW1_CMD_S) | ((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S)); - __m128i descriptor = _mm_set_epi64x(high_qw, - pkt->buf_iova + pkt->data_off); + 
__m128i descriptor = _mm_set_epi64x(high_qw, rte_pktmbuf_iova(pkt)); _mm_store_si128((__m128i *)txdp, descriptor); } diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build index 8efa533e0b..123b190f72 100644 --- a/drivers/net/ice/meson.build +++ b/drivers/net/ice/meson.build @@ -20,7 +20,6 @@ testpmd_sources = files('ice_testpmd.c') deps += ['hash', 'net', 'common_iavf'] includes += include_directories('base', '../../common/iavf') -iova_as_pa = dpdk_conf.get('RTE_IOVA_AS_PA') if arch_subdir == 'x86' sources += files('ice_rxtx_vec_sse.c') @@ -35,7 +34,7 @@ if arch_subdir == 'x86' if cc.get_define('__AVX2__', args: machine_args) != '' cflags += ['-DCC_AVX2_SUPPORT'] sources += files('ice_rxtx_vec_avx2.c') - elif iova_as_pa == 1 and cc.has_argument('-mavx2') + elif cc.has_argument('-mavx2') cflags += ['-DCC_AVX2_SUPPORT'] ice_avx2_lib = static_library('ice_avx2_lib', 'ice_rxtx_vec_avx2.c', @@ -56,7 +55,7 @@ if arch_subdir == 'x86' cc.has_argument('-mavx512bw') ) - if iova_as_pa == 1 and (ice_avx512_cpu_support == true or ice_avx512_cc_support == true) + if ice_avx512_cpu_support == true or ice_avx512_cc_support == true cflags += ['-DCC_AVX512_SUPPORT'] avx512_args = [cflags, '-mavx512f', '-mavx512bw'] if cc.has_argument('-march=skylake-avx512') @@ -79,3 +78,4 @@ sources += files( 'ice_dcf_parent.c', 'ice_dcf_sched.c', ) +pmd_supports_disable_iova_as_pa = true -- 2.31.1