Disable software pre-fetch actions on Skylake and later platforms.
Hardware can fetch needed data for vhost, additional software pre-fetch
will impact performance.

Signed-off-by: Marvin Liu <yong....@intel.com>

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 30839a001..5f3b42e56 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -16,6 +16,12 @@ CFLAGS += -I vhost_user
 CFLAGS += -fno-strict-aliasing
 LDLIBS += -lpthread
 
+AVX512_SUPPORT=$(shell $(CC) -march=native -dM -E - </dev/null |grep AVX512F)
+
+ifneq ($(AVX512_SUPPORT),)
+CFLAGS += -DDISABLE_SWPREFETCH
+endif
+
 ifeq ($(RTE_TOOLCHAIN), gcc)
 ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
 CFLAGS += -DSUPPORT_GCC_UNROLL_PRAGMA
diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index ddf0ee579..5c6f0c0b4 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -15,6 +15,10 @@ elif (toolchain == 'clang' and 
cc.version().version_compare('>=3.7.0'))
 elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
        cflags += '-DSUPPORT_ICC_UNROLL_PRAGMA'
 endif
+r = run_command(toolchain, '-march=native', '-dM', '-E', '-', '</dev/null', 
'|', 'grep AVX512F')
+if (r.stdout().strip() != '')
+       cflags += '-DDISABLE_SWPREFETCH'
+endif
 dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
              cc.has_header('linux/userfaultfd.h'))
 version = 4
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 56c2080fb..046e497c2 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1075,7 +1075,9 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
        UNROLL_PRAGMA(UNROLL_PRAGMA_PARAM)
        for (i = 0; i < PACKED_BATCH_SIZE; i++) {
+#ifndef DISABLE_SWPREFETCH
                rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+#endif
                hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
                                        (uintptr_t)desc_addrs[i];
                lens[i] = pkts[i]->pkt_len + dev->vhost_hlen;
@@ -1144,7 +1146,9 @@ virtio_dev_rx_packed(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
        uint32_t remained = count;
 
        do {
+#ifndef DISABLE_SWPREFETCH
                rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+#endif
 
                if (remained >= PACKED_BATCH_SIZE) {
                        if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
@@ -1790,7 +1794,9 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
        UNROLL_PRAGMA(UNROLL_PRAGMA_PARAM)
        for (i = 0; i < PACKED_BATCH_SIZE; i++) {
+#ifndef DISABLE_SWPREFETCH
                rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+#endif
                rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
                           (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
                           pkts[i]->pkt_len);
@@ -2046,7 +2052,9 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
        uint32_t remained = count;
 
        do {
+#ifndef DISABLE_SWPREFETCH
                rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+#endif
 
                if (remained >= PACKED_BATCH_SIZE) {
                        if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
-- 
2.17.1

Reply via email to