Create macro for adding unroll pragma before for each loop. Batch functions will be contained of several small loops which can be optimized by compilers' loop unrolling pragma.
Signed-off-by: Marvin Liu <yong....@intel.com> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 8623e91c0..87ce1fb27 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -16,6 +16,24 @@ CFLAGS += -I vhost_user CFLAGS += -fno-strict-aliasing LDLIBS += -lpthread +ifeq ($(RTE_TOOLCHAIN), gcc) +ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1) +CFLAGS += -DVHOST_GCC_UNROLL_PRAGMA +endif +endif + +ifeq ($(RTE_TOOLCHAIN), clang) +ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1) +CFLAGS += -DVHOST_CLANG_UNROLL_PRAGMA +endif +endif + +ifeq ($(RTE_TOOLCHAIN), icc) +ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1) +CFLAGS += -DVHOST_ICC_UNROLL_PRAGMA +endif +endif + ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build index cb1123ae3..00435777e 100644 --- a/lib/librte_vhost/meson.build +++ b/lib/librte_vhost/meson.build @@ -8,6 +8,13 @@ endif if has_libnuma == 1 dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true) endif +if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0')) + cflags += '-DVHOST_GCC_UNROLL_PRAGMA' +elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0')) + cflags += '-DVHOST_CLANG_UNROLL_PRAGMA' +elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0')) + cflags += '-DVHOST_ICC_UNROLL_PRAGMA' +endif dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('linux/userfaultfd.h')) version = 4 diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 02b3c91ff..a2b9221e0 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -39,6 +39,30 @@ #define VHOST_LOG_CACHE_NR 32 +#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \ + sizeof(struct vring_packed_desc)) +#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1) + +#ifdef VHOST_GCC_UNROLL_PRAGMA +#define vhost_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \ + for (iter = val; iter < size; iter++) +#endif + +#ifdef VHOST_CLANG_UNROLL_PRAGMA +#define vhost_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \ + for (iter = val; iter < size; iter++) +#endif + +#ifdef VHOST_ICC_UNROLL_PRAGMA +#define vhost_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \ + for (iter = val; iter < size; iter++) +#endif + +#ifndef vhost_for_each_try_unroll +#define vhost_for_each_try_unroll(iter, val, num) \ + for (iter = val; iter < num; iter++) +#endif + /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. -- 2.17.1