On Tue, Feb 21, 2017 at 03:17:42AM +0000, David Hunt wrote: > Add an optimised version of the in-flight flow matching algorithm > using SIMD instructions. This should give up to 1.5x over the scalar > versions performance. > > Falls back to scalar version if SSE4.2 not available > > Signed-off-by: David Hunt <david.h...@intel.com> > --- > lib/librte_distributor/Makefile | 7 ++ > lib/librte_distributor/rte_distributor.c | 16 ++- > .../rte_distributor_match_generic.c | 43 ++++++++ > lib/librte_distributor/rte_distributor_match_sse.c | 113 > +++++++++++++++++++++ > lib/librte_distributor/rte_distributor_private.h | 5 + > 5 files changed, 182 insertions(+), 2 deletions(-) > create mode 100644 lib/librte_distributor/rte_distributor_match_generic.c > create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c > > diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile > index 276695a..5b599c6 100644 > --- a/lib/librte_distributor/Makefile > +++ b/lib/librte_distributor/Makefile > @@ -44,6 +44,13 @@ LIBABIVER := 1 > # all source are stored in SRCS-y > SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor_v20.c > SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor.c > +ifeq ($(CONFIG_RTE_ARCH_X86),y) > +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c > +CFLAGS_rte_distributor_match_sse.o += -msse4.2 > +else > +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_generic.c > +endif > + > > # install this header file > SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor_v20.h > diff --git a/lib/librte_distributor/rte_distributor.c > b/lib/librte_distributor/rte_distributor.c > index ae8d508..b8e171c 100644 > --- a/lib/librte_distributor/rte_distributor.c > +++ b/lib/librte_distributor/rte_distributor.c > @@ -392,7 +392,13 @@ rte_distributor_process(struct rte_distributor *d, > for (; i < RTE_DIST_BURST_SIZE; i++) > flows[i] = 0; > > - find_match_scalar(d, &flows[0], &matches[0]); > + 
switch (d->dist_match_fn) { > + case RTE_DIST_MATCH_VECTOR: > + find_match_vec(d, &flows[0], &matches[0]); > + break; > + default: > + find_match_scalar(d, &flows[0], &matches[0]); > + } > > /* > * Matches array now contain the intended worker ID (+1) of > @@ -608,7 +614,13 @@ rte_distributor_create(const char *name, > snprintf(d->name, sizeof(d->name), "%s", name); > d->num_workers = num_workers; > d->alg_type = alg_type; > - d->dist_match_fn = RTE_DIST_MATCH_SCALAR; > + > +#if defined(RTE_ARCH_X86) > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) { > + d->dist_match_fn = RTE_DIST_MATCH_VECTOR; > + } else
Minor nit: you can remove the braces around this single-statement `if` body (the `d->dist_match_fn = RTE_DIST_MATCH_VECTOR;` assignment). /Bruce