On Tue, Feb 21, 2017 at 03:17:42AM +0000, David Hunt wrote:
> Add an optimised version of the in-flight flow matching algorithm
> using SIMD instructions. This should give up to 1.5x the performance of
> the scalar version.
> 
> Falls back to the scalar version if SSE4.2 is not available.
> 
> Signed-off-by: David Hunt <david.h...@intel.com>
> ---
>  lib/librte_distributor/Makefile                    |   7 ++
>  lib/librte_distributor/rte_distributor.c           |  16 ++-
>  .../rte_distributor_match_generic.c                |  43 ++++++++
>  lib/librte_distributor/rte_distributor_match_sse.c | 113 +++++++++++++++++++++
>  lib/librte_distributor/rte_distributor_private.h   |   5 +
>  5 files changed, 182 insertions(+), 2 deletions(-)
>  create mode 100644 lib/librte_distributor/rte_distributor_match_generic.c
>  create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c
> 
> diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
> index 276695a..5b599c6 100644
> --- a/lib/librte_distributor/Makefile
> +++ b/lib/librte_distributor/Makefile
> @@ -44,6 +44,13 @@ LIBABIVER := 1
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor_v20.c
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor.c
> +ifeq ($(CONFIG_RTE_ARCH_X86),y)
> +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
> +CFLAGS_rte_distributor_match_sse.o += -msse4.2
> +else
> +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_generic.c
> +endif
> +
>  
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor_v20.h
> diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
> index ae8d508..b8e171c 100644
> --- a/lib/librte_distributor/rte_distributor.c
> +++ b/lib/librte_distributor/rte_distributor.c
> @@ -392,7 +392,13 @@ rte_distributor_process(struct rte_distributor *d,
>               for (; i < RTE_DIST_BURST_SIZE; i++)
>                       flows[i] = 0;
>  
> -             find_match_scalar(d, &flows[0], &matches[0]);
> +             switch (d->dist_match_fn) {
> +             case RTE_DIST_MATCH_VECTOR:
> +                     find_match_vec(d, &flows[0], &matches[0]);
> +                     break;
> +             default:
> +                     find_match_scalar(d, &flows[0], &matches[0]);
> +             }
>  
>               /*
>                * Matches array now contain the intended worker ID (+1) of
> @@ -608,7 +614,13 @@ rte_distributor_create(const char *name,
>       snprintf(d->name, sizeof(d->name), "%s", name);
>       d->num_workers = num_workers;
>       d->alg_type = alg_type;
> -     d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
> +
> +#if defined(RTE_ARCH_X86)
> +     if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) {
> +             d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
> +     } else

Minor nit: you can remove the braces here.

/Bruce

Reply via email to