> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path. These checks are added in the check alg helper functions.
> 
> Cc: Konstantin Ananyev <konstantin.anan...@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.po...@intel.com>
> ---
>  lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)

I think we need to update PG for ACL (section " Classification methods")
to reflect these changes. 
And probably doxygen comments for rte_acl_set_ctx_classify() too.
 
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index 7c2f60b2d6..4ec6c982c9 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -16,6 +16,8 @@ static struct rte_tailq_elem rte_acl_tailq = {
>  };
>  EAL_REGISTER_TAILQ(rte_acl_tailq)
> 
> +uint16_t max_simd_bitwidth;
> +
>  #ifndef CC_AVX512_SUPPORT
>  /*
>   * If the compiler doesn't support AVX512 instructions,
> @@ -114,9 +116,13 @@ acl_check_alg_arm(enum rte_acl_classify_alg alg)
>  {
>       if (alg == RTE_ACL_CLASSIFY_NEON) {
>  #if defined(RTE_ARCH_ARM64)
> -             return 0;
> +             if (max_simd_bitwidth >= RTE_SIMD_128)

All these are control path functions.
So no point to have local copy of max_simd_bitwidth.
Here and in all other places within that file we can just call
rte_get_max_simd_bitwidth() straightway.

> +                     return 0;
> +             else
> +                     return -ENOTSUP;
>  #elif defined(RTE_ARCH_ARM)
> -             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
> +             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
> +                             max_simd_bitwidth >= RTE_SIMD_128)
>                       return 0;
>               return -ENOTSUP;
>  #else
> @@ -136,7 +142,10 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
>  {
>       if (alg == RTE_ACL_CLASSIFY_ALTIVEC) {
>  #if defined(RTE_ARCH_PPC_64)
> -             return 0;
> +             if (max_simd_bitwidth >= RTE_SIMD_128)
> +                     return 0;
> +             else
> +                     return -ENOTSUP;
>  #else
>               return -ENOTSUP;
>  #endif
> @@ -158,7 +167,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
>               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
>                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
>                       rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
> -                     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
> +                     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) &&
> +                     max_simd_bitwidth >= RTE_SIMD_512)


That's not exactly correct, as we have two algs (256 and 512 bit-width) for 
avx512.
So we have to check different max-simd valued for different algorithms.
Something like that:

static int
acl_check_avx512_cpu_flags(void)
{
        return (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW));
 }                      


static int
acl_check_alg_x86(enum rte_acl_classify_alg alg)
{
        if (alg == RTE_ACL_CLASSIFY_AVX512X32) {
#ifdef CC_AVX512_SUPPORT
        if (acl_check_avx512_cpu_flags) != 0 &&
                        rte_get_max_simd_bitwidth() >=  RTE_SIMD_512)
                return 0;
#endif
        return -ENOTSUP;

        if (alg == RTE_ACL_CLASSIFY_AVX512X16) {
#ifdef CC_AVX512_SUPPORT
        if (acl_check_avx512_cpu_flags) != 0 &&
                        rte_get_max_simd_bitwidth() >=  RTE_SIMD_256)
                return 0;
#endif
        return -ENOTSUP;

        if (alg == RTE_ACL_CLASSIFY_AVX2) {     
                ....

>                       return 0;
>  #endif
>               return -ENOTSUP;
> @@ -166,7 +176,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
> 
>       if (alg == RTE_ACL_CLASSIFY_AVX2) {
>  #ifdef CC_AVX2_SUPPORT
> -             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +                             max_simd_bitwidth >= RTE_SIMD_256)
>                       return 0;
>  #endif
>               return -ENOTSUP;
> @@ -174,7 +185,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
> 
>       if (alg == RTE_ACL_CLASSIFY_SSE) {
>  #ifdef RTE_ARCH_X86
> -             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
> +             if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) &&
> +                             max_simd_bitwidth >= RTE_SIMD_128)
>                       return 0;
>  #endif
>               return -ENOTSUP;
> @@ -406,6 +418,9 @@ rte_acl_create(const struct rte_acl_param *param)
>               TAILQ_INSERT_TAIL(acl_list, te, next);
>       }
> 
> +     if (max_simd_bitwidth == 0)
> +             max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  exit:
>       rte_mcfg_tailq_write_unlock();
>       return ctx;
> --
> 2.22.0

Reply via email to