> When choosing a vector path to take, an extra condition must be > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled > path. These checks are added in the check alg helper functions. > > Cc: Konstantin Ananyev <konstantin.anan...@intel.com> > > Signed-off-by: Ciara Power <ciara.po...@intel.com> > --- > lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------ > 1 file changed, 21 insertions(+), 6 deletions(-)
I think we need to update PG for ACL (section " Classification methods") to reflect these changes. And probably doxygen comments for rte_acl_set_ctx_classify() too. > diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c > index 7c2f60b2d6..4ec6c982c9 100644 > --- a/lib/librte_acl/rte_acl.c > +++ b/lib/librte_acl/rte_acl.c > @@ -16,6 +16,8 @@ static struct rte_tailq_elem rte_acl_tailq = { > }; > EAL_REGISTER_TAILQ(rte_acl_tailq) > > +uint16_t max_simd_bitwidth; > + > #ifndef CC_AVX512_SUPPORT > /* > * If the compiler doesn't support AVX512 instructions, > @@ -114,9 +116,13 @@ acl_check_alg_arm(enum rte_acl_classify_alg alg) > { > if (alg == RTE_ACL_CLASSIFY_NEON) { > #if defined(RTE_ARCH_ARM64) > - return 0; > + if (max_simd_bitwidth >= RTE_SIMD_128) All these are control path functions. So no point to have local copy of max_simd_bitwidth. Here and in all other places within that file we can just call rte_get_max_simd_bitwidth() straightway. > + return 0; > + else > + return -ENOTSUP; > #elif defined(RTE_ARCH_ARM) > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) && > + max_simd_bitwidth >= RTE_SIMD_128) > return 0; > return -ENOTSUP; > #else > @@ -136,7 +142,10 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg) > { > if (alg == RTE_ACL_CLASSIFY_ALTIVEC) { > #if defined(RTE_ARCH_PPC_64) > - return 0; > + if (max_simd_bitwidth >= RTE_SIMD_128) > + return 0; > + else > + return -ENOTSUP; > #else > return -ENOTSUP; > #endif > @@ -158,7 +167,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg) > if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) && > rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) && > rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) && > - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW)) > + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) && > + max_simd_bitwidth >= RTE_SIMD_512) That's not exactly correct, as we have two algs (256 and 512 bit-width) for avx512. So we have to check different max-simd valued for different algorithms. Something like that: static int acl_check_avx512_cpu_flags(void) { return (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW)); } static int acl_check_alg_x86(enum rte_acl_classify_alg alg) { if (alg == RTE_ACL_CLASSIFY_AVX512X32) { #ifdef CC_AVX512_SUPPORT if (acl_check_avx512_cpu_flags) != 0 && rte_get_max_simd_bitwidth() >= RTE_SIMD_512) return 0; #endif return -ENOTSUP; if (alg == RTE_ACL_CLASSIFY_AVX512X16) { #ifdef CC_AVX512_SUPPORT if (acl_check_avx512_cpu_flags) != 0 && rte_get_max_simd_bitwidth() >= RTE_SIMD_256) return 0; #endif return -ENOTSUP; if (alg == RTE_ACL_CLASSIFY_AVX2) { .... > return 0; > #endif > return -ENOTSUP; > @@ -166,7 +176,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg) > > if (alg == RTE_ACL_CLASSIFY_AVX2) { > #ifdef CC_AVX2_SUPPORT > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && > + max_simd_bitwidth >= RTE_SIMD_256) > return 0; > #endif > return -ENOTSUP; > @@ -174,7 +185,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg) > > if (alg == RTE_ACL_CLASSIFY_SSE) { > #ifdef RTE_ARCH_X86 > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) && > + max_simd_bitwidth >= RTE_SIMD_128) > return 0; > #endif > return -ENOTSUP; > @@ -406,6 +418,9 @@ rte_acl_create(const struct rte_acl_param *param) > TAILQ_INSERT_TAIL(acl_list, te, next); > } > > + if (max_simd_bitwidth == 0) > + max_simd_bitwidth = rte_get_max_simd_bitwidth(); > + > exit: > rte_mcfg_tailq_write_unlock(); > return ctx; > -- > 2.22.0