Hi Vladimir,

On Tue, Oct 8, 2024 at 7:16 PM Vladimir Medvedkin
<vladimir.medved...@intel.com> wrote:
> diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
> index 43dba28cfb..edd802abe4 100644
> --- a/lib/fib/dir24_8_avx512.c
> +++ b/lib/fib/dir24_8_avx512.c
> @@ -10,7 +10,7 @@
>
>  static __rte_always_inline void
>  dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
> -       uint64_t *next_hops, int size)
> +       uint64_t *next_hops, int size, bool be_addr)
>  {
>         struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
>         __mmask16 msk_ext;
> @@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
>                 res_msk = _mm512_set1_epi32(UINT16_MAX);
>
>         ip_vec = _mm512_loadu_si512(ips);
> +       if (be_addr) {
> +               const __m512i bswap32 = _mm512_set_epi8(

Some toolchains fail to build with _mm512_set_epi8 (reported by the CI).


> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
> +               );
> +               ip_vec = _mm512_shuffle_epi8(ip_vec, bswap32);
> +       }
> +
>         /* mask 24 most significant bits */
>         idxes = _mm512_srli_epi32(ip_vec, 8);
>

[snip]

> diff --git a/lib/fib/meson.build b/lib/fib/meson.build
> index 6795f41a0a..8c03496cdc 100644
> --- a/lib/fib/meson.build
> +++ b/lib/fib/meson.build
> @@ -25,40 +25,28 @@ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
>      # linked into main lib.
>
>      # check if all required flags already enabled (variant a).
> -    acl_avx512_flags = ['__AVX512F__','__AVX512DQ__']
> -    acl_avx512_on = true
> -    foreach f:acl_avx512_flags
> +    fib_avx512_flags = ['__AVX512F__','__AVX512DQ__', '__AVX512BW__']
> +    fib_avx512_on = true
> +    foreach f:fib_avx512_flags
>          if cc.get_define(f, args: machine_args) == ''
> -            acl_avx512_on = false
> +            fib_avx512_on = false
>          endif
>      endforeach

Please reuse the common checks that were recently merged; see for example:
https://git.dpdk.org/dpdk/diff/drivers/event/dlb2/meson.build?id=ef7a4025cd714189dc333bb19ea60c2abdeffb7d


>
> -    if acl_avx512_on == true
> -        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
> -        sources += files('dir24_8_avx512.c')
> -        # TRIE AVX512 implementation uses avx512bw intrinsics along with
> -        # avx512f and avx512dq
> -        if cc.get_define('__AVX512BW__', args: machine_args) != ''
> -            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
> -            sources += files('trie_avx512.c')
> -        endif
> -    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq')
> +    if fib_avx512_on == true
> +        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']

Nit: now that both dir24_8 and trie share the same requirement, can we
go with a single CC_AVX512_SUPPORT define?


-- 
David Marchand

Reply via email to