Hi Vladimir,

On Tue, Oct 8, 2024 at 7:16 PM Vladimir Medvedkin <vladimir.medved...@intel.com> wrote:
> diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
> index 43dba28cfb..edd802abe4 100644
> --- a/lib/fib/dir24_8_avx512.c
> +++ b/lib/fib/dir24_8_avx512.c
> @@ -10,7 +10,7 @@
>
>  static __rte_always_inline void
>  dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
> -	uint64_t *next_hops, int size)
> +	uint64_t *next_hops, int size, bool be_addr)
>  {
>  	struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
>  	__mmask16 msk_ext;
> @@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
>  	res_msk = _mm512_set1_epi32(UINT16_MAX);
>
>  	ip_vec = _mm512_loadu_si512(ips);
> +	if (be_addr) {
> +		const __m512i bswap32 = _mm512_set_epi8(
Some toolchains do not like _mm512_set_epi8 (reported by the CI).

> +			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
> +		);
> +		ip_vec = _mm512_shuffle_epi8(ip_vec, bswap32);
> +	}
> +
>  	/* mask 24 most significant bits */
>  	idxes = _mm512_srli_epi32(ip_vec, 8);
>

[snip]

> diff --git a/lib/fib/meson.build b/lib/fib/meson.build
> index 6795f41a0a..8c03496cdc 100644
> --- a/lib/fib/meson.build
> +++ b/lib/fib/meson.build
> @@ -25,40 +25,28 @@ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
>      # linked into main lib.
>
>      # check if all required flags already enabled (variant a).
> -    acl_avx512_flags = ['__AVX512F__','__AVX512DQ__']
> -    acl_avx512_on = true
> -    foreach f:acl_avx512_flags
> +    fib_avx512_flags = ['__AVX512F__','__AVX512DQ__', '__AVX512BW__']
> +    fib_avx512_on = true
> +    foreach f:fib_avx512_flags
>          if cc.get_define(f, args: machine_args) == ''
> -            acl_avx512_on = false
> +            fib_avx512_on = false
>          endif
>      endforeach

Please reuse the common checks recently merged, see for example:
https://git.dpdk.org/dpdk/diff/drivers/event/dlb2/meson.build?id=ef7a4025cd714189dc333bb19ea60c2abdeffb7d

>
> -    if acl_avx512_on == true
> -        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
> -        sources += files('dir24_8_avx512.c')
> -        # TRIE AVX512 implementation uses avx512bw intrinsics along with
> -        # avx512f and avx512dq
> -        if cc.get_define('__AVX512BW__', args: machine_args) != ''
> -            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
> -            sources += files('trie_avx512.c')
> -        endif
> -    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq')
> +    if fib_avx512_on == true
> +        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']

Nit: now that both dir24_8 and trie share the same requirement, can we go with a simple CC_AVX512_SUPPORT?

-- 
David Marchand