I couldn't find the patch in my Gmail inbox, so I'll reply here.

>@@ -268,6 +269,7 @@ enum processor_features
>   FEATURE_USER_MSR,
>   FEATURE_AVX10_1 = 114,
>   FEATURE_AVX10_2 = 116,
>+  FEATURE_AVX512BMM,
>   FEATURE_AMX_AVX512,
>   FEATURE_AMX_TF32,

The newly added feature should be inserted at the end of the enum (before CPU_FEATURE_MAX), so that the values of the existing entries do not change.

+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_vbitrevb_epi8 (__mmask64 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vbitrevb512_mask ((__v64qi) __A,
+    (__v64qi)
+    _mm512_setzero_epi8 (),
+    (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_vbitrevb_epi8 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vbitrevb512_mask ((__v64qi) __A,
+    (__v64qi)
+    _mm512_undefined_epi8 (),

You can use _mm512_undefined_epi32 and _mm512_setzero_epi32 directly, so
the new definitions of _mm512_setzero_epi8/_mm512_undefined_epi8 are not
needed.


extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm128_maskz_vbitrevb_epi8 (__mmask16 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_vbitrevb128_mask ((__v16qi) __A,
+    (__v16qi)
+    _mm128_setzero_epi8 (),
+    (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm128_vbitrevb_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_vbitrevb128_mask ((__v16qi) __A,
+    (__v16qi)
+    _mm128_undefined_epi8 (),
+    (__mmask16) -1);
+}
Similar here.


+# AVX512BMM builtins
+DEF_FUNCTION_TYPE (V16QI, V16QI, UHI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, USI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, UDI)
+

I don't see any builtin defined with V16QI_FTYPE_V16QI_UHI, so is it redundant?

+    case V16QI_FTYPE_V16QI_UHI:
+    case V32QI_FTYPE_V32QI_USI:
+    case V64QI_FTYPE_V64QI_UDI:
+      nargs = 2;
Ditto.

+(define_mode_iterator VI1_AVX512BMM_HI
+  [V32HI (V16HI "TARGET_AVX512VL")])

A better name for this iterator would be VI2_256_512_AVX512VL.


+(define_mode_iterator VI1_AVX512BMM_QI
+  [V64QI (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")])
You can use the existing VI1_AVX512VL; there is no need to define a new mode iterator.


Otherwise, the implementation looks OK.
-- 
BR,
Hongtao

Reply via email to