From: Haochen Jiang <haochen.jiang@intel.com> gcc/ChangeLog:
* config/i386/avx512bwintrin.h: Add evex512 target for 512 bit intrins. --- gcc/config/i386/avx512bwintrin.h | 291 ++++++++++++++++--------------- 1 file changed, 153 insertions(+), 138 deletions(-) diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index d1cd549ce18..925bae1457c 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -34,16 +34,6 @@ #define __DISABLE_AVX512BW__ #endif /* __AVX512BW__ */ -/* Internal data types for implementing the intrinsics. */ -typedef short __v32hi __attribute__ ((__vector_size__ (64))); -typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); -typedef char __v64qi __attribute__ ((__vector_size__ (64))); -typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); - -typedef unsigned long long __mmask64; - extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) @@ -54,229 +44,292 @@ _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) +_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); + return (unsigned char) __builtin_ia32_ktestcsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) { - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) +_kadd_mask32 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned char +extern __inline unsigned int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_cvtmask32_u32 (__mmask32 __A) { - *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask32_u8 
(__mmask32 __A, __mmask32 __B) +_cvtu32_mask32 (unsigned int __A) { - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_load_mask32 (__mmask32 *__A) { - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd (*__A); } -extern __inline unsigned char +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_store_mask32 (__mmask32 *__A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_knot_mask32 (__mmask32 __A) { - return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned int +extern __inline __mmask32 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask32_u32 (__mmask32 __A) +_kxor_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned long long +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask64_u64 (__mmask64 __A) +_kand_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A); + return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu32_mask32 (unsigned int __A) +_kandn_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu64_mask64 (unsigned long long __A) +_mm512_kunpackw (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask32 (__mmask32 *__A) +_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) { - return (__mmask32) __builtin_ia32_kmovd (*__A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } -extern __inline __mmask64 +#if __OPTIMIZE__ +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask64 (__mmask64 *__A) +_kshiftli_mask32 (__mmask32 __A, unsigned int __B) { - return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); + return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, + (__mmask8) 
__B); } -extern __inline void +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask32 (__mmask32 *__A, __mmask32 __B) +_kshiftri_mask32 (__mmask32 __A, unsigned int __B) { - *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); + return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, + (__mmask8) __B); } -extern __inline void +#else +#define _kshiftli_mask32(X, Y) \ + ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) + +#define _kshiftri_mask32(X, Y) \ + ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) + +#endif + +#ifdef __DISABLE_AVX512BW__ +#undef __DISABLE_AVX512BW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BW__ */ + +#if !defined (__AVX512BW__) || !defined (__EVEX512__) +#pragma GCC push_options +#pragma GCC target("avx512bw,evex512") +#define __DISABLE_AVX512BW_512__ +#endif /* __AVX512BW_512__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); +typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); + +typedef unsigned long long __mmask64; + +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask64 (__mmask64 *__A, __mmask64 __B) +_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); + *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask32 (__mmask32 __A) +_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) 
__builtin_ia32_knotsi ((__mmask32) __A); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask64 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask64 (__mmask64 __A) +_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); + return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kor_mask32 (__mmask32 __A, __mmask32 __B) +_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); + *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kor_mask64 (__mmask64 __A, __mmask64 __B) +_kadd_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline unsigned long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask32 (__mmask32 __A, __mmask32 __B) +_cvtmask64_u64 (__mmask64 __A) { - return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); + return (unsigned long long) 
__builtin_ia32_kmovq ((__mmask64) __A); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask64 (__mmask64 __A, __mmask64 __B) +_cvtu64_mask64 (unsigned long long __A) { - return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask32 (__mmask32 __A, __mmask32 __B) +_load_mask64 (__mmask64 *__A) { - return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask64 (__mmask64 *__A, __mmask64 __B) +{ + *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask64 (__mmask64 __A, __mmask64 __B) +_knot_mask64 (__mmask64 __A) { - return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline __mmask64 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kandn_mask32 (__mmask32 __A, __mmask32 __B) +_kxor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); +} + +extern __inline __mmask64 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kand_mask64 (__mmask64 __A, __mmask64 __B) +{ + return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B); } extern __inline __mmask64 @@ -366,22 +419,6 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) (__mmask64) __U); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_kunpackd (__mmask64 __A, __mmask64 __B) @@ -2776,14 +2813,6 @@ _mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A, } #ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftli_mask32 (__mmask32 __A, unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftli_mask64 (__mmask64 __A, unsigned int __B) @@ -2792,14 +2821,6 @@ _kshiftli_mask64 (__mmask64 __A, unsigned int __B) (__mmask8) __B); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftri_mask32 (__mmask32 __A, 
unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftri_mask64 (__mmask64 __A, unsigned int __B) @@ -3145,15 +3166,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) } #else -#define _kshiftli_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftli_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y))) -#define _kshiftri_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftri_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y))) @@ -3328,9 +3343,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) #endif -#ifdef __DISABLE_AVX512BW__ -#undef __DISABLE_AVX512BW__ +#ifdef __DISABLE_AVX512BW_512__ +#undef __DISABLE_AVX512BW_512__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BW__ */ +#endif /* __DISABLE_AVX512BW_512__ */ #endif /* _AVX512BWINTRIN_H_INCLUDED */ -- 2.31.1