Author: Harsh Tiwary Date: 2025-09-15T09:40:27Z New Revision: 44061d14fb03ebbd38050c628ed009ae4db2714c
URL: https://github.com/llvm/llvm-project/commit/44061d14fb03ebbd38050c628ed009ae4db2714c DIFF: https://github.com/llvm/llvm-project/commit/44061d14fb03ebbd38050c628ed009ae4db2714c.diff LOG: [Headers][X86] Allow AVX512 masked blend intrinsics to be used in constexpr (#156234) This patch enables AVX-512 masked blend intrinsics to be usable in constant expressions (`constexpr`) across various vector widths (128-bit, 256-bit, 512-bit). It updates the respective Clang headers to include the `__DEFAULT_FN_ATTRS_CONSTEXPR` annotation where applicable, and supplements the change with thorough `TEST_CONSTEXPR` checks in the X86 CodeGen test suite to validate constexpr evaluation. Fixes #155796. --------- Co-authored-by: Simon Pilgrim <llvm-...@redking.me.uk> Added: Modified: clang/lib/Headers/avx512bwintrin.h clang/lib/Headers/avx512fintrin.h clang/lib/Headers/avx512fp16intrin.h clang/lib/Headers/avx512vlbwintrin.h clang/lib/Headers/avx512vlfp16intrin.h clang/lib/Headers/avx512vlintrin.h clang/test/CodeGen/X86/avx512bw-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c clang/test/CodeGen/X86/avx512vlbw-builtins.c clang/test/CodeGen/X86/avx512vlfp16-builtins.c Removed: ################################################################################ diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 42fce7d89e1bb..77820a2ca041c 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -464,17 +464,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __W, (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __W, (__v32hi) __A); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 7ba09039cd826..8ebfb75170e17 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3209,33 +3209,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, /* Vector Blend */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __W, (__v8df) __A); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __W, (__v16sf) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __W, (__v8di) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __W, (__v16si) __A); diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index d30b49e552e1b..4bd798129a25d 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -3309,7 +3309,7 @@ _mm512_reduce_min_ph(__m512h __V) { return __builtin_ia32_reduce_fmin_ph512(__V); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, (__v32hf)__A); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 6e3efa7b3562c..f1cd71af05ab5 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -452,33 +452,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __W, (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, (__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, (__v16hi) __A); diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 8eb31eae6173b..ec766e31c6769 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -1995,14 +1995,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { (__v8sf)__C, (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, - __m128h __A, - __m128h __W) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, (__v8hf)__A); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, (__v16hf)__A); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index d85ea23d5ee5a..5f5a54e7284c1 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -1498,57 +1498,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 9d605efcbd758..3be708aea8a4d 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -854,11 +854,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v64qi( + _mm512_mask_blend_epi8( + (__mmask64) 0x00000001, + (__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { // CHECK-LABEL: test_mm512_mask_blend_epi16 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v32hi( + _mm512_mask_blend_epi16( + (__mmask32) 0x00000001, + (__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + +__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { + // CHECK-LABEL: test_mm512_mask_blend_epi32 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_mask_blend_epi32(__U, __A, __W); +} +TEST_CONSTEXPR(match_v16si( + _mm512_mask_blend_epi32( + (__mmask16) 0x0001, + (__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + +__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { + // CHECK-LABEL: test_mm512_mask_blend_epi64 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_blend_epi64(__U, __A, __W); +} + +TEST_CONSTEXPR(match_v8di( + _mm512_mask_blend_epi64( + (__mmask8)0x01, + (__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2}, + (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17} + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); + __m512i test_mm512_abs_epi8(__m512i __A) { // CHECK-LABEL: test_mm512_abs_epi8 // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 9daecd0d9875f..8cef11b12fb93 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -3622,41 +3622,140 @@ __m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_blend_epi32(__U,__A,__W); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_blend_epi32( + (__mmask8)0x01, + (__m128i)(__v4si){2, 2, 2, 2}, + (__m128i)(__v4si){ 10,11,12,13 } + ), + 10, 2, 2, 2 +)); __m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi32 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_blend_epi32(__U,__A,__W); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_blend_epi32( + (__mmask8)0x01, + (__m256i)(__v8si){2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v8si){ 10,11,12,13,14,15,16,17 } + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); __m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { // CHECK-LABEL: test_mm_mask_blend_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_blend_pd(__U,__A,__W); } +TEST_CONSTEXPR(match_m128d( + _mm_mask_blend_pd( + (__mmask8)0x01, + (__m128d)(__v2df){2.0, 2.0}, + (__m128d)(__v2df){10.0, 20.0} + ), + 10.0, 2.0 +)); __m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { // CHECK-LABEL: test_mm256_mask_blend_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_blend_pd(__U,__A,__W); } +TEST_CONSTEXPR(match_m256d( + _mm256_mask_blend_pd( + (__mmask8)0x01, + (__m256d)(__v4df){2.0, 2.0, 2.0, 2.0}, + (__m256d)(__v4df){10.0, 11.0, 12.0, 13.0} + ), + 10.0, 2.0, 2.0, 2.0 +)); + +__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { + // CHECK-LABEL: test_mm512_mask_blend_pd + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_blend_pd(__U, __A, __W); +} + +TEST_CONSTEXPR(match_m512d( + _mm512_mask_blend_pd( + (__mmask8)0x01, + (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0}, + (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0} + ), + 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 +)); + __m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { // CHECK-LABEL: test_mm_mask_blend_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_blend_ps(__U,__A,__W); } +TEST_CONSTEXPR(match_m128( + _mm_mask_blend_ps( + (__mmask8)0x01, + (__m128)(__v4sf){2.0f, 2.0f, 2.0f, 2.0f}, + (__m128)(__v4sf){10.0f, 11.0f, 12.0f, 13.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f +)); + __m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { // CHECK-LABEL: test_mm256_mask_blend_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_blend_ps(__U,__A,__W); } +TEST_CONSTEXPR(match_m256( + _mm256_mask_blend_ps( + (__mmask8)0x01, + (__m256)(__v8sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m256)(__v8sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + +__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) { + // CHECK-LABEL: test_mm512_mask_blend_ps + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_blend_ps(__U, __A, __W); +} +TEST_CONSTEXPR(match_m512( + _mm512_mask_blend_ps( + (__mmask16)0x01, + (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + __m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK-LABEL: test_mm_mask_blend_epi64 // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_blend_epi64(__U,__A,__W); } +TEST_CONSTEXPR(match_v2di( + _mm_mask_blend_epi64( + (__mmask8)0x01, + (__m128i)(__v2di){2, 2}, + (__m128i)(__v2di){ 10,11 } + ), + 10, 2 +)); __m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi64 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_blend_epi64(__U,__A,__W); } +TEST_CONSTEXPR(match_v4di( + _mm256_mask_blend_epi64( + (__mmask8)0x01, + (__m256i)(__v4di){2, 2, 2, 2}, + (__m256i)(__v4di){ 10,11,12,13 } + ), + 10, 2, 2, 2 +)); __m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_compress_pd // CHECK: @llvm.x86.avx512.mask.compress diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d62235a630fd8..d8f9a3ace6f38 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -893,23 +893,56 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v16qi( + _mm_mask_blend_epi8( + (__mmask16)0x0001, + (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 } + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); + __m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi8 // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_blend_epi8(__U,__A,__W); } +TEST_CONSTEXPR(match_v32qi( + _mm256_mask_blend_epi8( + (__mmask32) 0x00000001, + (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25} + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { // CHECK-LABEL: test_mm_mask_blend_epi16 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v8hi( + _mm_mask_blend_epi16( + (__mmask8)0x01, + (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2}, + (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 } + ), + 10, 2, 2, 2, 2, 2, 2, 2 +)); __m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { // CHECK-LABEL: test_mm256_mask_blend_epi16 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_blend_epi16(__U,__A,__W); } +TEST_CONSTEXPR(match_v16hi( + _mm256_mask_blend_epi16( + (__mmask16)0x0001, + (__m256i)(__v16hi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + (__m256i)(__v16hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 } + ), + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +)); __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_abs_epi8 diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c index fd6ea8fe6056d..badfa301e429d 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -3016,6 +3016,14 @@ __m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} return _mm_mask_blend_ph(__U, __A, __W); } +TEST_CONSTEXPR(match_m128h( + _mm_mask_blend_ph( + (__mmask8)0x01, + (__m128h)(__v8hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m128h)(__v8hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); __m256h test_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { // CHECK-LABEL: test_mm256_mask_blend_ph @@ -3023,6 +3031,41 @@ __m256h test_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} return _mm256_mask_blend_ph(__U, __A, __W); } +TEST_CONSTEXPR(match_m256h( + _mm256_mask_blend_ph( + (__mmask16)0x0001, + (__m256h)(__v16hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m256h)(__v16hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); + +__m512h test_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { + // CHECK-LABEL: test_mm512_mask_blend_ph + // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: %{{.*}} = select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} + return _mm512_mask_blend_ph(__U, __A, __W); +} +TEST_CONSTEXPR(match_m512h( + _mm512_mask_blend_ph( + (__mmask32)0x00000001, + (__m512h)(__v32hf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, + (__m512h)(__v32hf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f} + ), + 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f +)); __m128h test_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) { // CHECK-LABEL: test_mm_permutex2var_ph _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits