llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Chaitanya Koparkar (ckoparkar)

<details>
<summary>Changes</summary>

Fixes #<!-- -->154284

Add constexpr support for the following:

_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16
_mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16
_mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16
_mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16
_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16
_mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16
_mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16
_mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16
_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16
_mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16
_mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16
_mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16
_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16
_mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16
_mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16
_mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16

---

Patch is 48.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155199.diff

2 Files Affected:

- (modified) clang/lib/Headers/avx512vlbwintrin.h (+72-72)
- (modified) 
clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c (+97) ``````````diff diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 2e2052ad1b682..eaf292a0949f5 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2197,7 +2197,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2205,7 +2205,7 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) (__v8hi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2213,7 +2213,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2221,7 +2221,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2229,7 +2229,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2237,7 +2237,7 @@ 
_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2245,7 +2245,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2253,7 +2253,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2812,353 +2812,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)_mm256_setzero_si256())) -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi16(__m128i __W) { return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi16(__m128i __W) { return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi16(__m128i __W) { return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi16(__m128i __W) { return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR 
_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi16(__m128i __V) { return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu16(__m128i __V) { return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi16(__m128i __V) { return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu16(__m128i __V) { return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static 
__inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi16(__m256i __W) { return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi16(__m256i __W) { return __builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi16(__m256i __W) { return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi16(__m256i __W) { return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); return 
__builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi16(__m256i __V) { return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu16(__m256i __V) { return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi16(__m256i __V) { return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu16(__m256i __V) { return __builtin_reduce_min((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR 
_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v16hu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi8(__m128i __W) { return __builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi8(__m128i __W) { return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi8(__m128i __W) { return __builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi8(__m128i __W) { return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); return 
__builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi8(__m128i __V) { return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu8(__m128i __V) { return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi8(__m128i __V) { return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu8(__m128i __V) { return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi8(__m256i __W) { return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi8(__m256i __W) { return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi8(__m256i __W) { return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi8(__m256i __W) { return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { __W = 
_mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi8(__m256i __V) { return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu8(__m256i __V) { return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi8(__m256i __V) { return __builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu8(__m256i __V) { return __builtin_reduce_min((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v32qu)__V); diff --git 
a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c index faa3b54624a77..8e53db3f45300 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c +++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c @@ -8,30 +8,35 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" short test_mm_reduce_add_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_add_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_add_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36); short test_mm_reduce_mul_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_mul_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_mul_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72); short test_mm_reduce_or_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_or_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_or_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15); short test_mm_reduce_and_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_and_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_and_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1); short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_add_epi16 @@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ /... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/155199 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits