https://github.com/ckoparkar updated https://github.com/llvm/llvm-project/pull/155199
>From 981fa7db5cd83b9c8b240e6b116a52592a59617a Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar <ckopar...@gmail.com> Date: Sat, 23 Aug 2025 07:57:38 -0400 Subject: [PATCH] [Headers][X86] Allow AVX512VLBW integer reduction intrinsics to be used in constexpr Add constexpr support for the following: _mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16 _mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16 _mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16 _mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16 --- clang/lib/Headers/avx512vlbwintrin.h | 183 +++++++++--------- .../CodeGen/X86/avx512vlbw-reduceIntrin.c | 97 ++++++++++ 2 files 
changed, 184 insertions(+), 96 deletions(-) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 2e2052ad1b682..ec1170c28039b 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2197,71 +2197,62 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_epi16(__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 
(__v16qi) __A, (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_epi8(__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) _mm256_setzero_si256 ()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { @@ -2812,353 +2803,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)_mm256_setzero_si256())) -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_reduce_add_epi16(__m128i __W) { +static __inline__ short + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi16(__m128i __W) { return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi16(__m128i __W) { return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi16(__m128i __W) { return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi16(__m128i __W) { return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi16(__m128i __V) { return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu16(__m128i __V) { return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi16(__m128i __V) { return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu16(__m128i 
__V) { return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi16(__m256i __W) { return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi16(__m256i __W) { return __builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi16(__m256i __W) { return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi16(__m256i __W) { return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short 
__DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi16(__m256i __V) { return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu16(__m256i __V) { return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi16(__m256i __V) { return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu16(__m256i __V) { return __builtin_reduce_min((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR 
_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v16hu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi8(__m128i __W) { return __builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi8(__m128i __W) { return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi8(__m128i __W) { return __builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi8(__m128i __W) { return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return 
__builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi8(__m128i __V) { return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu8(__m128i __V) { return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi8(__m128i __V) { return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu8(__m128i __V) { return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi8(__m256i __W) { return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi8(__m256i __W) { return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi8(__m256i __W) { return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi8(__m256i __W) { return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); 
return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi8(__m256i __V) { return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu8(__m256i __V) { return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi8(__m256i __V) { return __builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu8(__m256i __V) { return __builtin_reduce_min((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static 
__inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v32qu)__V); diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c index faa3b54624a77..8e53db3f45300 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c +++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c @@ -8,30 +8,35 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" short test_mm_reduce_add_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_add_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_add_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36); short test_mm_reduce_mul_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_mul_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_mul_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72); short test_mm_reduce_or_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_or_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_or_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15); short test_mm_reduce_and_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_and_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> 
%{{.*}}) return _mm_reduce_and_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1); short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_add_epi16 @@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_add_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_add_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 26); +TEST_CONSTEXPR(_mm_mask_reduce_add_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 10); short test_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_mul_epi16 @@ -46,6 +53,8 @@ short test_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_mul_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 12); +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 6); short test_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_and_epi16 @@ -53,6 +62,8 @@ short test_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}} return _mm_mask_reduce_and_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_and_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,3,5,7,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_and_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,3,5,7,0,0,0,0}) == 1); short test_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_or_epi16 @@ -60,30 +71,36 @@ short test_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}}) return 
_mm_mask_reduce_or_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_or_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_or_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15); short test_mm256_reduce_add_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_add_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_add_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_add_epi16((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 120); short test_mm256_reduce_mul_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_mul_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_mul_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_mul_epi16((__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 7776); short test_mm256_reduce_or_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_or_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_or_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_or_epi16((__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 255); short test_mm256_reduce_and_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_and_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_and_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_and_epi16((__m256i)(__v16hi){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}) == 1); short test_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_add_epi16 @@ -91,6 +108,8 @@ short test_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_add_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 84); 
+TEST_CONSTEXPR(_mm256_mask_reduce_add_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 36); short test_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_mul_epi16 @@ -98,6 +117,8 @@ short test_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_mul_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 108); +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1}) == 72); short test_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_and_epi16 @@ -105,6 +126,8 @@ short test_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_and_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 1); short test_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_or_epi16 @@ -112,30 +135,36 @@ short test_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_or_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){1,2,4,8,16,32,64,128,0,0,0,0,0,0,0,0}) == 255); signed char 
test_mm_reduce_add_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_add_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_add_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_add_epi8((__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 120); signed char test_mm_reduce_mul_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_mul_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_mul_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_mul_epi8((__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 32); signed char test_mm_reduce_and_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_and_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_and_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_and_epi8((__m128i)(__v16qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}) == 1); signed char test_mm_reduce_or_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_or_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_or_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_or_epi8((__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 127); signed char test_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_add_epi8 @@ -143,6 +172,8 @@ signed char test_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_add_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_add_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 84); +TEST_CONSTEXPR(_mm_mask_reduce_add_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}) == 36); signed char test_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_mul_epi8 @@ -150,6 +181,8 @@ signed char test_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W){ // CHECK: call 
{{.*}}i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_mul_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 4); +TEST_CONSTEXPR(_mm_mask_reduce_mul_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1}) == 8); signed char test_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_and_epi8 @@ -157,6 +190,8 @@ signed char test_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_and_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_and_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_and_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0}) == 1); signed char test_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_or_epi8 @@ -164,30 +199,36 @@ signed char test_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_or_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_or_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm_mask_reduce_or_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){0,1,2,4,8,16,32,64,0,0,0,0,0,0,0,0}) == 127); signed char test_mm256_reduce_add_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_add_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_add_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_add_epi8((__m256i)(__v32qs){0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 112); signed char test_mm256_reduce_mul_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_mul_epi8 // CHECK: 
call {{.*}}i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_mul_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_mul_epi8((__m256i)(__v32qs){1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 16); signed char test_mm256_reduce_and_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_and_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_and_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_and_epi8((__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63}) == 1); signed char test_mm256_reduce_or_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_or_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_or_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_or_epi8((__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 127); signed char test_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_add_epi8 @@ -195,6 +236,8 @@ signed char test_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_add_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 56); +TEST_CONSTEXPR(_mm256_mask_reduce_add_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){8,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}) == 64); signed char test_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_mul_epi8 @@ -202,6 +245,8 @@ signed char test_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_mul_epi8(__M, __W); } 
+TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 4); +TEST_CONSTEXPR(_mm256_mask_reduce_mul_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){4,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2}) == 16); signed char test_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_and_epi8 @@ -209,6 +254,8 @@ signed char test_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_and_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_and_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 1); signed char test_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_or_epi8 @@ -216,30 +263,36 @@ signed char test_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_or_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 0); +TEST_CONSTEXPR(_mm256_mask_reduce_or_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){1,2,4,8,16,32,64,127,1,2,4,8,16,32,64,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}) == 127); short test_mm_reduce_max_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %{{.*}}) return 
_mm_reduce_max_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epi16((__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 4); short test_mm_reduce_min_epi16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_min_epi16(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epi16((__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -4); unsigned short test_mm_reduce_max_epu16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_max_epu16(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epu16((__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 8); unsigned short test_mm_reduce_min_epu16(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %{{.*}}) return _mm_reduce_min_epu16(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epu16((__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 1); short test_mm_mask_reduce_max_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epi16 @@ -247,6 +300,8 @@ short test_mm_mask_reduce_max_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_max_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 4); +TEST_CONSTEXPR(_mm_mask_reduce_max_epi16((__mmask8)0b00001111, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -1); short test_mm_mask_reduce_min_epi16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epi16 @@ -254,6 +309,8 @@ short test_mm_mask_reduce_min_epi16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_min_epi16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epi16((__mmask8)0b11110000, (__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == 1); +TEST_CONSTEXPR(_mm_mask_reduce_min_epi16((__mmask8)0b00001111, 
(__m128i)(__v8hi){-4,-3,-2,-1,1,2,3,4}) == -4); unsigned short test_mm_mask_reduce_max_epu16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epu16 @@ -261,6 +318,8 @@ unsigned short test_mm_mask_reduce_max_epu16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_max_epu16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epu16((__mmask8)0b11110000, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm_mask_reduce_max_epu16((__mmask8)0b00001111, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 4); unsigned short test_mm_mask_reduce_min_epu16(__mmask8 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epu16 @@ -268,30 +327,36 @@ unsigned short test_mm_mask_reduce_min_epu16(__mmask8 __M, __m128i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %{{.*}}) return _mm_mask_reduce_min_epu16(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epu16((__mmask8)0b11110000, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 5); +TEST_CONSTEXPR(_mm_mask_reduce_min_epu16((__mmask8)0b00001111, (__m128i)(__v8hu){1,2,3,4,5,6,7,8}) == 1); short test_mm256_reduce_max_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_max_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epi16((__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); short test_mm256_reduce_min_epi16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epi16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_min_epi16(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epi16((__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned short test_mm256_reduce_max_epu16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_max_epu16(__W); } 
+TEST_CONSTEXPR(_mm256_reduce_max_epu16((__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); unsigned short test_mm256_reduce_min_epu16(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epu16 // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %{{.*}}) return _mm256_reduce_min_epu16(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epu16((__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); short test_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epi16 @@ -299,6 +364,8 @@ short test_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_max_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -1); short test_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epi16 @@ -306,6 +373,8 @@ short test_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_min_epi16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi16((__mmask16)0b1111111100000000, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 1); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi16((__mmask16)0b0000000011111111, (__m256i)(__v16hi){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned short test_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epu16 @@ -313,6 +382,8 @@ unsigned short test_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %{{.*}}) return
_mm256_mask_reduce_max_epu16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu16((__mmask16)0b1111111100000000, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu16((__mmask16)0b0000000011111111, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 8); unsigned short test_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epu16 @@ -320,30 +391,36 @@ unsigned short test_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __W){ // CHECK: call {{.*}}i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %{{.*}}) return _mm256_mask_reduce_min_epu16(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu16((__mmask16)0b1111111100000000, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 9); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu16((__mmask16)0b0000000011111111, (__m256i)(__v16hu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm_reduce_max_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_max_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epi8((__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); signed char test_mm_reduce_min_epi8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_min_epi8(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epi8((__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned char test_mm_reduce_max_epu8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_max_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_max_epu8(__W); } +TEST_CONSTEXPR(_mm_reduce_max_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); unsigned char test_mm_reduce_min_epu8(__m128i __W){ // CHECK-LABEL: test_mm_reduce_min_epu8 // CHECK: call
{{.*}}i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %{{.*}}) return _mm_reduce_min_epu8(__W); } +TEST_CONSTEXPR(_mm_reduce_min_epu8((__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epi8 @@ -351,6 +428,8 @@ signed char test_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_max_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 8); +TEST_CONSTEXPR(_mm_mask_reduce_max_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -1); signed char test_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epi8 @@ -358,6 +437,8 @@ signed char test_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_min_epi8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epi8((__mmask16)0b1111111100000000, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == 1); +TEST_CONSTEXPR(_mm_mask_reduce_min_epi8((__mmask16)0b0000000011111111, (__m128i)(__v16qs){-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8}) == -8); unsigned char test_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_max_epu8 @@ -365,6 +446,8 @@ unsigned char test_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_max_epu8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_max_epu8((__mmask16)0b1111111100000000, (__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm_mask_reduce_max_epu8((__mmask16)0b0000000011111111,
(__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 8); unsigned char test_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __W){ // CHECK-LABEL: test_mm_mask_reduce_min_epu8 @@ -372,30 +455,36 @@ unsigned char test_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %{{.*}}) return _mm_mask_reduce_min_epu8(__M, __W); } +TEST_CONSTEXPR(_mm_mask_reduce_min_epu8((__mmask16)0b1111111100000000, (__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 9); +TEST_CONSTEXPR(_mm_mask_reduce_min_epu8((__mmask16)0b0000000011111111, (__m128i)(__v16qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); signed char test_mm256_reduce_max_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_max_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epi8((__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); signed char test_mm256_reduce_min_epi8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epi8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_min_epi8(__W); } +TEST_CONSTEXPR(_mm256_reduce_min_epi8((__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -16); unsigned char test_mm256_reduce_max_epu8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_max_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_max_epu8(__W); } +TEST_CONSTEXPR(_mm256_reduce_max_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 32); unsigned char test_mm256_reduce_min_epu8(__m256i __W){ // CHECK-LABEL: test_mm256_reduce_min_epu8 // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %{{.*}}) return _mm256_reduce_min_epu8(__W); }
+TEST_CONSTEXPR(_mm256_reduce_min_epu8((__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 1); signed char test_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epi8 @@ -403,6 +492,8 @@ signed char test_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_max_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 16); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -1); signed char test_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epi8 @@ -410,6 +501,8 @@ signed char test_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_min_epi8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == 1); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epi8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qs){-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) == -16); unsigned char test_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_max_epu8 @@ -417,6 +510,8 @@ unsigned char test_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %{{.*}}) return 
_mm256_mask_reduce_max_epu8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 32); +TEST_CONSTEXPR(_mm256_mask_reduce_max_epu8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 16); unsigned char test_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __W){ // CHECK-LABEL: test_mm256_mask_reduce_min_epu8 @@ -424,3 +519,5 @@ unsigned char test_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __W){ // CHECK: call {{.*}}i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %{{.*}}) return _mm256_mask_reduce_min_epu8(__M, __W); } +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu8((__mmask32)0b11111111111111110000000000000000, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 17); +TEST_CONSTEXPR(_mm256_mask_reduce_min_epu8((__mmask32)0b00000000000000001111111111111111, (__m256i)(__v32qu){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}) == 1); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits