https://github.com/donneypr updated https://github.com/llvm/llvm-project/pull/157582
>From f0f99274e8b5d48ac12fafbc70b99cbf3b4fdf99 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Mon, 8 Sep 2025 19:54:43 -0400 Subject: [PATCH 1/8] [clang][x86][headers] Make SSE2 add/sub intrinsics constexpr --- clang/lib/Headers/emmintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index e4fbe011239d6..ec87180667234 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2059,7 +2059,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2080,7 +2080,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2498,7 +2498,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } @@ -2515,7 +2515,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } >From 590c0e1a3824140efcc209b3eac480fa2f1fe9a4 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Mon, 8 Sep 2025 20:05:11 -0400 Subject: [PATCH 2/8] [clang][x86][headers] Make AVX2 add/sub intrinsics constexpr --- clang/lib/Headers/avx2intrin.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index fc12a9bf15e57..37e08b0e48493 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); @@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); @@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); @@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); @@ -2460,7 +2460,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); @@ -2487,7 +2487,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); @@ -2513,7 +2513,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); @@ -2539,7 +2539,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); >From dca479e3bdf392f36856b9a7bc79b2bfd270875f Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Mon, 8 Sep 2025 20:19:28 -0400 Subject: [PATCH 3/8] [clang][x86][headers] Make AVX-512 epi64 add/sub intrinsics constexpr Fixes #152490 --- clang/lib/Headers/avx512fintrin.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 7ba09039cd826..345d0e0e3898a 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); @@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); >From 0d4b8005b3b336aa035f779c097289a5ded656d0 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Mon, 8 Sep 2025 21:48:53 -0400 Subject: [PATCH 4/8] [clang][x86][headers] Make AVX-512F masked epi32 add/sub constexpr Fixes llvm/llvm-project#152490 --- clang/lib/Headers/avx512fintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 345d0e0e3898a..ff7b03b20df6f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -903,7 +903,7 @@ _mm512_add_epi32 (__m512i __A, __m512i __B) return (__m512i) ((__v16su) __A + (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -911,7 +911,7 @@ _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -925,7 +925,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i __B) return (__m512i) ((__v16su) __A - (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -933,7 +933,7 @@ _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, >From c70762ab4ac729f18ba548bb3fd3ccefd5b20be1 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Mon, 8 Sep 2025 21:53:07 -0400 Subject: [PATCH 5/8] [clang][x86][headers] Make AVX-512BW masked add/sub (epi8/epi16) constexpr Fixes llvm/llvm-project#152490. --- clang/lib/Headers/avx512bwintrin.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 42fce7d89e1bb..0fc2f5bf2eda7 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,76 +369,76 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), >From 8b22bd7631aec621189d34f97bcf667141aeded3 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Tue, 9 Sep 2025 07:51:26 -0400 Subject: [PATCH 6/8] clang-format: touched lines in x86 add/sub headers for #152490 --- clang/lib/Headers/avx2intrin.h | 24 ++++++++-------------- clang/lib/Headers/avx512bwintrin.h | 10 ++++----- clang/lib/Headers/avx512fintrin.h | 33 ++++++++++-------------------- clang/lib/Headers/emmintrin.h | 16 +++++++-------- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 37e08b0e48493..49ae71539381a 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -280,8 +280,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi8(__m256i __a, __m256i __b) -{ +_mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } @@ -299,8 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi16(__m256i __a, __m256i __b) -{ +_mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } @@ -318,8 +316,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi32(__m256i __a, __m256i __b) -{ +_mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } @@ -337,8 +334,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi64(__m256i __a, __m256i __b) -{ +_mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } @@ -2461,8 +2457,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi8(__m256i __a, __m256i __b) -{ +_mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } @@ -2488,8 +2483,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi16(__m256i __a, __m256i __b) -{ +_mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } @@ -2514,8 +2508,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi32(__m256i __a, __m256i __b) -{ +_mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } @@ -2540,8 +2533,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi64(__m256i __a, __m256i __b) -{ +_mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 0fc2f5bf2eda7..c5e20baf38723 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi8 (__m512i __A, __m512i __B) { +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } @@ -389,7 +389,7 @@ _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi8 (__m512i __A, __m512i __B) { +_mm512_sub_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } @@ -408,7 +408,7 @@ _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi16 (__m512i __A, __m512i __B) { +_mm512_add_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } @@ -427,7 +427,7 @@ _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi16 (__m512i __A, __m512i __B) { +_mm512_sub_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index ff7b03b20df6f..7f6cebbe3bc7c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -860,82 +860,71 @@ _mm512_add_epi64(__m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi64 (__m512i __A, __m512i __B) -{ +_mm512_sub_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi32 (__m512i __A, __m512i __B) -{ +_mm512_add_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi32 (__m512i __A, __m512i __B) -{ +_mm512_sub_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index ec87180667234..9345b72ab5da4 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2059,8 +2059,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2080,8 +2080,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2498,8 +2498,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } @@ -2515,8 +2515,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } >From 80dfc9d0c128673439da441b268e6aab1819e185 Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Tue, 9 Sep 2025 12:21:56 -0400 Subject: [PATCH 7/8] X86: add constexpr tests for add/sub intrinsics --- clang/test/CodeGen/X86/avx2-builtins.c | 66 ++++++++++++++++++ clang/test/CodeGen/X86/avx512bw-builtins.c | 60 ++++++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 80 ++++++++++++++++++++++ clang/test/CodeGen/X86/sse2-builtins.c | 28 ++++++++ 4 files changed, 234 insertions(+) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index aeb1aee4ea946..9757c444294d3 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -48,24 +48,57 @@ __m256i test_mm256_add_epi8(__m256i a, __m256i b) { return _mm256_add_epi8(a, b); } +TEST_CONSTEXPR( + match_v32qi( + _mm256_add_epi8( + (__m256i)(__v32qi){ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }, + (__m256i)(__v32qi){ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }), + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)); + __m256i test_mm256_add_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi16 // CHECK: add <16 x i16> return _mm256_add_epi16(a, b); } +TEST_CONSTEXPR( + match_v16hi( + _mm256_add_epi16( + (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 }, + (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + 1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31)); + __m256i test_mm256_add_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi32 // CHECK: add <8 x i32> return _mm256_add_epi32(a, b); } +TEST_CONSTEXPR( + match_v8si( + _mm256_add_epi32( + (__m256i)(__v8si){1,2,3,4,5,6,7,8}, + (__m256i)(__v8si){8,7,6,5,4,3,2,1}), + 9,9,9,9,9,9,9,9)); + __m256i test_mm256_add_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi64 // CHECK: add <4 x i64> return _mm256_add_epi64(a, b); } +TEST_CONSTEXPR( + match_v4di( + _mm256_add_epi64( + (__m256i)(__v4di){10,20,30,40}, + (__m256i)(__v4di){ 1, 3, 5, 7 }), + 11,23,35,47)); + __m256i test_mm256_adds_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_adds_epi8 // CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) @@ -1360,24 +1393,57 @@ __m256i test_mm256_sub_epi8(__m256i a, __m256i b) { return _mm256_sub_epi8(a, b); } +TEST_CONSTEXPR( + match_v32qi( + _mm256_sub_epi8( + (__m256i)(__v32qi){ + 0,1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }, + (__m256i)(__v32qi){ + 1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, + 1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1 }), + -1,0,1,2,3,4,5,6, 7, 8, 9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)); + __m256i test_mm256_sub_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi16 // CHECK: sub <16 x i16> return _mm256_sub_epi16(a, b); } +TEST_CONSTEXPR( + match_v16hi( + _mm256_sub_epi16( + (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 }, + (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29)); + __m256i test_mm256_sub_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi32 // CHECK: sub <8 x i32> return _mm256_sub_epi32(a, b); } +TEST_CONSTEXPR( + match_v8si( + _mm256_sub_epi32( + (__m256i)(__v8si){10,20,30,40,50,60,70,80}, + (__m256i)(__v8si){ 1, 2, 3, 4, 5, 6, 7, 8}), + 9,18,27,36,45,54,63,72)); + __m256i test_mm256_sub_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi64 // CHECK: sub <4 x i64> return _mm256_sub_epi64(a, b); } +TEST_CONSTEXPR( + match_v4di( + _mm256_sub_epi64( + (__m256i)(__v4di){10,20,30,40}, + (__m256i)(__v4di){ 1, 3, 5, 7 }), + 9,17,25,33)); + __m256i test_mm256_subs_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_subs_epi8 // CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 9d605efcbd758..2704999b3beff 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -752,6 +752,24 @@ __m512i test_mm512_add_epi8 (__m512i __A, __m512i __B) { return _mm512_add_epi8(__A,__B); } +TEST_CONSTEXPR( + match_v64qi( + _mm512_add_epi8( + (__m512i)(__v64qi){ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 }, + (__m512i)(__v64qi){ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }), + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, + 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48, + 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)); + __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_add_epi8 //CHECK: add <64 x i8> %{{.*}}, %{{.*}} @@ -772,6 +790,24 @@ __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) { return _mm512_sub_epi8(__A, __B); } +TEST_CONSTEXPR( + match_v64qi( + _mm512_sub_epi8( + (__m512i)(__v64qi){ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 }, + (__m512i)(__v64qi){ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }), + -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, + 31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, + 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62)); + __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_sub_epi8 //CHECK: sub <64 x i8> %{{.*}}, %{{.*}} @@ -792,6 +828,18 @@ __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) { return _mm512_add_epi16(__A, __B); } +TEST_CONSTEXPR( + match_v32hi( + _mm512_add_epi16( + (__m512i)(__v32hi){ + 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30, + 32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 }, + (__m512i)(__v32hi){ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + 1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31, + 33,35,37,39,41,43,45,47, 49,51,53,55,57,59,61,63)); + __m512i test_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_add_epi16 //CHECK: add <32 x i16> %{{.*}}, %{{.*}} @@ -812,6 +860,18 @@ __m512i test_mm512_sub_epi16 (__m512i __A, __m512i __B) { return _mm512_sub_epi16(__A, __B); } +TEST_CONSTEXPR( + match_v32hi( + _mm512_sub_epi16( + (__m512i)(__v32hi){ + 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30, + 32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 }, + (__m512i)(__v32hi){ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29, + 31,33,35,37,39,41,43,45, 47,49,51,53,55,57,59,61)); + __m512i test_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_sub_epi16 //CHECK: sub <32 x i16> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..055e771357525 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3006,6 +3006,14 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi32(__k,__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_maskz_sub_epi32( + K, + (__m512i)(__v16si){10,11,12,13,14,15,16,17, 100,200,300,400,500,600,700,800}, + (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2}), + 9,9,9,9,9,9,9,9, 0,0,0,0,0,0,0,0)); + __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi32 @@ -3014,12 +3022,24 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi32(__src,__k,__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_mask_sub_epi32(SRC, K, A, B), + 9,9,9,9,9,9,9,9, 42,42,42,42,42,42,42,42)); + __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi32 //CHECK: sub <16 x i32> return _mm512_sub_epi32(__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_sub_epi32( + (__m512i)(__v16si){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}, + (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16}), + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9)); + __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi64 //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} @@ -3027,6 +3047,14 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi64(__k,__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_maskz_sub_epi64( + K, + (__m512i)(__v8di){100,200,300,400,500,600,700,800}, + (__m512i)(__v8di){ 1, 2, 3, 4, 5, 6, 7, 8}), + 99,198,297,396, 0,0,0,0)); + __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi64 @@ -3035,12 +3063,24 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi64(__src,__k,__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_mask_sub_epi64(SRC, K, A, B), + 99,198,297,396, -1,-1,-1,-1)); + __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi64 //CHECK: sub <8 x i64> return _mm512_sub_epi64(__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_sub_epi64( + (__m512i)(__v8di){10,20,30,40,50,60,70,80}, + (__m512i)(__v8di){ 1, 3, 5, 7, 9,11,13,15}), + 9,17,25,33,41,49,57,65)); + __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi32 //CHECK: add <16 x i32> %{{.*}}, %{{.*}} @@ -3048,6 +3088,14 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi32(__k,__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_maskz_add_epi32( + K, + (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 10,20,30,40,50,60,70,80 }, + (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 9, 8, 7, 6, 5, 4, 3, 2 }), + 1,2,3,4,5,6,7,8, 0,0,0,0,0,0,0,0)); + __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi32 @@ -3056,12 +3104,24 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi32(__src,__k,__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_mask_add_epi32(SRC, K, A, B), + 1,2,3,4,5,6,7,8, 100,100,100,100,100,100,100,100)); + __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi32 //CHECK: add <16 x i32> return _mm512_add_epi32(__A,__B); } +TEST_CONSTEXPR( + match_v16si( + _mm512_add_epi32( + (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, + (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)); + __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi64 //CHECK: add <8 x i64> %{{.*}}, %{{.*}} @@ -3069,6 +3129,14 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi64(__k,__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_maskz_add_epi64( + K, + (__m512i)(__v8di){10,20,30,40,50,60,70,80}, + (__m512i)(__v8di){ 1, 2, 3, 4, 1, 2, 3, 4}), + 11,22,33,44, 0,0,0,0)); + __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi64 @@ -3077,12 +3145,24 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi64(__src,__k,__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_mask_add_epi64(SRC, K, A, B), + 11,22,33,44, 100,100,100,100)); + __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi64 //CHECK: add <8 x i64> return _mm512_add_epi64(__A,__B); } +TEST_CONSTEXPR( + match_v8di( + _mm512_add_epi64( + (__m512i)(__v8di){10,20,30,40,50,60,70,80}, + (__m512i)(__v8di){ 1, 1, 1, 1, 1, 1, 1, 1}), + 11,21,31,41,51,61,71,81)); + __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 0ba32bb230cdd..411764621fcd6 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -32,12 +32,26 @@ __m128i test_mm_add_epi8(__m128i A, __m128i B) { return _mm_add_epi8(A, B); } +TEST_CONSTEXPR( + match_v16qi( + _mm_add_epi8( + (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, + (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16)); + __m128i test_mm_add_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi16 // CHECK: add <8 x i16> return _mm_add_epi16(A, B); } +TEST_CONSTEXPR( + match_v8hi( + _mm_add_epi16( + (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 }, + (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }), + 1, 3, 5, 7, 9, 11, 13, 15)); + __m128i test_mm_add_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi32 // CHECK: add <4 x i32> @@ -1715,12 +1729,26 @@ __m128i test_mm_sub_epi8(__m128i A, __m128i B) { return _mm_sub_epi8(A, B); } +TEST_CONSTEXPR( + match_v16qi( + _mm_sub_epi8( + (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, + (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14)); + __m128i test_mm_sub_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi16 // CHECK: sub <8 x i16> return _mm_sub_epi16(A, B); } +TEST_CONSTEXPR( + match_v8hi( + _mm_sub_epi16( + (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 }, + (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }), + -1, 1, 3, 5, 7, 9, 11, 13)); + __m128i test_mm_sub_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi32 // CHECK: sub <4 x i32> >From 450b4a04c4c9eb210a2df59eda889ed942ef103a Mon Sep 17 00:00:00 2001 From: donneypr <donatopraba...@gmail.com> Date: Tue, 9 Sep 2025 13:47:25 -0400 Subject: [PATCH 8/8] Revert avx512f-builtins.c to upstream/main version --- clang/test/CodeGen/X86/avx512f-builtins.c | 80 ----------------------- 1 file changed, 80 deletions(-) diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 055e771357525..f93216e546a63 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3006,14 +3006,6 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi32(__k,__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_maskz_sub_epi32( - K, - (__m512i)(__v16si){10,11,12,13,14,15,16,17, 100,200,300,400,500,600,700,800}, - (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2}), - 9,9,9,9,9,9,9,9, 0,0,0,0,0,0,0,0)); - __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi32 @@ -3022,24 +3014,12 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi32(__src,__k,__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_mask_sub_epi32(SRC, K, A, B), - 9,9,9,9,9,9,9,9, 42,42,42,42,42,42,42,42)); - __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi32 //CHECK: sub <16 x i32> return _mm512_sub_epi32(__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_sub_epi32( - (__m512i)(__v16si){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}, - (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16}), - 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9)); - __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi64 //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} @@ -3047,14 +3027,6 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi64(__k,__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_maskz_sub_epi64( - K, - (__m512i)(__v8di){100,200,300,400,500,600,700,800}, - (__m512i)(__v8di){ 1, 2, 3, 4, 5, 6, 7, 8}), - 99,198,297,396, 0,0,0,0)); - __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi64 @@ -3063,24 +3035,12 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi64(__src,__k,__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_mask_sub_epi64(SRC, K, A, B), - 99,198,297,396, -1,-1,-1,-1)); - __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi64 //CHECK: sub <8 x i64> return _mm512_sub_epi64(__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_sub_epi64( - (__m512i)(__v8di){10,20,30,40,50,60,70,80}, - (__m512i)(__v8di){ 1, 3, 5, 7, 9,11,13,15}), - 9,17,25,33,41,49,57,65)); - __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi32 //CHECK: add <16 x i32> %{{.*}}, %{{.*}} @@ -3088,14 +3048,6 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi32(__k,__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_maskz_add_epi32( - K, - (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 10,20,30,40,50,60,70,80 }, - (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 9, 8, 7, 6, 5, 4, 3, 2 }), - 1,2,3,4,5,6,7,8, 0,0,0,0,0,0,0,0)); - __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi32 @@ -3104,24 +3056,12 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi32(__src,__k,__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_mask_add_epi32(SRC, K, A, B), - 1,2,3,4,5,6,7,8, 100,100,100,100,100,100,100,100)); - __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi32 //CHECK: add <16 x i32> return _mm512_add_epi32(__A,__B); } -TEST_CONSTEXPR( - match_v16si( - _mm512_add_epi32( - (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, - (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }), - 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)); - __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi64 //CHECK: add <8 x i64> %{{.*}}, %{{.*}} @@ -3129,14 +3069,6 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi64(__k,__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_maskz_add_epi64( - K, - (__m512i)(__v8di){10,20,30,40,50,60,70,80}, - (__m512i)(__v8di){ 1, 2, 3, 4, 1, 2, 3, 4}), - 11,22,33,44, 0,0,0,0)); - __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi64 @@ -3145,24 +3077,12 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi64(__src,__k,__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_mask_add_epi64(SRC, K, A, B), - 11,22,33,44, 100,100,100,100)); - __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi64 //CHECK: add <8 x i64> return _mm512_add_epi64(__A,__B); } -TEST_CONSTEXPR( - match_v8di( - _mm512_add_epi64( - (__m512i)(__v8di){10,20,30,40,50,60,70,80}, - (__m512i)(__v8di){ 1, 1, 1, 1, 1, 1, 1, 1}), - 11,21,31,41,51,61,71,81)); - __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits