https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/153293
>From c38db4cec7342ea978b7d4849fe574b5b17638df Mon Sep 17 00:00:00 2001 From: Pedro Lobo <pedro.l...@tecnico.ulisboa.pt> Date: Tue, 12 Aug 2025 17:42:37 +0100 Subject: [PATCH 1/3] [Headers][X86] Allow pmuludq/pmuldq to be used in constexpr Adds `constexpr` support for `pmuludq` and `pmuldq` intrinsics. --- clang/include/clang/Basic/BuiltinsX86.td | 20 +++++++++--- clang/lib/AST/ExprConstant.cpp | 37 +++++++++++++++++++++++ clang/lib/Headers/avx2intrin.h | 10 +++--- clang/lib/Headers/avx512fintrin.h | 10 +++--- clang/lib/Headers/emmintrin.h | 14 ++++++--- clang/lib/Headers/smmintrin.h | 4 +-- clang/test/CodeGen/X86/avx2-builtins.c | 2 ++ clang/test/CodeGen/X86/avx512f-builtins.c | 2 ++ clang/test/CodeGen/X86/mmx-builtins.c | 1 + clang/test/CodeGen/X86/sse2-builtins.c | 1 + clang/test/CodeGen/X86/sse41-builtins.c | 1 + 11 files changed, 79 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4262bdaa7cdd9..8bfb8eaec3865 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in { } let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; @@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">; } +let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; +} + let Features = "sse3", Attributes = [NoThrow] in { def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">; def mwait : X86Builtin<"void(unsigned int, unsigned int)">; @@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; - def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; @@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">; } +let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; +} + let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">; @@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">; def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; - def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; @@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">; } +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; + def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; +} + let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -1078,6 +1087,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">; def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">; def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">; +} + +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 36dd0f5d7a065..1ab5aedc1cbe9 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11688,6 +11688,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_pmuldq128: + case clang::X86::BI__builtin_ia32_pmuldq256: + case clang::X86::BI__builtin_ia32_pmuldq512: + case clang::X86::BI__builtin_ia32_pmuludq128: + case clang::X86::BI__builtin_ia32_pmuludq256: + case clang::X86::BI__builtin_ia32_pmuludq512: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen / 2); + + for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) { + APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt(); + APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt(); + + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_pmuludq128: + case clang::X86::BI__builtin_ia32_pmuludq256: + case clang::X86::BI__builtin_ia32_pmuludq512: + ResultElements.push_back( + APValue(APSInt(llvm::APIntOps::muluExtended(LHS, RHS), true))); + break; + case clang::X86::BI__builtin_ia32_pmuldq128: + case clang::X86::BI__builtin_ia32_pmuldq256: + case clang::X86::BI__builtin_ia32_pmuldq512: + ResultElements.push_back( + APValue(APSInt(llvm::APIntOps::mulsExtended(LHS, RHS), false))); + break; + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_max: case Builtin::BI__builtin_elementwise_min: { APValue SourceLHS, SourceRHS; diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 48b3cfacc0bb7..fc134958ae5f6 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1667,9 +1667,8 @@ _mm256_cvtepu32_epi64(__m128i __V) { /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } @@ -1796,9 +1795,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epu32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 634fb78696f04..ed260dcb678f6 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1417,9 +1417,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mul_epi32(__m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); } @@ -1439,9 +1438,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mul_epu32(__m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_epu32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 58caded7430bc..51dfca0170313 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -67,6 +67,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -2450,9 +2453,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) { /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a), - (__v4si)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a), + (__v4si)__zext128(__b))); } /// Multiplies 32-bit unsigned integer values contained in the lower @@ -2468,8 +2472,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { /// \param __b /// A [2 x i64] vector containing one of the source operands. /// \returns A [2 x i64] vector containing the product of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_epu32(__m128i __a, __m128i __b) { return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index c5075c419b70b..57d0d329312af 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [2 x i64] containing the products of both /// operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 8e591397f17c9..d9cfc4dd012d1 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -907,6 +907,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1)); __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epu32 @@ -915,6 +916,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epu32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6)); __m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mulhi_epu16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index a92227d5aaae1..df8f1421642a5 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epi32(__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16)); __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epi32 @@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epu32(__A,__B); } +TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16)); __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epu32 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index a4098c8db4f3f..b99e267b276bb 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -389,6 +389,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_su32(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0)); __m64 test_mm_mulhi_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mulhi_pi16 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 49d8b395a0d5a..5334aa3a81310 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -925,6 +925,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epu32(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6)); __m128d test_mm_mul_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_mul_pd diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 10deb386d82aa..b7fc582788c8c 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epi32(x, y); } +TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1)); __m128i test_mm_mullo_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mullo_epi32 >From 6f1e7547d89f812b47e800350bda680231a41ac1 Mon Sep 17 00:00:00 2001 From: Pedro Lobo <pedro.l...@tecnico.ulisboa.pt> Date: Thu, 14 Aug 2025 13:10:34 +0100 Subject: [PATCH 2/3] use more appropriate matches --- clang/test/CodeGen/X86/avx2-builtins.c | 4 ++-- clang/test/CodeGen/X86/avx512f-builtins.c | 4 ++-- clang/test/CodeGen/X86/mmx-builtins.c | 2 +- clang/test/CodeGen/X86/sse2-builtins.c | 2 +- clang/test/CodeGen/X86/sse41-builtins.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index d9cfc4dd012d1..b9612b1945f9f 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -907,7 +907,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epi32(a, b); } -TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1)); +TEST_CONSTEXPR(match_m256i(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 36, -40, -28)); __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epu32 @@ -916,7 +916,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epu32(a, b); } -TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6)); +TEST_CONSTEXPR(match_m256i(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), 4294967280, 36, 21474836440, 30064771044)); __m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mulhi_epu16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index df8f1421642a5..30496b7f2d1fd 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3027,7 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epi32(__A,__B); } -TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16)); +TEST_CONSTEXPR(match_v8di(_mm512_mul_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -32, 84, -120, 140, -144, 132, -104, -60)); __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epi32 @@ -3058,7 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epu32(__A,__B); } -TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16)); +TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 4294967264, 84, 21474836360, 140, 38654705520, 132, 55834574744, 64424509380)); __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epu32 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b99e267b276bb..86c2597c5e4f2 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -389,7 +389,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_su32(a, b); } -TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0)); +TEST_CONSTEXPR(match_v2si(_mm_mul_su32((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 1900534, 589796)); __m64 test_mm_mulhi_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mulhi_pi16 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 5334aa3a81310..e2ee60b933b57 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -925,7 +925,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epu32(A, B); } -TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6)); +TEST_CONSTEXPR(match_m128i(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -4222077404774416, 1970110094049240)); __m128d test_mm_mul_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_mul_pd diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index b7fc582788c8c..e85f8cec30d0f 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -344,7 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epi32(x, y); } -TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1)); +TEST_CONSTEXPR(match_m128i(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 111670394864, -180383186984)); __m128i test_mm_mullo_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mullo_epi32 >From 13d7366dcd76a3c8917bb60d997e97bbb6f2ed98 Mon Sep 17 00:00:00 2001 From: Pedro Lobo <pedro.l...@tecnico.ulisboa.pt> Date: Thu, 14 Aug 2025 15:15:00 +0100 Subject: [PATCH 3/3] match_v2si to match_m64 --- clang/test/CodeGen/X86/mmx-builtins.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 86c2597c5e4f2..30ad05a236d3e 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -389,7 +389,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_su32(a, b); } -TEST_CONSTEXPR(match_v2si(_mm_mul_su32((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 1900534, 589796)); +TEST_CONSTEXPR(match_m64(_mm_mul_su32((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 2533154533212150)); __m64 test_mm_mulhi_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mulhi_pi16 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits