https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/156047
None >From 417527220c18d18ab894a032895d6f1872c878ee Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Fri, 29 Aug 2025 13:29:00 +0100 Subject: [PATCH] [X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 17 ++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++ clang/lib/AST/ExprConstant.cpp | 56 ++++++++++++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 17 +++++-- clang/test/CodeGen/X86/avx512vl-builtins.c | 25 +++++++++- clang/test/CodeGen/X86/xop-builtins.c | 4 ++ 6 files changed, 132 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index dfff92839da4e..3cac91cf80f5d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -934,10 +934,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">; def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">; def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">; - def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">; - def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">; - def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">; - def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">; def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; @@ -962,6 +958,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; } +let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">; + def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">; + def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">; + def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">; +} + let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">; def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">; @@ -2045,21 +2048,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">; def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 1e7736955c065..c34d768f96211 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3325,6 +3325,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.lshr(RHS.getZExtValue()); }); + case clang::X86::BI__builtin_ia32_vprotbi: + case clang::X86::BI__builtin_ia32_vprotdi: + case clang::X86::BI__builtin_ia32_vprotqi: + case clang::X86::BI__builtin_ia32_vprotwi: + case clang::X86::BI__builtin_ia32_prold128: + case clang::X86::BI__builtin_ia32_prold256: + case clang::X86::BI__builtin_ia32_prold512: + case clang::X86::BI__builtin_ia32_prolq128: + case clang::X86::BI__builtin_ia32_prolq256: + case clang::X86::BI__builtin_ia32_prolq512: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) { + return LHS.rotl(RHS); + }); + + case clang::X86::BI__builtin_ia32_prord128: + case clang::X86::BI__builtin_ia32_prord256: + case clang::X86::BI__builtin_ia32_prord512: + case clang::X86::BI__builtin_ia32_prorq128: + case clang::X86::BI__builtin_ia32_prorq256: + case clang::X86::BI__builtin_ia32_prorq512: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) { + return LHS.rotr(RHS); + }); + case Builtin::BI__builtin_elementwise_max: case Builtin::BI__builtin_elementwise_min: return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3ca4ac4b92ba7..b4f1e76187e25 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_vprotbi: + case clang::X86::BI__builtin_ia32_vprotdi: + case clang::X86::BI__builtin_ia32_vprotqi: + case clang::X86::BI__builtin_ia32_vprotwi: + case clang::X86::BI__builtin_ia32_prold128: + case clang::X86::BI__builtin_ia32_prold256: + case clang::X86::BI__builtin_ia32_prold512: + case clang::X86::BI__builtin_ia32_prolq128: + case clang::X86::BI__builtin_ia32_prolq256: + case clang::X86::BI__builtin_ia32_prolq512: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen); + + APSInt RHS = SourceRHS.getInt(); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt(); + ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_prord128: + case clang::X86::BI__builtin_ia32_prord256: + case clang::X86::BI__builtin_ia32_prord512: + case clang::X86::BI__builtin_ia32_prorq128: + case clang::X86::BI__builtin_ia32_prorq256: + case clang::X86::BI__builtin_ia32_prorq512: { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen); + + APSInt RHS = SourceRHS.getInt(); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt(); + ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_max: case Builtin::BI__builtin_elementwise_min: { APValue SourceLHS, SourceRHS; diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index bddc5d11818ef..a109dad4ebdcc 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) { // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> return _mm512_cvtepu32_epi64(__X); } - TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292)); __m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { @@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) { // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> return _mm512_cvtepu16_epi32(__A); } - TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524)); __m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { @@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) { // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> return _mm512_cvtepu16_epi64(__A); } - TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532)); __m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { @@ -4065,12 +4062,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return _mm512_maskz_cvtepu16_epi64(__U, __A); } - __m512i test_mm512_rol_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_rol_epi32 // CHECK: @llvm.fshl.v16i32 return _mm512_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481)); __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_rol_epi32 @@ -4078,6 +4075,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rol_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481)); __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_rol_epi32 @@ -4085,12 +4083,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0)); __m512i test_mm512_rol_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_rol_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256)); __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_rol_epi64 @@ -4098,6 +4098,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99)); __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_rol_epi64 @@ -4105,6 +4106,7 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0)); __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rolv_epi32 @@ -4151,6 +4153,7 @@ __m512i test_mm512_ror_epi32(__m512i __A) { // CHECK: @llvm.fshr.v16i32 return _mm512_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921)); __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_ror_epi32 @@ -4158,6 +4161,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921)); __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_ror_epi32 @@ -4165,12 +4169,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0)); __m512i test_mm512_ror_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_ror_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL)); __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_ror_epi64 @@ -4178,6 +4184,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ror_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99)); __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_ror_epi64 @@ -4185,7 +4192,7 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_ror_epi64(__U, __A, 5); } - +TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0)); __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_rorv_epi32 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 477326f35518a..78e01b944ec5d 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -5666,6 +5666,7 @@ __m128i test_mm_rol_epi32(__m128i __A) { // CHECK: @llvm.fshl.v4i32 return _mm_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97)); __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_rol_epi32 @@ -5673,6 +5674,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_rol_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99)); __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_rol_epi32 @@ -5680,12 +5682,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97)); __m256i test_mm256_rol_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_rol_epi32 // CHECK: @llvm.fshl.v8i32 return _mm256_rol_epi32(__A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256)); __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_rol_epi32 @@ -5693,6 +5697,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_rol_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99)); __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_rol_epi32 @@ -5700,12 +5705,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_rol_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0)); __m128i test_mm_rol_epi64(__m128i __A) { // CHECK-LABEL: test_mm_rol_epi64 // CHECK: @llvm.fshl.v2i64 return _mm_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881)); __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_rol_epi64 @@ -5713,6 +5720,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99)); __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_rol_epi64 @@ -5720,12 +5728,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_rol_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -5242881)); __m256i test_mm256_rol_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_rol_epi64 // CHECK: @llvm.fshl.v4i64 return _mm256_rol_epi64(__A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_rol_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, -5242881, -5767169, 6815744)); __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_rol_epi64 @@ -5733,6 +5743,7 @@ __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_rol_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_rol_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, 99, 99, 6815744)); __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_rol_epi64 @@ -5740,6 +5751,7 @@ __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_rol_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_rol_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -5767169, 6815744)); __m128i test_mm_rolv_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rolv_epi32 @@ -5826,6 +5838,7 @@ __m128i test_mm_ror_epi32(__m128i __A) { // CHECK: @llvm.fshr.v4i32 return _mm_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_ror_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 402653184, -402653185)); __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_ror_epi32 @@ -5833,6 +5846,7 @@ __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_mask_ror_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 99, 99)); __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_ror_epi32 @@ -5840,12 +5854,14 @@ __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_ror_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, 0, 0, -402653185)); __m256i test_mm256_ror_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_ror_epi32 // CHECK: @llvm.fshr.v8i32 return _mm256_ror_epi32(__A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_ror_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824)); __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_ror_epi32 @@ -5853,6 +5869,7 @@ __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_ror_epi32(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_ror_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99)); __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_ror_epi32 @@ -5860,12 +5877,14 @@ __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_ror_epi32(__U, __A, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_ror_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, 0, -536870913, 805306368, 0, 0)); __m128i test_mm_ror_epi64(__m128i __A) { // CHECK-LABEL: test_mm_ror_epi64 // CHECK: @llvm.fshr.v2i64 return _mm_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_ror_epi64(((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, -351843720888321LL)); __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_ror_epi64 @@ -5873,6 +5892,7 @@ __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_ror_epi64(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_mask_ror_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, 99)); __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_ror_epi64 @@ -5880,12 +5900,14 @@ __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_ror_epi64(__U, __A, 5); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_ror_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -351843720888321LL)); __m256i test_mm256_ror_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_ror_epi64 // CHECK: @llvm.fshr.v4i64 return _mm256_ror_epi64(__A, 5); } +TEST_CONSTEXPR(match_v4di(_mm256_ror_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, -351843720888321LL, -387028092977153LL, 457396837154816LL)); __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_ror_epi64 @@ -5893,6 +5915,7 @@ __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_ror_epi64(__W, __U, __A,5); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_ror_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, 99, 99, 457396837154816LL)); __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_ror_epi64 @@ -5900,7 +5923,7 @@ __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_ror_epi64(__U, __A, 5); } - +TEST_CONSTEXPR(match_v4di(_mm256_maskz_ror_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -387028092977153LL, 457396837154816LL)); __m128i test_mm_rorv_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_rorv_epi32 diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c index cd403d5876fa3..994fc7b3e136a 100644 --- a/clang/test/CodeGen/X86/xop-builtins.c +++ b/clang/test/CodeGen/X86/xop-builtins.c @@ -239,24 +239,28 @@ __m128i test_mm_roti_epi8(__m128i a) { // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1)) return _mm_roti_epi8(a, 1); } +TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 56, -57, 72, -73, 88, -89, 104, -105, 120)); __m128i test_mm_roti_epi16(__m128i a) { // CHECK-LABEL: test_mm_roti_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50)) return _mm_roti_epi16(a, 50); } +TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, -7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17)); __m128i test_mm_roti_epi32(__m128i a) { // CHECK-LABEL: test_mm_roti_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226)) return _mm_roti_epi32(a, -30); } +TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97)); __m128i test_mm_roti_epi64(__m128i a) { // CHECK-LABEL: test_mm_roti_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100)) return _mm_roti_epi64(a, 100); } +TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 51904512, -28311553)); __m128i test_mm_shl_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_shl_epi8 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits