llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon) <details> <summary>Changes</summary> BLENDV intrinsics use the signbit of the condition mask to select between the LHS (false) and RHS (true) operands Fixes #<!-- -->157066 --- Full diff: https://github.com/llvm/llvm-project/pull/157126.diff 7 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+8-4) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+11) - (modified) clang/lib/AST/ExprConstant.cpp (+7-2) - (modified) clang/lib/Headers/avxintrin.h (+4-6) - (modified) clang/lib/Headers/smmintrin.h (+4-6) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+2) - (modified) clang/test/CodeGen/X86/sse41-builtins.c (+2) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 35b800f557ff5..b4ff550d27279 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -315,8 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; - def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; - def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; @@ -335,7 +333,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] } let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; + def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">; + def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; } @@ -470,8 +471,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; - def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; - def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; @@ -495,6 +494,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; } +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; + def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; +} + let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4d3d6397e5ed9..acc9f9e2cf1e4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3421,6 +3421,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case clang::X86::BI__builtin_ia32_blendvpd: + case clang::X86::BI__builtin_ia32_blendvpd256: + case clang::X86::BI__builtin_ia32_blendvps: + case clang::X86::BI__builtin_ia32_blendvps256: + return interp__builtin_elementwise_triop_fp( + S, OpPC, Call, + [](const APFloat &F, const APFloat &T, const APFloat &C, + llvm::RoundingMode) { + return C.bitcastToAPInt().isNegative() ? T : F; + }); + case clang::X86::BI__builtin_ia32_pblendvb128: case clang::X86::BI__builtin_ia32_pblendvb256: return interp__builtin_elementwise_triop( diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 100e944f9b48c..90dff96869fc7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11995,6 +11995,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_blendvpd: + case X86::BI__builtin_ia32_blendvpd256: + case X86::BI__builtin_ia32_blendvps: + case X86::BI__builtin_ia32_blendvps256: case X86::BI__builtin_ia32_pblendvb128: case X86::BI__builtin_ia32_pblendvb256: { // SSE blendv by mask signbit: "Result = C[] < 0 ? T[] : F[]". @@ -12011,8 +12015,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { const APValue &F = SourceF.getVectorElt(EltNum); const APValue &T = SourceT.getVectorElt(EltNum); - APInt C = SourceC.getVectorElt(EltNum).getInt(); - ResultElements.push_back(C.isNegative() ? T : F); + const APValue &C = SourceC.getVectorElt(EltNum); + APInt M = C.isInt() ? (APInt)C.getInt() : C.getFloat().bitcastToAPInt(); + ResultElements.push_back(M.isNegative() ? T : F); } return Success(APValue(ResultElements.data(), ResultElements.size()), E); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index bfd62dfd9a6b2..e9d8be320bec9 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1402,9 +1402,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 64-bit element in operand \a __b is copied to the same position in the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) { return (__m256d)__builtin_ia32_blendvpd256( (__v4df)__a, (__v4df)__b, (__v4df)__c); } @@ -1430,9 +1429,8 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) /// corresponding 32-bit element in operand \a __b is copied to the same /// position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) { return (__m256)__builtin_ia32_blendvps256( (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); } diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index b10fa2fe375a1..6319fdbbeb8f0 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -439,9 +439,8 @@ /// position in the result. When a mask bit is 1, the corresponding 64-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, - __m128d __V2, - __m128d __M) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) { return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2, (__v2df)__M); } @@ -466,9 +465,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, /// position in the result. When a mask bit is 1, the corresponding 32-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1, - __m128 __V2, - __m128 __M) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) { return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 622ac5d50aaf0..f255dbe1b2adc 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -99,12 +99,14 @@ __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_blendv_pd(V1, V2, V3); } +TEST_CONSTEXPR(match_m256d(_mm256_blendv_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0},(__m256d)(__v4df){-100.0, -101.0, -102.0, -103.0},(__m256d)(__v4df){0.0, -1.0, 1.0, -1.0}), 1.0f, -101.0, 3.0, -103.0)); __m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) { // CHECK-LABEL: test_mm256_blendv_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_blendv_ps(V1, V2, V3); } +TEST_CONSTEXPR(match_m256(_mm256_blendv_ps((__m256)(__v8sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f},(__m256)(__v8sf){-100.0f, -101.0f, -102.0f, -103.0f, -104.0f, -105.0f, -106.0f, -107.0f},(__m256)(__v8sf){-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, -0.0f}), -100.0f, 1.0f, -102.0f, 3.0f, -104.0f, -105.0f, 6.0f, -107.0f)); __m256d test_mm256_broadcast_pd(__m128d* A) { // CHECK-LABEL: test_mm256_broadcast_pd diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 3abfee0409f16..dca161c8038a2 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -52,12 +52,14 @@ __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_blendv_pd(V1, V2, V3); } +TEST_CONSTEXPR(match_m128d(_mm_blendv_pd((__m128d)(__v2df){2.0, -4.0},(__m128d)(__v2df){-111.0, +222.0},(__m128d)(__v2df){2.0, -2.0}), 2.0, 222.0)); __m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) { // CHECK-LABEL: test_mm_blendv_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_blendv_ps(V1, V2, V3); } +TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f},(__m128)(__v4sf){-100.0f, -101.0f, -102.0f, -103.0f},(__m128)(__v4sf){-1.0f, 2.0f, -3.0f, 0.0f}), -100.0f, 1.0f, -102.0f, 3.0f)); __m128d test_mm_ceil_pd(__m128d x) { // CHECK-LABEL: test_mm_ceil_pd `````````` </details> https://github.com/llvm/llvm-project/pull/157126 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits