llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) <details> <summary>Changes</summary> * Implemented a generic function interp__builtin_elementwise_fp_binop * NaN, Infinity, and Denormal cases can be integrated into the lambda in the future. For now, these cases are hardcoded in the generic function. Resolves:#<!-- -->169991 --- Patch is 73.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171966.diff 15 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+30-16) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+96) - (modified) clang/lib/AST/ExprConstant.cpp (+67) - (modified) clang/lib/Headers/avx512fintrin.h (+40-52) - (modified) clang/lib/Headers/avx512fp16intrin.h (+18-23) - (modified) clang/lib/Headers/avx512vlfp16intrin.h (+20-26) - (modified) clang/lib/Headers/avx512vlintrin.h (+16-16) - (modified) clang/lib/Headers/avxintrin.h (+8-12) - (modified) clang/lib/Headers/emmintrin.h (+8-8) - (modified) clang/lib/Headers/xmmintrin.h (+8-12) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+10) - (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+17) - (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+32-16) - (modified) clang/test/CodeGen/X86/sse-builtins.c (+6) - (modified) clang/test/CodeGen/X86/sse2-builtins.c (+6) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 71aee5038d518..780dfab76886e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -71,7 +71,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt", - "cmpnle", "cmpord", "min", "max"] in { + "cmpnle", "cmpord"] in { let Features = "sse" in { def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Cmp#ss : X86Builtin<"_Vector<4, 
float>(_Vector<4, float>, _Vector<4, float>)">; @@ -82,6 +82,17 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } } + foreach Cmp = ["min", "max"] in { + let Features = "sse", Attributes = [Constexpr] in { + def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + } + let Features = "sse2", Attributes = [Constexpr] in { + def Cmp#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + def Cmp#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + } + } + let Features = "sse" in { def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; @@ -465,6 +476,9 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">; def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; +} + +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { foreach Op = ["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -1006,10 +1020,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def cvtpd2dq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">; def cvtps2udq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">; def cvtpd2udq512_mask : X86Builtin<"_Vector<8, 
int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">; - def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; - def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; - def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; - def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">; @@ -1020,6 +1030,10 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; + def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; + def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; + def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; + def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; } let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { @@ -1340,12 +1354,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def divss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant 
int)">; def mulss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; def subss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; - def maxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; - def minss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; def addsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; def divsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; def mulsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; def subsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; + def maxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; + def minss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">; def maxsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; def minsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; } @@ -3371,23 +3385,20 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<5 def subph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def mulph512 : 
X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def divph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; +} + +let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def maxph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; def maxph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; } @@ -3396,6 +3407,9 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def divsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; def mulsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, 
_Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; def subsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; +} + +let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 59b48968d7b66..4c0628cd7fcc8 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2433,6 +2433,46 @@ static bool interp__builtin_elementwise_int_unaryop( return true; } +static bool interp__builtin_elementwise_fp_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &, + std::optional<APSInt> RoundingMode)> + Fn) { + assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3)); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + assert(VT->getElementType()->isFloatingType()); + unsigned NumElems = VT->getNumElements(); + + // Vector case. 
+ assert(Call->getArg(0)->getType()->isVectorType() && + Call->getArg(1)->getType()->isVectorType()); + assert(VT->getElementType() == + Call->getArg(1)->getType()->castAs<VectorType>()->getElementType()); + assert(VT->getNumElements() == + Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements()); + + std::optional<APSInt> RoundingMode = std::nullopt; + if (Call->getNumArgs() == 3) { + RoundingMode = popToAPSInt(S, Call->getArg(2)); + } + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) { + using T = PrimConv<PT_Float>::T; + APFloat ElemA = APtr.elem<T>(ElemIdx).getAPFloat(); + APFloat ElemB = BPtr.elem<T>(ElemIdx).getAPFloat(); + if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() || + ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal()) + return false; + Dst.elem<T>(ElemIdx) = static_cast<T>(Fn(ElemA, ElemB, RoundingMode)); + } + + Dst.initializeAllElements(); + + return true; +} + static bool interp__builtin_elementwise_int_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { @@ -5575,6 +5615,62 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, }); } + case clang::X86::BI__builtin_ia32_minps: + case clang::X86::BI__builtin_ia32_minpd: + case clang::X86::BI__builtin_ia32_minps256: + case clang::X86::BI__builtin_ia32_minpd256: + case clang::X86::BI__builtin_ia32_minph128: + case clang::X86::BI__builtin_ia32_minph256: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + else + return llvm::minimum(A, B); + }); + + case clang::X86::BI__builtin_ia32_maxps: + case clang::X86::BI__builtin_ia32_maxpd: + case clang::X86::BI__builtin_ia32_maxps256: + case 
clang::X86::BI__builtin_ia32_maxpd256: + case clang::X86::BI__builtin_ia32_maxph128: + case clang::X86::BI__builtin_ia32_maxph256: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + else + return llvm::maximum(A, B); + }); + + case clang::X86::BI__builtin_ia32_minps512: + case clang::X86::BI__builtin_ia32_minpd512: + case clang::X86::BI__builtin_ia32_minph512: { + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + else + return llvm::minimum(A, B); + }); + } + + case clang::X86::BI__builtin_ia32_maxps512: + case clang::X86::BI__builtin_ia32_maxpd512: + case clang::X86::BI__builtin_ia32_maxph512: { + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + else + return llvm::maximum(A, B); + }); + } + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d81496ffd74e0..0992037ae800c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14256,6 +14256,73 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case clang::X86::BI__builtin_ia32_minps: + case clang::X86::BI__builtin_ia32_minpd: + case clang::X86::BI__builtin_ia32_maxpd: + case clang::X86::BI__builtin_ia32_maxps: + case clang::X86::BI__builtin_ia32_minps256: + case clang::X86::BI__builtin_ia32_maxps256: + case clang::X86::BI__builtin_ia32_minpd256: + case clang::X86::BI__builtin_ia32_maxpd256: + case clang::X86::BI__builtin_ia32_minps512: + case clang::X86::BI__builtin_ia32_maxps512: + case clang::X86::BI__builtin_ia32_minpd512: + case 
clang::X86::BI__builtin_ia32_maxpd512: + case clang::X86::BI__builtin_ia32_minph128: + case clang::X86::BI__builtin_ia32_maxph128: + case clang::X86::BI__builtin_ia32_minph256: + case clang::X86::BI__builtin_ia32_maxph256: + case clang::X86::BI__builtin_ia32_minph512: + case clang::X86::BI__builtin_ia32_maxph512: { + + APValue AVec, BVec; + if (!EvaluateAsRValue(Info, E->getArg(0), AVec) || + !EvaluateAsRValue(Info, E->getArg(1), BVec)) + return false; + + assert(AVec.isVector() && BVec.isVector()); + assert(AVec.getVectorLength() == BVec.getVectorLength()); + + bool IsMin; + switch (E->getBuiltinCallee()) { + case clang::X86::BI__builtin_ia32_minps: + case clang::X86::BI__builtin_ia32_minpd: + case clang::X86::BI__builtin_ia32_minps256: + case clang::X86::BI__builtin_ia32_minpd256: + case clang::X86::BI__builtin_ia32_minps512: + case clang::X86::BI__builtin_ia32_minpd512: + case clang::X86::BI__builtin_ia32_minph128: + case clang::X86::BI__builtin_ia32_minph256: + case clang::X86::BI__builtin_ia32_minph512: + IsMin = true; + break; + default: + IsMin = false; + } + const auto *DstVTy = E->getType()->castAs<VectorType>(); + unsigned NumDstElems = DstVTy->getNumElements(); + SmallVector<APValue, 16> ResultElems; + ResultElems.reserve(NumDstElems); + + for (unsigned EltIdx = 0; EltIdx != NumDstElems; ++EltIdx) { + const APFloat &EltA = AVec.getVectorElt(EltIdx).getFloat(); + const APFloat &EltB = BVec.getVectorElt(EltIdx).getFloat(); + if (EltA.isZero() && EltB.isZero()) { + ResultElems.push_back(BVec.getVectorElt(EltIdx)); + } else { + if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() || + EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal()) + return false; + if (IsMin) + ResultElems.push_back(APValue(llvm::minimum(EltA, EltB))); + else + ResultElems.push_back(APValue(llvm::maximum(EltA, EltB))); + } + } + + return Success(APValue(ResultElems.data(), ResultElems.size()), E); + } + case clang::X86::BI__builtin_ia32_vcvtps2ph: case 
clang::X86::BI__builtin_ia32_vcvtps2ph256: { APValue SrcVec; diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 02282cbccf05d..649ac6faaa3fe 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -932,24 +932,21 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_max_pd(__m512d __A, __m512d __B) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)_mm512_setzero_pd()); @@ -969,31 +966,28 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_max_ps(__m512 __A, __m512 __B) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 
-{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/171966 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
