https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/129207
>From 5c3b36b06294b43088d277c689c3367825708779 Mon Sep 17 00:00:00 2001 From: YunQiang Su <s...@debian.org> Date: Fri, 28 Feb 2025 16:09:04 +0800 Subject: [PATCH 1/6] Clang: Add minnum/maxnum builtin functions support With https://github.com/llvm/llvm-project/pull/112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0 as greater than -0.0, while libc does not require this of fmin(3)/fmax(3). To make llvm.minnum/llvm.maxnum easy to use, we define builtin functions for them: __builtin_minnum, __builtin_elementwise_minnum, __builtin_maxnum and __builtin_elementwise_maxnum. All of them support _Float16, float, double, long double. --- clang/include/clang/Basic/Builtins.td | 24 ++ clang/include/clang/Sema/Sema.h | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 26 ++ clang/lib/Sema/SemaChecking.cpp | 45 ++++ clang/test/CodeGen/builtin-maxnum-minnum.c | 269 +++++++++++++++++++++ 5 files changed, 365 insertions(+) create mode 100644 clang/test/CodeGen/builtin-maxnum-minnum.c diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 72a5e495c4059..e73c436020913 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -209,6 +209,18 @@ def FmaxF16F128 : Builtin, F16F128MathTemplate { let Prototype = "T(T, T)"; } +def MinNum : Builtin { + let Spellings = ["__builtin_minnum"]; + let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + +def MaxNum : Builtin { + let Spellings = ["__builtin_maxnum"]; + let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + def FminF16F128 : Builtin, F16F128MathTemplate { let Spellings = ["__builtin_fmin"]; let 
Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr]; @@ -1304,6 +1316,18 @@ def ElementwiseMin : Builtin { let Prototype = "void(...)"; } +def ElementwiseMaxNum : Builtin { + let Spellings = ["__builtin_elementwise_maxnum"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def ElementwiseMinNum : Builtin { + let Spellings = ["__builtin_elementwise_minnum"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ElementwiseMaximum : Builtin { let Spellings = ["__builtin_elementwise_maximum"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9561c59b1facf..77550ede0300d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2582,6 +2582,7 @@ class Sema final : public SemaBase { ExprResult AtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op); + bool BuiltinMaxNumMinNumMath(CallExpr *TheCall); /// \param FPOnly restricts the arguments to floating-point types. 
bool BuiltinElementwiseMath(CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr = diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c126f88b9e3a5..cc4c5b0345344 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3238,6 +3238,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Intrinsic::minnum, Intrinsic::experimental_constrained_minnum)); + case Builtin::BI__builtin_maxnum: + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::maxnum, + Intrinsic::experimental_constrained_maxnum)); + + case Builtin::BI__builtin_minnum: + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::minnum, + Intrinsic::experimental_constrained_minnum)); + case Builtin::BIfmaximum_num: case Builtin::BIfmaximum_numf: case Builtin::BIfmaximum_numl: @@ -4429,6 +4439,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_elementwise_maxnum: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, Op0, + Op1, nullptr, "elt.maxnum"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_elementwise_minnum: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minnum, Op0, + Op1, nullptr, "elt.minnum"); + return RValue::get(Result); + } + case Builtin::BI__builtin_elementwise_maximum: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 12a8894cc7f47..a60ad6fc037ef 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2757,8 +2757,17 @@ 
Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; + case Builtin::BI__builtin_minnum: + case Builtin::BI__builtin_maxnum: { + if (BuiltinMaxNumMinNumMath(TheCall)) + return ExprError(); + break; + } + // These builtins restrict the element type to floating point // types only, and take in two arguments. + case Builtin::BI__builtin_elementwise_minnum: + case Builtin::BI__builtin_elementwise_maxnum: case Builtin::BI__builtin_elementwise_minimum: case Builtin::BI__builtin_elementwise_maximum: case Builtin::BI__builtin_elementwise_atan2: @@ -15276,6 +15285,42 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall( return false; } +bool Sema::BuiltinMaxNumMinNumMath(CallExpr *TheCall) { + if (checkArgCount(TheCall, 2)) + return true; + + ExprResult OrigArg0 = TheCall->getArg(0); + ExprResult OrigArg1 = TheCall->getArg(1); + + // Do standard promotions between the two arguments, returning their common + // type. + QualType Res = UsualArithmeticConversions( + OrigArg0, OrigArg1, TheCall->getExprLoc(), ACK_Comparison); + if (OrigArg0.isInvalid() || OrigArg1.isInvalid()) + return true; + + // Make sure any conversions are pushed back into the call; this is + // type safe since unordered compare builtins are declared as "_Bool + // foo(...)". + TheCall->setArg(0, OrigArg0.get()); + TheCall->setArg(1, OrigArg1.get()); + + if (!OrigArg0.get()->isTypeDependent() && OrigArg1.get()->isTypeDependent()) + return true; + + // If the common type isn't a real floating type, then the arguments were + // invalid for this operation. 
+ if (Res.isNull() || !Res->isRealFloatingType()) + return Diag(OrigArg0.get()->getBeginLoc(), + diag::err_typecheck_call_invalid_ordered_compare) + << OrigArg0.get()->getType() << OrigArg1.get()->getType() + << SourceRange(OrigArg0.get()->getBeginLoc(), + OrigArg1.get()->getEndLoc()); + + TheCall->setType(Res); + return false; +} + bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr) { if (auto Res = BuiltinVectorMath(TheCall, ArgTyRestr); Res.has_value()) { diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c new file mode 100644 index 0000000000000..3971ec644ffc5 --- /dev/null +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -0,0 +1,269 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK + +typedef _Float16 half8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef double double2 __attribute__((ext_vector_type(2))); +typedef long double ldouble2 __attribute__((ext_vector_type(2))); + +// CHECK-LABEL: define dso_local half @fmin16( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.minnum.f16(half [[TMP0]], half [[TMP1]]) +// CHECK-NEXT: ret half [[TMP2]] +// +_Float16 fmin16(_Float16 a, _Float16 b) { + return 
__builtin_minnum(a, b); +} +// CHECK-LABEL: define dso_local <8 x half> @pfmin16( +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]] +// +half8 pfmin16(half8 a, half8 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local float @fmin32( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] +// +float fmin32(float a, float b) { + return __builtin_minnum(a, b); +} +// CHECK-LABEL: define dso_local <4 x float> @pfmin32( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = 
alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] +// +float4 pfmin32(float4 a, float4 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local double @fmin64( +// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9:![0-9]+]] +// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP0]], double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] +// +double fmin64(double a, double b) { + return __builtin_minnum(a, b); +} +// CHECK-LABEL: define dso_local <2 x double> @pfmin64( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa 
[[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]] +// +double2 pfmin64(double2 a, double2 b) { + return __builtin_elementwise_minnum(a, b); +} + +// CHECK-LABEL: define dso_local x86_fp80 @fmin80( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 +// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.minnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]]) +// CHECK-NEXT: ret x86_fp80 [[TMP2]] +// +long double fmin80(long double a, long double b) { + return __builtin_minnum(a, b); +} +// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmin80( +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, 
ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) +// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] +// +ldouble2 pfmin80(ldouble2 a, ldouble2 b) { + return __builtin_elementwise_minnum(a, b); +} + +// CHECK-LABEL: define dso_local half @fmax16( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.maxnum.f16(half [[TMP0]], half [[TMP1]]) +// CHECK-NEXT: ret half [[TMP2]] +// +_Float16 fmax16(_Float16 a, _Float16 b) { + return __builtin_maxnum(a, b); +} +// CHECK-LABEL: define dso_local <8 x half> @pfmax16( +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]] +// 
+half8 pfmax16(half8 a, half8 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local float @fmax32( +// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] +// +float fmax32(float a, float b) { + return __builtin_maxnum(a, b); +} +// CHECK-LABEL: define dso_local <4 x float> @pfmax32( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] +// +float4 pfmax32(float4 a, float4 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local double @fmax64( +// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// 
CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[TMP0]], double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] +// +double fmax64(double a, double b) { + return __builtin_maxnum(a, b); +} +// CHECK-LABEL: define dso_local <2 x double> @pfmax64( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]] +// +double2 pfmax64(double2 a, double2 b) { + return __builtin_elementwise_maxnum(a, b); +} + +// CHECK-LABEL: define dso_local x86_fp80 @fmax80( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 +// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, 
ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.maxnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]]) +// CHECK-NEXT: ret x86_fp80 [[TMP2]] +// +long double fmax80(long double a, long double b) { + return __builtin_maxnum(a, b); +} +// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmax80( +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) +// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] +// +ldouble2 pfmax80(ldouble2 a, ldouble2 b) { + return __builtin_elementwise_minnum(a, b); +} + +//. 
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"_Float16", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} +// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"float", [[META4]], i64 0} +// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK: [[META10]] = !{!"double", [[META4]], i64 0} +// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} +// CHECK: [[META12]] = !{!"long double", [[META4]], i64 0} +//. >From 6d49b74a5ab6f569ea9fba4ae5b6278ed1668145 Mon Sep 17 00:00:00 2001 From: YunQiang Su <s...@debian.org> Date: Fri, 28 Feb 2025 17:19:32 +0800 Subject: [PATCH 2/6] Add bfloat cases --- clang/test/CodeGen/builtin-maxnum-minnum.c | 154 +++++++++++++++------ 1 file changed, 109 insertions(+), 45 deletions(-) diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c index 3971ec644ffc5..35bfd34ae4711 100644 --- a/clang/test/CodeGen/builtin-maxnum-minnum.c +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -1,12 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK typedef _Float16 half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bf16x8 __attribute__((ext_vector_type(8))); typedef float float4 __attribute__((ext_vector_type(4))); typedef double double2 __attribute__((ext_vector_type(2))); typedef long double ldouble2 __attribute__((ext_vector_type(2))); -// CHECK-LABEL: define dso_local half @fmin16( +// CHECK-LABEL: define dso_local noundef half @_Z6fmin16DF16_DF16_( 
// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 @@ -21,7 +22,7 @@ typedef long double ldouble2 __attribute__((ext_vector_type(2))); _Float16 fmin16(_Float16 a, _Float16 b) { return __builtin_minnum(a, b); } -// CHECK-LABEL: define dso_local <8 x half> @pfmin16( +// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_( // CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 @@ -36,22 +37,53 @@ _Float16 fmin16(_Float16 a, _Float16 b) { half8 pfmin16(half8 a, half8 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local float @fmin32( + +// CHECK-LABEL: define dso_local noundef bfloat @_Z7fmin16bDF16bDF16b( +// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.minnum.bf16(bfloat [[TMP0]], bfloat [[TMP1]]) +// CHECK-NEXT: ret bfloat [[TMP2]] +// +__bf16 fmin16b(__bf16 a, __bf16 b) { + return __builtin_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_( +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) +// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]] +// +bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef float @_Z6fmin32ff( // CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]] +// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] // CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]]) // CHECK-NEXT: ret float [[TMP2]] // float fmin32(float a, float b) { return __builtin_minnum(a, b); } -// CHECK-LABEL: define dso_local <4 x float> @pfmin32( +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_( // CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // 
CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 @@ -66,22 +98,22 @@ float fmin32(float a, float b) { float4 pfmin32(float4 a, float4 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local double @fmin64( +// CHECK-LABEL: define dso_local noundef double @_Z6fmin64dd( // CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9:![0-9]+]] -// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP0]], double [[TMP1]]) // CHECK-NEXT: ret double [[TMP2]] // double fmin64(double a, double b) { return __builtin_minnum(a, b); } -// CHECK-LABEL: define dso_local <2 x double> @pfmin64( +// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_( // CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 @@ -97,22 +129,22 @@ double2 pfmin64(double2 a, double2 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local x86_fp80 @fmin80( +// CHECK-LABEL: define dso_local noundef x86_fp80 @_Z6fmin80ee( // CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef 
[[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA13:![0-9]+]] +// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] // CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.minnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]]) // CHECK-NEXT: ret x86_fp80 [[TMP2]] // long double fmin80(long double a, long double b) { return __builtin_minnum(a, b); } -// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmin80( +// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_( // CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 @@ -130,7 +162,7 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local half @fmax16( +// CHECK-LABEL: define dso_local noundef half @_Z6fmax16DF16_DF16_( // CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 @@ -145,7 +177,7 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { _Float16 fmax16(_Float16 a, _Float16 b) { return __builtin_maxnum(a, b); } -// 
CHECK-LABEL: define dso_local <8 x half> @pfmax16( +// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_( // CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 @@ -160,22 +192,52 @@ _Float16 fmax16(_Float16 a, _Float16 b) { half8 pfmax16(half8 a, half8 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local float @fmax32( +// CHECK-LABEL: define dso_local noundef bfloat @_Z7fmax16bDF16bDF16b( +// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.maxnum.bf16(bfloat [[TMP0]], bfloat [[TMP1]]) +// CHECK-NEXT: ret bfloat [[TMP2]] +// +__bf16 fmax16b(__bf16 a, __bf16 b) { + return __builtin_maxnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_( +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa 
[[TBAA6]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) +// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]] +// +bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local noundef float @_Z6fmax32ff( // CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] // CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]]) // CHECK-NEXT: ret float [[TMP2]] // float fmax32(float a, float b) { return __builtin_maxnum(a, b); } -// CHECK-LABEL: define dso_local <4 x float> @pfmax32( +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_( // CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 @@ -190,22 +252,22 @@ float fmax32(float a, float b) { float4 pfmax32(float4 a, float4 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local double @fmax64( +// CHECK-LABEL: define dso_local noundef double @_Z6fmax64dd( // CHECK-SAME: double noundef [[A:%.*]], double noundef 
[[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] -// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]] +// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[TMP0]], double [[TMP1]]) // CHECK-NEXT: ret double [[TMP2]] // double fmax64(double a, double b) { return __builtin_maxnum(a, b); } -// CHECK-LABEL: define dso_local <2 x double> @pfmax64( +// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_( // CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 @@ -221,22 +283,22 @@ double2 pfmax64(double2 a, double2 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local x86_fp80 @fmax80( +// CHECK-LABEL: define dso_local noundef x86_fp80 @_Z6fmax80ee( // CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr 
[[A_ADDR]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] +// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] // CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.maxnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]]) // CHECK-NEXT: ret x86_fp80 [[TMP2]] // long double fmax80(long double a, long double b) { return __builtin_maxnum(a, b); } -// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmax80( +// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_( // CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 @@ -258,12 +320,14 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) { // CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"_Float16", [[META4:![0-9]+]], i64 0} // CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} // CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} // CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// CHECK: [[META8]] = !{!"float", [[META4]], i64 0} +// CHECK: [[META8]] = !{!"__bf16", [[META4]], i64 0} // CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// CHECK: [[META10]] = !{!"double", [[META4]], i64 0} +// CHECK: [[META10]] = !{!"float", [[META4]], i64 0} // CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"long double", [[META4]], i64 0} +// CHECK: [[META12]] = !{!"double", [[META4]], i64 0} +// CHECK: 
[[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} +// CHECK: [[META14]] = !{!"long double", [[META4]], i64 0} //. >From 967bba4e58b77c61af06c2c28d81a2de04283367 Mon Sep 17 00:00:00 2001 From: YunQiang Su <yunqi...@isrc.iscas.ac.cn> Date: Tue, 4 Mar 2025 11:24:42 +0800 Subject: [PATCH 3/6] remove non-elementwise --- clang/include/clang/Basic/Builtins.td | 12 - clang/include/clang/Sema/Sema.h | 1 - clang/lib/CodeGen/CGBuiltin.cpp | 10 - clang/lib/Sema/SemaChecking.cpp | 7 - clang/test/CodeGen/builtin-maxnum-minnum.c | 274 +++++---------------- 5 files changed, 56 insertions(+), 248 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index e73c436020913..192af5602f53a 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -209,18 +209,6 @@ def FmaxF16F128 : Builtin, F16F128MathTemplate { let Prototype = "T(T, T)"; } -def MinNum : Builtin { - let Spellings = ["__builtin_minnum"]; - let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr]; - let Prototype = "void(...)"; -} - -def MaxNum : Builtin { - let Spellings = ["__builtin_maxnum"]; - let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr]; - let Prototype = "void(...)"; -} - def FminF16F128 : Builtin, F16F128MathTemplate { let Spellings = ["__builtin_fmin"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr]; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 77550ede0300d..9561c59b1facf 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2582,7 +2582,6 @@ class Sema final : public SemaBase { ExprResult AtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op); - bool BuiltinMaxNumMinNumMath(CallExpr *TheCall); /// \param FPOnly restricts the arguments to floating-point types. 
bool BuiltinElementwiseMath(CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr = diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index cc4c5b0345344..ba4d905816e75 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3238,16 +3238,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Intrinsic::minnum, Intrinsic::experimental_constrained_minnum)); - case Builtin::BI__builtin_maxnum: - return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( - *this, E, Intrinsic::maxnum, - Intrinsic::experimental_constrained_maxnum)); - - case Builtin::BI__builtin_minnum: - return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( - *this, E, Intrinsic::minnum, - Intrinsic::experimental_constrained_minnum)); - case Builtin::BIfmaximum_num: case Builtin::BIfmaximum_numf: case Builtin::BIfmaximum_numl: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a60ad6fc037ef..f0b027083caa3 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2757,13 +2757,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; - case Builtin::BI__builtin_minnum: - case Builtin::BI__builtin_maxnum: { - if (BuiltinMaxNumMinNumMath(TheCall)) - return ExprError(); - break; - } - // These builtins restrict the element type to floating point // types only, and take in two arguments. 
case Builtin::BI__builtin_elementwise_minnum: diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c index 35bfd34ae4711..69cec72495d30 100644 --- a/clang/test/CodeGen/builtin-maxnum-minnum.c +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -7,154 +7,77 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef double double2 __attribute__((ext_vector_type(2))); typedef long double ldouble2 __attribute__((ext_vector_type(2))); -// CHECK-LABEL: define dso_local noundef half @_Z6fmin16DF16_DF16_( -// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 -// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.minnum.f16(half [[TMP0]], half [[TMP1]]) -// CHECK-NEXT: ret half [[TMP2]] -// -_Float16 fmin16(_Float16 a, _Float16 b) { - return __builtin_minnum(a, b); -} // CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_( -// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// 
CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]] // half8 pfmin16(half8 a, half8 b) { return __builtin_elementwise_minnum(a, b); } - -// CHECK-LABEL: define dso_local noundef bfloat @_Z7fmin16bDF16bDF16b( -// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 -// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.minnum.bf16(bfloat [[TMP0]], bfloat [[TMP1]]) -// CHECK-NEXT: ret bfloat [[TMP2]] -// -__bf16 fmin16b(__bf16 a, __bf16 b) { - return __builtin_minnum(a, b); -} // CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_( -// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 
16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]] // bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local noundef float @_Z6fmin32ff( -// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]] -// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]]) -// CHECK-NEXT: ret float [[TMP2]] -// -float fmin32(float a, float b) { - return __builtin_minnum(a, b); -} // CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_( -// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // 
CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] // float4 pfmin32(float4 a, float4 b) { return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local noundef double @_Z6fmin64dd( -// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP0]], double [[TMP1]]) -// CHECK-NEXT: ret double [[TMP2]] -// -double fmin64(double a, double b) { - return __builtin_minnum(a, b); -} // CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_( -// CHECK-SAME: <2 x double> 
noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]] // double2 pfmin64(double2 a, double2 b) { return __builtin_elementwise_minnum(a, b); } - -// CHECK-LABEL: define dso_local noundef x86_fp80 @_Z6fmin80ee( -// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA13:![0-9]+]] -// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.minnum.f80(x86_fp80 [[TMP0]], 
x86_fp80 [[TMP1]]) -// CHECK-NEXT: ret x86_fp80 [[TMP2]] -// -long double fmin80(long double a, long double b) { - return __builtin_minnum(a, b); -} // CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_( -// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -162,120 +85,60 @@ ldouble2 pfmin80(ldouble2 a, ldouble2 b) { 
return __builtin_elementwise_minnum(a, b); } -// CHECK-LABEL: define dso_local noundef half @_Z6fmax16DF16_DF16_( -// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 -// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]] -// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.maxnum.f16(half [[TMP0]], half [[TMP1]]) -// CHECK-NEXT: ret half [[TMP2]] -// -_Float16 fmax16(_Float16 a, _Float16 b) { - return __builtin_maxnum(a, b); -} // CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_( -// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 -// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> 
@llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]] // half8 pfmax16(half8 a, half8 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local noundef bfloat @_Z7fmax16bDF16bDF16b( -// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 -// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.maxnum.bf16(bfloat [[TMP0]], bfloat [[TMP1]]) -// CHECK-NEXT: ret bfloat [[TMP2]] -// -__bf16 fmax16b(__bf16 a, __bf16 b) { - return __builtin_maxnum(a, b); -} // CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_( -// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 -// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x 
bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) // CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]] // bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local noundef float @_Z6fmax32ff( -// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA9]] -// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]]) -// CHECK-NEXT: ret float [[TMP2]] -// -float fmax32(float a, float b) { - return __builtin_maxnum(a, b); -} // CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_( -// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <4 x float> [[A]], ptr 
[[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] // float4 pfmax32(float4 a, float4 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local noundef double @_Z6fmax64dd( -// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[TMP0]], double [[TMP1]]) -// CHECK-NEXT: ret double [[TMP2]] -// -double fmax64(double a, double b) { - return __builtin_maxnum(a, b); -} // CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_( -// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x 
double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]] +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) // CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]] // @@ -283,32 +146,17 @@ double2 pfmax64(double2 a, double2 b) { return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local noundef x86_fp80 @_Z6fmax80ee( -// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.maxnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]]) -// CHECK-NEXT: ret x86_fp80 [[TMP2]] -// -long double fmax80(long double a, long double b) { - return __builtin_maxnum(a, b); -} // CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_( -// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2]] 
{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 -// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] // CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) // CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] // @@ -318,16 +166,6 @@ ldouble2 pfmax80(ldouble2 a, ldouble2 b) { //. 
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} -// CHECK: [[META3]] = !{!"_Float16", [[META4:![0-9]+]], i64 0} -// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} -// CHECK: [[META5]] = !{!"Simple C++ TBAA"} -// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} -// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// CHECK: [[META8]] = !{!"__bf16", [[META4]], i64 0} -// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} -// CHECK: [[META10]] = !{!"float", [[META4]], i64 0} -// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"double", [[META4]], i64 0} -// CHECK: [[TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0} -// CHECK: [[META14]] = !{!"long double", [[META4]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C++ TBAA"} //. >From f987781b8d7615de8b64b778a89c9ea1f85fba0b Mon Sep 17 00:00:00 2001 From: YunQiang Su <yunqi...@isrc.iscas.ac.cn> Date: Wed, 19 Mar 2025 13:53:44 +0800 Subject: [PATCH 4/6] Add docs --- clang/docs/LanguageExtensions.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d4771775c9739..8ecbdc492558a 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -818,7 +818,23 @@ of different sizes and signs is forbidden in binary and ternary builtins. T __builtin_elementwise_fmod(T x, T y) return The floating-point remainder of (x/y) whose sign floating point types matches the sign of x. T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types + For floating point types, follows semantics of maxNum + in IEEE 754-2008. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. 
T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types + For floating point types, follows semantics of minNum + in IEEE 754-2008. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. + T __builtin_elementwise_maxnum(T x, T y) return x or y, whichever is larger. Follows IEEE 754-2008 floating point types + semantics (maxNum) with +0.0>-0.0. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. + T __builtin_elementwise_minnum(T x, T y) return x or y, whichever is smaller. Follows IEEE 754-2008 floating point types + semantics (minNum) with +0.0>-0.0. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of integer types representable values for the signed/unsigned integer type. T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of integer types >From 70228d0c7166dbdb7cd584213930b0712e7265cc Mon Sep 17 00:00:00 2001 From: YunQiang Su <yunqi...@isrc.iscas.ac.cn> Date: Wed, 19 Mar 2025 16:02:19 +0800 Subject: [PATCH 5/6] remove BuiltinMaxNumMinNumMath --- clang/lib/Sema/SemaChecking.cpp | 36 --------------------------------- 1 file changed, 36 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f0b027083caa3..7c1e02be47e6d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -15278,42 +15278,6 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall( return false; } -bool Sema::BuiltinMaxNumMinNumMath(CallExpr *TheCall) { - if (checkArgCount(TheCall, 2)) - return true; - - ExprResult OrigArg0 = TheCall->getArg(0); - ExprResult OrigArg1 = TheCall->getArg(1); - - // Do standard promotions between the two arguments, returning their common - // type. 
- QualType Res = UsualArithmeticConversions( - OrigArg0, OrigArg1, TheCall->getExprLoc(), ACK_Comparison); - if (OrigArg0.isInvalid() || OrigArg1.isInvalid()) - return true; - - // Make sure any conversions are pushed back into the call; this is - // type safe since unordered compare builtins are declared as "_Bool - // foo(...)". - TheCall->setArg(0, OrigArg0.get()); - TheCall->setArg(1, OrigArg1.get()); - - if (!OrigArg0.get()->isTypeDependent() && OrigArg1.get()->isTypeDependent()) - return true; - - // If the common type isn't a real floating type, then the arguments were - // invalid for this operation. - if (Res.isNull() || !Res->isRealFloatingType()) - return Diag(OrigArg0.get()->getBeginLoc(), - diag::err_typecheck_call_invalid_ordered_compare) - << OrigArg0.get()->getType() << OrigArg1.get()->getType() - << SourceRange(OrigArg0.get()->getBeginLoc(), - OrigArg1.get()->getEndLoc()); - - TheCall->setType(Res); - return false; -} - bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr) { if (auto Res = BuiltinVectorMath(TheCall, ArgTyRestr); Res.has_value()) { >From 817cfae3086302068701357955e752e8cc60a733 Mon Sep 17 00:00:00 2001 From: YunQiang Su <yunqi...@isrc.iscas.ac.cn> Date: Wed, 19 Mar 2025 16:04:23 +0800 Subject: [PATCH 6/6] Added to clang ReleaseNotes --- clang/docs/ReleaseNotes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ce4336acb806a..a9df53162f7d2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -152,6 +152,7 @@ Non-comprehensive list of changes in this release - Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719). - Added `__builtin_elementwise_exp10`. +- Added `__builtin_elementwise_minnum` and `__builtin_elementwise_maxnum`. 
New Compiler Flags ------------------ _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits