https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/127137
>From 4fae5642c6e8e305cdc687b4968ba5eabaa44b50 Mon Sep 17 00:00:00 2001 From: Icohedron <cheung.de...@gmail.com> Date: Mon, 27 Jan 2025 11:18:09 -0800 Subject: [PATCH 1/3] Add the AddUint64 HLSL builtin function - Defines the AddUint64 HLSL builtin function - Implements the UAddc DXIL op to lower AddUint64 to DXIL --- clang/include/clang/Basic/Builtins.td | 6 ++ .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 45 ++++++++++++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 21 ++++++ clang/lib/Sema/SemaHLSL.cpp | 47 ++++++++++++ .../test/CodeGenHLSL/builtins/AddUint64.hlsl | 71 +++++++++++++++++++ .../SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 41 +++++++++++ llvm/lib/Target/DirectX/DXIL.td | 13 ++++ llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 14 ++++ llvm/lib/Target/DirectX/DXILOpBuilder.h | 3 + llvm/lib/Target/DirectX/DXILOpLowering.cpp | 22 ++++-- llvm/test/CodeGen/DirectX/UAddc.ll | 40 +++++++++++ llvm/test/CodeGen/DirectX/UAddc_errors.ll | 30 ++++++++ 13 files changed, 348 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/AddUint64.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/UAddc.ll create mode 100644 llvm/test/CodeGen/DirectX/UAddc_errors.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 29939242596ba..2433427a89429 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> { } // HLSL +def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_adduint64"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_resource_getpointer"]; let Attributes = [NoThrow]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2fce5e88ba8a0..e78339ee924ff 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10655,6 +10655,8 @@ def err_second_argument_to_cwsc_not_pointer : Error< def err_vector_incorrect_num_elements : Error< "%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">; +def err_invalid_even_odd_vector_element_count : Error< + "invalid element count of %0 in vector %select{initialization|operand}4 (expected an %select{even|odd}3 element count in the range of %1 and %2)">; def err_altivec_empty_initializer : Error<"expected initializer">; def err_invalid_neon_type_code : Error< diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 361e4c4bf2e2e..5322b38458b26 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19445,6 +19445,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return nullptr; switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + Value *OpA = EmitScalarExpr(E->getArg(0)); + Value *OpB = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(0)->getType()->hasIntegerRepresentation() && + E->getArg(1)->getType()->hasIntegerRepresentation() && + "AddUint64 operands must have an integer representation"); + assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == + 2 && + E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == + 2) || + (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == + 4 && + E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == + 4)) && + "input vectors must have 2 or 4 elements each"); + + llvm::Value *Result = PoisonValue::get(OpA->getType()); + uint64_t NumElements = + E->getArg(0)->getType()->castAs<VectorType>()->getNumElements(); + for (uint64_t i = 0; i < NumElements / 2; ++i) { + + // Obtain low and high words 
of inputs A and B + llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0); + llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1); + llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0); + llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1); + + // Use an uadd_with_overflow to compute the sum of low words and obtain a + // carry value + llvm::Value *Carry; + llvm::Value *LowSum = EmitOverflowIntrinsic( + *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry); + llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType()); + + // Sum the high words and the carry + llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB); + llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry); + + // Insert the low and high word sums into the result vector + Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0); + Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1, + "hlsl.AddUint64"); + } + return Result; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { Value *HandleOp = EmitScalarExpr(E->getArg(0)); Value *IndexOp = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index d1f5fdff8b600..513639ed1b81d 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) float4 acos(float4); //===----------------------------------------------------------------------===// +// AddUint64 builtins +//===----------------------------------------------------------------------===// + +/// \fn T AddUint64(T a, T b) +/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned +/// 32-bit integers. 
+/// \param a [in] The first unsigned 32-bit integer pair(s) +/// \param b [in] The second unsigned 32-bit integer pair(s) +/// +/// This function takes one or two pairs (low, high) of unsigned 32-bit integer +/// values and returns pairs (low, high) of unsigned 32-bit integer +/// values representing the result of unsigned 64-bit integer addition. + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t2 AddUint64(uint32_t2, uint32_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t4 AddUint64(uint32_t4, uint32_t4); + +// //===----------------------------------------------------------------------===// // all builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 4abd870ad6aaa..99eb5360ec356 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { checkAllFloatTypes); } +static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) { + auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool { + clang::QualType BaseType = + PassedType->isVectorType() + ? 
PassedType->getAs<clang::VectorType>()->getElementType() + : PassedType; + return !BaseType->isUnsignedIntegerType(); + }; + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, + checkUnsignedInteger); +} + static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { auto checkFloatorHalf = [](clang::QualType PassedType) -> bool { clang::QualType BaseType = @@ -2229,6 +2241,41 @@ static bool CheckResourceHandle( // returning an ExprError bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckVectorElementCallArgs(&SemaRef, TheCall)) + return true; + if (CheckUnsignedIntRepresentations(&SemaRef, TheCall)) + return true; + + // CheckVectorElementCallArgs(...) guarantees both args are the same type. + assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() && + "Both args must be of the same type"); + + // ensure both args are vectors + auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>(); + if (!VTy) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector) + << "AddUint64" << /*all*/ 1; + return true; + } + + // ensure both args have 2 elements, or both args have 4 elements + int NumElementsArg = VTy->getNumElements(); + if (NumElementsArg != 2 && NumElementsArg != 4) { + SemaRef.Diag(TheCall->getBeginLoc(), + diag::err_invalid_even_odd_vector_element_count) + << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1; + return true; + } + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + // return type is the same as the input type + TheCall->setType(ArgTyA); + break; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { if (SemaRef.checkArgCount(TheCall, 2) || CheckResourceHandle(&SemaRef, TheCall, 0) || diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl 
b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl new file mode 100644 index 0000000000000..4141aef69323d --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl @@ -0,0 +1,71 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK + + +// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_( +// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0 +// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1 +// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0 +// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1 +// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) +// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 +// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 +// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 +// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] +// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0 +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 +// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]] +// +uint2 test_AddUint64_uint2(uint2 a, uint2 b) { + return AddUint64(a, b); +} + +// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0 +// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1 +// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0 +// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1 +// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) +// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 +// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 +// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 +// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] +// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0 +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 +// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2 +// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3 +// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2 +// CHECK-NEXT: 
[[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3 +// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]]) +// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1 +// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0 +// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32 +// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]] +// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]] +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2 +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3 +// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]] +// +uint4 test_AddUint64_uint4(uint4 a, uint4 b) { + return AddUint64(a, b); +} diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl new file mode 100644 index 0000000000000..ec9d026bb6fe7 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify + +uint2 test_too_few_arg() { + return __builtin_hlsl_adduint64(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} +} + +uint4 test_too_many_arg(uint4 a) { + return __builtin_hlsl_adduint64(a, a, a); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +uint2 test_mismatched_arg_types(uint2 a, uint4 b) { + return __builtin_hlsl_adduint64(a, b); + // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}} +} + +uint2 test_bad_num_arg_elements(uint3 a, uint3 b) { + return __builtin_hlsl_adduint64(a, b); + // expected-error@-1 {{invalid element count of 3 in vector operand (expected 
an even element count in the range of 2 and 4)}} +} + +uint2 test_scalar_arg_type(uint a) { + return __builtin_hlsl_adduint64(a, a); + // expected-error@-1 {{all arguments to AddUint64 must be vectors}} +} + +uint2 test_signed_integer_args(int2 a, int2 b) { + return __builtin_hlsl_adduint64(a, b); +// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}} +} + +struct S { + uint2 a; +}; + +uint2 test_incorrect_arg_type(S a) { + return __builtin_hlsl_adduint64(a, a); + // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}} +} + diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 7cb841d9bd5b5..2f6b4d676edfd 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType; def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; def SplitDoubleTy : DXILOpParamType; +def BinaryWithCarryTy : DXILOpParamType; class DXILOpClass; @@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def UAddc : DXILOp<44, binaryWithCarryOrBorrow > { + let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)"; + // TODO: This `let intrinsics = ...` line may be uncommented when + // https://github.com/llvm/llvm-project/issues/113192 is fixed + // let intrinsics = [IntrinSelect<int_uadd_with_overflow>]; + let arguments = [OverloadTy, OverloadTy]; + let result = BinaryWithCarryTy; + let overloads = [Overloads<DXIL1_0, [Int32Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def FMad : DXILOp<46, tertiary> { let Doc = "Floating point arithmetic multiply/add operation. 
fmad(m,a,b) = m " "* a + b."; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index badd5aabd6432..f0f1bbabb6b23 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) { return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble"); } +static StructType *getBinaryWithCarryType(LLVMContext &Context) { + if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c")) + return ST; + Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int1Ty = Type::getInt1Ty(Context); + return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c"); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getResPropsType(Ctx); case OpParamType::SplitDoubleTy: return getSplitDoubleType(Ctx); + case OpParamType::BinaryWithCarryTy: + return getBinaryWithCarryType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; @@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) { return ::getSplitDoubleType(Context); } +StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) { + return ::getBinaryWithCarryType(Context); +} + StructType *DXILOpBuilder::getHandleType() { return ::getHandleType(IRB.getContext()); } diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h index df5a0240870f4..8e13b87a2be10 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.h +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h @@ -53,6 +53,9 @@ class DXILOpBuilder { /// Get the `%dx.types.splitdouble` type. StructType *getSplitDoubleType(LLVMContext &Context); + /// Get the `%dx.types.i32c` type. 
+ StructType *getBinaryWithCarryType(LLVMContext &Context); + /// Get the `%dx.types.Handle` type. StructType *getHandleType(); diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 0c245c1a43d31..c9e3d7e284963 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -359,17 +359,16 @@ class OpLowerer { return lowerToBindAndAnnotateHandle(F); } - Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) { + Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) { for (Use &U : make_early_inc_range(Intrin->uses())) { if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) { - - if (EVI->getNumIndices() != 1) - return createStringError(std::errc::invalid_argument, - "Splitdouble has only 2 elements"); EVI->setOperand(0, Op); + } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) { + IVI->setOperand(0, Op); } else { return make_error<StringError>( - "Splitdouble use is not ExtractValueInst", + (Intrin->getCalledFunction()->getName() + + " use is not a ExtractValueInst or InsertValueInst"), inconvertibleErrorCode()); } } @@ -821,7 +820,16 @@ class OpLowerer { F, OpCode::SplitDouble, OpBuilder.getSplitDoubleType(M.getContext()), [&](CallInst *CI, CallInst *Op) { - return replaceSplitDoubleCallUsages(CI, Op); + return replaceAggregateTypeOfCallUsages(CI, Op); + }); + break; + // TODO: this can be removed when + // https://github.com/llvm/llvm-project/issues/113192 is fixed + case Intrinsic::uadd_with_overflow: + HasErrors |= replaceFunctionWithNamedStructOp( + F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()), + [&](CallInst *CI, CallInst *Op) { + return replaceAggregateTypeOfCallUsages(CI, Op); }); break; case Intrinsic::ctpop: diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll new file mode 100644 index 0000000000000..abafa40bf2306 --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/UAddc.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; CHECK: %dx.types.i32c = type { i32, i1 } + +define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: define noundef i32 @test_UAddc( +; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { +; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) +; CHECK-NEXT: [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1 +; CHECK-NEXT: [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0 +; CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 +; CHECK-NEXT: [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]] +; CHECK-NEXT: ret i32 [[Result]] +; + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %carry = extractvalue { i32, i1 } %uaddc, 1 + %sum = extractvalue { i32, i1 } %uaddc, 0 + %carry_zext = zext i1 %carry to i32 + %result = add i32 %sum, %carry_zext + ret i32 %result +} + + +define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: define noundef i32 @test_UAddc_insert( +; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { +; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) +; CHECK-NEXT: insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0 +; CHECK-NEXT: [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0 +; CHECK-NEXT: ret i32 [[Result]] +; + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + insertvalue { i32, i1 } %uaddc, i32 %a, 0 + %result = extractvalue { i32, i1 } %uaddc, 0 + ret i32 %result +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) +; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, i32) + diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll b/llvm/test/CodeGen/DirectX/UAddc_errors.ll new file mode 100644 index 
0000000000000..72c676db9bba4 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll @@ -0,0 +1,30 @@ +; We use llc for this test so that we don't abort after the first error. +; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s + +target triple = "dxil-pc-shadermodel6.3-library" + +; DXIL operation UAddc only supports i32. Other integer types are unsupported. +; CHECK: error: +; CHECK-SAME: in function uaddc_i16 +; CHECK-SAME: Cannot create UAddc operation: Invalid overload type + +define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" { + %uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) + %carry = extractvalue { i16, i1 } %uaddc, 1 + %sum = extractvalue { i16, i1 } %uaddc, 0 + %carry_zext = zext i1 %carry to i16 + %result = add i16 %sum, %carry_zext + ret i16 %result +} + +; CHECK: error: +; CHECK-SAME: in function uaddc_return +; CHECK-SAME: llvm.uadd.with.overflow.i32 use is not a ExtractValueInst or InsertValueInst + +define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) "hlsl.export" { + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + ret { i32, i1 } %uaddc +} + +declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) + >From 45afa2f3806b2f869d02aa68e2e1d8dddc6cba3b Mon Sep 17 00:00:00 2001 From: Icohedron <cheung.de...@gmail.com> Date: Tue, 11 Feb 2025 22:56:23 +0000 Subject: [PATCH 2/3] Make AddUint64 use llvm.uadd.with.overflow.v2i32 When the input args are of type uint4, uses the vec2 variant of llvm.uadd.with.overflow to sum the low words of both args. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 65 +++++++++++------- .../test/CodeGenHLSL/builtins/AddUint64.hlsl | 67 ++++++++----------- llvm/test/CodeGen/DirectX/UAddc.ll | 60 +++++++++++++---- 3 files changed, 115 insertions(+), 77 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5322b38458b26..0fe8cf5179b53 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19461,31 +19461,50 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, 4)) && "input vectors must have 2 or 4 elements each"); - llvm::Value *Result = PoisonValue::get(OpA->getType()); uint64_t NumElements = E->getArg(0)->getType()->castAs<VectorType>()->getNumElements(); - for (uint64_t i = 0; i < NumElements / 2; ++i) { - - // Obtain low and high words of inputs A and B - llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0); - llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1); - llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0); - llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1); - - // Use an uadd_with_overflow to compute the sum of low words and obtain a - // carry value - llvm::Value *Carry; - llvm::Value *LowSum = EmitOverflowIntrinsic( - *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry); - llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType()); - - // Sum the high words and the carry - llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB); - llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry); - - // Insert the low and high word sums into the result vector - Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0); - Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1, + + llvm::Value *Result = PoisonValue::get(OpA->getType()); + llvm::Value *LowA; + llvm::Value *HighA; + llvm::Value *LowB; + llvm::Value *HighB; + + // Obtain low and high words of inputs A and B + if (NumElements 
== 2) { + LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA"); + HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA"); + LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB"); + HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB"); + } else { + LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA"); + HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA"); + LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB"); + HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB"); + } + + // Use an uadd_with_overflow to compute the sum of low words and obtain a + // carry value + llvm::Value *Carry; + llvm::Value *LowSum = EmitOverflowIntrinsic( + *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry); + llvm::Value *ZExtCarry = + Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt"); + + // Sum the high words and the carry + llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum"); + llvm::Value *HighSumPlusCarry = + Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry"); + + // Insert the low and high word sums into the result vector + if (NumElements == 2) { + Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0, + "hlsl.AddUint64.upto0"); + Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, + (uint64_t)1, "hlsl.AddUint64"); + } else { /* NumElements == 4 */ + Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, + ArrayRef<int>{0, 2, 1, 3}, "hlsl.AddUint64"); } return Result; diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl index 4141aef69323d..e1832bdbbf33f 100644 --- a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl +++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl @@ -11,20 +11,20 @@ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8 // CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8 // CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], 
align 8 -// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0 -// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1 -// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0 -// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1 -// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) -// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 -// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 -// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 -// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] -// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] -// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0 -// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0 +// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32 +// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]] +// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]] 
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1 // CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]] // uint2 test_AddUint64_uint2(uint2 a, uint2 b) { @@ -38,32 +38,19 @@ uint2 test_AddUint64_uint2(uint2 a, uint2 b) { // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 // CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16 // CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16 -// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 -// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0 -// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1 -// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0 -// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1 -// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) -// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 -// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 -// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 -// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] -// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] -// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0 -// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 -// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2 -// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3 -// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2 -// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3 
-// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]]) -// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1 -// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0 -// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32 -// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]] -// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]] -// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2 -// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> +// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> +// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> +// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> +// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0 +// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]] +// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]] +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3> // CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]] // 
uint4 test_AddUint64_uint4(uint4 a, uint4 b) { diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll index abafa40bf2306..c16a3f6a5b5fe 100644 --- a/llvm/test/CodeGen/DirectX/UAddc.ll +++ b/llvm/test/CodeGen/DirectX/UAddc.ll @@ -1,17 +1,18 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; CHECK: %dx.types.i32c = type { i32, i1 } define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) { ; CHECK-LABEL: define noundef i32 @test_UAddc( ; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { -; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) -; CHECK-NEXT: [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1 -; CHECK-NEXT: [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0 -; CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 -; CHECK-NEXT: [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]] -; CHECK-NEXT: ret i32 [[Result]] -; +; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 1 +; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0 +; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32 +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]] +; CHECK-NEXT: ret i32 [[RESULT]] +; %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %carry = extractvalue { i32, i1 } %uaddc, 1 %sum = extractvalue { i32, i1 } %uaddc, 0 @@ -20,15 +21,47 @@ define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) { ret i32 %result } +define noundef <2 x i32> 
@test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> noundef %b) { +; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2( +; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) { +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0 +; CHECK-NEXT: [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1 +; CHECK-NEXT: [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]] +; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 1 +; CHECK-NEXT: [[CARRY_ELEM11:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 1 +; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM1]], i64 0 +; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM11]], i64 1 +; CHECK-NEXT: [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0 +; CHECK-NEXT: [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1 +; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 0 +; CHECK-NEXT: [[SUM_ELEM02:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 0 +; CHECK-NEXT: [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32 +; CHECK-NEXT: [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32 +; CHECK-NEXT: [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]] +; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM02]], [[CARRY_ZEXT_I1]] +; CHECK-NEXT: [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RESULT_I0]], i64 0 +; CHECK-NEXT: [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], i32 [[RESULT_I1]], i64 1 +; CHECK-NEXT: ret <2 x i32> [[RESULT]] +; + %uaddc = call { <2 x i32>, <2 x 
i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) + %carry = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 1 + %sum = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 0 + %carry_zext = zext <2 x i1> %carry to <2 x i32> + %result = add <2 x i32> %sum, %carry_zext + ret <2 x i32> %result +} define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) { ; CHECK-LABEL: define noundef i32 @test_UAddc_insert( ; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { -; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) -; CHECK-NEXT: insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0 -; CHECK-NEXT: [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0 -; CHECK-NEXT: ret i32 [[Result]] -; +; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC1]], i32 [[A]], 0 +; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0 +; CHECK-NEXT: ret i32 [[RESULT]] +; %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) insertvalue { i32, i1 } %uaddc, i32 %a, 0 %result = extractvalue { i32, i1 } %uaddc, 0 @@ -36,5 +69,4 @@ define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) { } declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) -; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, i32) >From 7714dc5686059f827d53b83d9472f02934aa88de Mon Sep 17 00:00:00 2001 From: Icohedron <cheung.de...@gmail.com> Date: Fri, 14 Feb 2025 22:31:14 +0000 Subject: [PATCH 3/3] Report name of function called in source --- clang/lib/Sema/SemaHLSL.cpp | 2 +- clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 99eb5360ec356..5773329101f27 100644 --- 
a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2257,7 +2257,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>(); if (!VTy) { SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector) - << "AddUint64" << /*all*/ 1; + << TheCall->getDirectCallee() << /*all*/ 1; return true; } diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl index ec9d026bb6fe7..9dd2aa1457f8f 100644 --- a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl @@ -22,7 +22,7 @@ uint2 test_bad_num_arg_elements(uint3 a, uint3 b) { uint2 test_scalar_arg_type(uint a) { return __builtin_hlsl_adduint64(a, a); - // expected-error@-1 {{all arguments to AddUint64 must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}} } uint2 test_signed_integer_args(int2 a, int2 b) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits