Author: Deric C. Date: 2025-03-05T17:04:10-08:00 New Revision: b4ecebe745ddebf30449435203deeb6463ecf9f0
URL: https://github.com/llvm/llvm-project/commit/b4ecebe745ddebf30449435203deeb6463ecf9f0 DIFF: https://github.com/llvm/llvm-project/commit/b4ecebe745ddebf30449435203deeb6463ecf9f0.diff LOG: [HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op (#127137) Fixes #99205. - Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types - The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp` - The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp` Notes: - `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL - A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll) - When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. 
This aspect of the implementation may be changed when issue #113192 gets addressed - Fixes issues mentioned in the comments on the original PR #125319 --------- Co-authored-by: Finn Plummer <50529406+inbe...@users.noreply.github.com> Co-authored-by: Farzon Lotfi <farzonlo...@microsoft.com> Co-authored-by: Chris B <be...@abolishcrlf.org> Co-authored-by: Justin Bogner <m...@justinbogner.com> Added: clang/test/CodeGenHLSL/builtins/AddUint64.hlsl clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl llvm/test/CodeGen/DirectX/UAddc.ll llvm/test/CodeGen/DirectX/UAddc_errors.ll llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll Modified: clang/include/clang/Basic/Builtins.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h clang/lib/Sema/SemaHLSL.cpp llvm/lib/Target/DirectX/DXIL.td llvm/lib/Target/DirectX/DXILOpBuilder.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index f7027331cd6c5..2268df70927a7 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4765,6 +4765,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> { } // HLSL +def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_adduint64"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_resource_getpointer"]; let Attributes = [NoThrow]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ed2da2b355e11..5e5902cdf0cd7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10709,6 +10709,11 @@ def err_vector_incorrect_num_elements : Error< "%select{too many|too few}0 elements in vector 
%select{initialization|operand}3 (expected %1 elements, have %2)">; def err_altivec_empty_initializer : Error<"expected initializer">; +def err_vector_incorrect_bit_count : Error< + "incorrect number of bits in vector operand (expected %select{|a multiple of}0 %1 bits, have %2)">; +def err_integer_incorrect_bit_count : Error< + "incorrect number of bits in integer (expected %0 bits, have %1)">; + def err_invalid_neon_type_code : Error< "incompatible constant for this __builtin_neon function">; def err_argument_invalid_range : Error< diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ab8f19b25fa66..20b0000793c06 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19470,6 +19470,62 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return nullptr; switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + Value *OpA = EmitScalarExpr(E->getArg(0)); + Value *OpB = EmitScalarExpr(E->getArg(1)); + QualType Arg0Ty = E->getArg(0)->getType(); + uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements(); + assert(Arg0Ty == E->getArg(1)->getType() && + "AddUint64 operand types must match"); + assert(Arg0Ty->hasIntegerRepresentation() && + "AddUint64 operands must have an integer representation"); + assert((NumElements == 2 || NumElements == 4) && + "AddUint64 operands must have 2 or 4 elements"); + + llvm::Value *LowA; + llvm::Value *HighA; + llvm::Value *LowB; + llvm::Value *HighB; + + // Obtain low and high words of inputs A and B + if (NumElements == 2) { + LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA"); + HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA"); + LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB"); + HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB"); + } else { + LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA"); + HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA"); 
+ LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB"); + HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB"); + } + + // Use an uadd_with_overflow to compute the sum of low words and obtain a + // carry value + llvm::Value *Carry; + llvm::Value *LowSum = EmitOverflowIntrinsic( + *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry); + llvm::Value *ZExtCarry = + Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt"); + + // Sum the high words and the carry + llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum"); + llvm::Value *HighSumPlusCarry = + Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry"); + + if (NumElements == 4) { + return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, + ArrayRef<int>{0, 2, 1, 3}, + "hlsl.AddUint64"); + } + + llvm::Value *Result = PoisonValue::get(OpA->getType()); + Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0, + "hlsl.AddUint64.upto0"); + Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1, + "hlsl.AddUint64"); + return Result; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { Value *HandleOp = EmitScalarExpr(E->getArg(0)); Value *IndexOp = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 75b0c95440461..7573f6e024167 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -174,6 +174,27 @@ float3 acos(float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos) float4 acos(float4); +//===----------------------------------------------------------------------===// +// AddUint64 builtins +//===----------------------------------------------------------------------===// + +/// \fn T AddUint64(T a, T b) +/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned +/// 32-bit integers. 
+/// \param a [in] The first unsigned 32-bit integer pair(s) +/// \param b [in] The second unsigned 32-bit integer pair(s) +/// +/// This function takes one or two pairs (low, high) of unsigned 32-bit integer +/// values and returns pairs (low, high) of unsigned 32-bit integer +/// values representing the result of unsigned 64-bit integer addition. + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t2 AddUint64(uint32_t2, uint32_t2); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64) +uint32_t4 AddUint64(uint32_t4, uint32_t4); + //===----------------------------------------------------------------------===// // all builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index a4b0b5e4df63f..aff349a932eec 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2086,6 +2086,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { checkAllFloatTypes); } +static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) { + auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool { + clang::QualType BaseType = + PassedType->isVectorType() + ? 
PassedType->getAs<clang::VectorType>()->getElementType() + : PassedType; + return !BaseType->isUnsignedIntegerType(); + }; + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, + checkUnsignedInteger); +} + static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { auto checkFloatorHalf = [](clang::QualType PassedType) -> bool { clang::QualType BaseType = @@ -2277,6 +2289,52 @@ static bool CheckResourceHandle( // returning an ExprError bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckVectorElementCallArgs(&SemaRef, TheCall)) + return true; + if (CheckUnsignedIntRepresentations(&SemaRef, TheCall)) + return true; + + // CheckVectorElementCallArgs(...) guarantees both args are the same type. + assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() && + "Both args must be of the same type"); + + // ensure both args are vectors + auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>(); + if (!VTy) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() << /*all*/ 1; + return true; + } + + // ensure arg integers are 32-bits + uint64_t ElementBitCount = getASTContext() + .getTypeSizeInChars(VTy->getElementType()) + .getQuantity() * + 8; + if (ElementBitCount != 32) { + SemaRef.Diag(TheCall->getBeginLoc(), + diag::err_integer_incorrect_bit_count) + << 32 << ElementBitCount; + return true; + } + + // ensure both args are vectors of total bit size of a multiple of 64 + int NumElementsArg = VTy->getNumElements(); + if (NumElementsArg != 2 && NumElementsArg != 4) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count) + << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount; + return true; + } + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + // 
return type is the same as the input type + TheCall->setType(ArgTyA); + break; + } case Builtin::BI__builtin_hlsl_resource_getpointer: { if (SemaRef.checkArgCount(TheCall, 2) || CheckResourceHandle(&SemaRef, TheCall, 0) || diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl new file mode 100644 index 0000000000000..e1832bdbbf33f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl @@ -0,0 +1,58 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK + + +// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_( +// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0 +// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32 +// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], 
[[HIGHB]] +// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]] +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1 +// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]] +// +uint2 test_AddUint64_uint2(uint2 a, uint2 b) { + return AddUint64(a, b); +} + +// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_( +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> +// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> +// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> +// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> +// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0 +// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]] +// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]] +// CHECK-NEXT: 
[[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3> +// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]] +// +uint4 test_AddUint64_uint4(uint4 a, uint4 b) { + return AddUint64(a, b); +} diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl new file mode 100644 index 0000000000000..1f9e92da90ca5 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify + +uint2 test_too_few_arg() { + return __builtin_hlsl_adduint64(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} +} + +uint4 test_too_many_arg(uint4 a) { + return __builtin_hlsl_adduint64(a, a, a); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +uint2 test_mismatched_arg_types(uint2 a, uint4 b) { + return __builtin_hlsl_adduint64(a, b); + // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}} +} + +uint2 test_bad_num_arg_elements(uint3 a, uint3 b) { + return __builtin_hlsl_adduint64(a, b); + // expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}} +} + +uint2 test_scalar_arg_type(uint a) { + return __builtin_hlsl_adduint64(a, a); + // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}} +} + +uint2 test_uint64_args(uint16_t2 a) { + return __builtin_hlsl_adduint64(a, a); + // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}} +} + +uint2 test_signed_integer_args(int2 a, int2 b) { + return __builtin_hlsl_adduint64(a, b); +// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) 
unsigned int' (vector of 2 'unsigned int' values)}} +} + +struct S { + uint2 a; +}; + +uint2 test_incorrect_arg_type(S a) { + return __builtin_hlsl_adduint64(a, a); + // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}} +} + diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index bd58144ebc135..ebe1d876d58b1 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -56,6 +56,7 @@ def HandleTy : DXILOpParamType; def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; def SplitDoubleTy : DXILOpParamType; +def BinaryWithCarryTy : DXILOpParamType; class DXILOpClass; @@ -744,6 +745,16 @@ def UMin : DXILOp<40, binary> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def UAddc : DXILOp<44, binaryWithCarryOrBorrow > { + let Doc = "unsigned add of 32-bit operand with the carry"; + let intrinsics = [IntrinSelect<int_uadd_with_overflow>]; + let arguments = [OverloadTy, OverloadTy]; + let result = BinaryWithCarryTy; + let overloads = [Overloads<DXIL1_0, [Int32Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def FMad : DXILOp<46, tertiary> { let Doc = "Floating point arithmetic multiply/add operation. 
fmad(m,a,b) = m " "* a + b."; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index e76b7d6ad7891..5afe512b38026 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -253,6 +253,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) { return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble"); } +static StructType *getBinaryWithCarryType(LLVMContext &Context) { + if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c")) + return ST; + Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int1Ty = Type::getInt1Ty(Context); + return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c"); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -308,6 +316,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getResPropsType(Ctx); case OpParamType::SplitDoubleTy: return getSplitDoubleType(Ctx); + case OpParamType::BinaryWithCarryTy: + return getBinaryWithCarryType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll new file mode 100644 index 0000000000000..4b46b56b455f6 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/UAddc.ll @@ -0,0 +1,75 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; This test exercises the lowering of the intrinsic @llvm.uadd.with.overflow.i32 to the UAddc DXIL op + +; CHECK-DAG: [[DX_TYPES_I32C:%dx\.types\.i32c]] = type { i32, i1 } + +; NOTE: The uint2 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.i32, resulting in one UAddc op +define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: define noundef i32 @test_UAddc( +; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { +; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] 
@dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 1 +; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0 +; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32 +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]] +; CHECK-NEXT: ret i32 [[RESULT]] +; + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %carry = extractvalue { i32, i1 } %uaddc, 1 + %sum = extractvalue { i32, i1 } %uaddc, 0 + %carry_zext = zext i1 %carry to i32 + %result = add i32 %sum, %carry_zext + ret i32 %result +} + +; NOTE: The uint4 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.v2i32, resulting in two UAddc ops after scalarization +define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> noundef %b) { +; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2( +; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) { +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0 +; CHECK-NEXT: [[UADDC_I0:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1 +; CHECK-NEXT: [[UADDC_I1:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]] +; CHECK-NEXT: [[CARRY_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 1 +; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 1 +; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM0]], i64 0 +; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM1]], i64 1 +; CHECK-NEXT: [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0 +; 
CHECK-NEXT: [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1 +; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 0 +; CHECK-NEXT: [[SUM_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 0 +; CHECK-NEXT: [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32 +; CHECK-NEXT: [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32 +; CHECK-NEXT: [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]] +; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM1]], [[CARRY_ZEXT_I1]] +; CHECK-NEXT: [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RESULT_I0]], i64 0 +; CHECK-NEXT: [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], i32 [[RESULT_I1]], i64 1 +; CHECK-NEXT: ret <2 x i32> [[RESULT]] +; + %uaddc = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) + %carry = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 1 + %sum = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 0 + %carry_zext = zext <2 x i1> %carry to <2 x i32> + %result = add <2 x i32> %sum, %carry_zext + ret <2 x i32> %result +} + +define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) { +; CHECK-LABEL: define noundef i32 @test_UAddc_insert( +; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) { +; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]] +; CHECK-NEXT: [[UNUSED:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC]], i32 [[A]], 0 +; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + insertvalue { i32, i1 } %uaddc, i32 %a, 0 + %result = extractvalue { i32, i1 } %uaddc, 0 + ret i32 %result +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) + diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll b/llvm/test/CodeGen/DirectX/UAddc_errors.ll new file mode 100644 index 
0000000000000..0c6964a09c953 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll @@ -0,0 +1,30 @@ +; We use llc for this test so that we don't abort after the first error. +; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s + +target triple = "dxil-pc-shadermodel6.3-library" + +; DXIL operation UAddc only supports i32. Other integer types are unsupported. +; CHECK: error: +; CHECK-SAME: in function uaddc_i16 +; CHECK-SAME: Cannot create UAddc operation: Invalid overload type + +define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" { + %uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) + %carry = extractvalue { i16, i1 } %uaddc, 1 + %sum = extractvalue { i16, i1 } %uaddc, 0 + %carry_zext = zext i1 %carry to i16 + %result = add i16 %sum, %carry_zext + ret i16 %result +} + +; CHECK: error: +; CHECK-SAME: in function uaddc_return +; CHECK-SAME: DXIL ops that return structs may only be used by insert- and extractvalue + +define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) "hlsl.export" { + %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + ret { i32, i1 } %uaddc +} + +declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) + diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll new file mode 100644 index 0000000000000..6521699a242ed --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll @@ -0,0 +1,88 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Code here is an excerpt of clang/test/CodeGenHLSL/builtins/AddUint64.hlsl compiled for spirv using the following command +; clang -cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute clang/test/CodeGenHLSL/builtins/AddUint64.hlsl -emit-llvm -disable-llvm-passes -o 
llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll + +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#vec2_int_32:]] = OpTypeVector %[[#int_32]] 2 +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#const_i32_1:]] = OpConstant %[[#int_32]] 1 +; CHECK-DAG: %[[#struct_i32_i32:]] = OpTypeStruct %[[#int_32]] %[[#int_32]] +; CHECK-DAG: %[[#func_v2i32_v2i32_v2i32:]] = OpTypeFunction %[[#vec2_int_32]] %[[#vec2_int_32]] %[[#vec2_int_32]] +; CHECK-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0 +; CHECK-DAG: %[[#undef_v2i32:]] = OpUndef %[[#vec2_int_32]] +; CHECK-DAG: %[[#vec4_int_32:]] = OpTypeVector %[[#int_32]] 4 +; CHECK-DAG: %[[#vec2_bool:]] = OpTypeVector %[[#bool]] 2 +; CHECK-DAG: %[[#const_v2i32_0_0:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_0]] %[[#const_i32_0]] +; CHECK-DAG: %[[#const_v2i32_1_1:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_1]] %[[#const_i32_1]] +; CHECK-DAG: %[[#struct_v2i32_v2i32:]] = OpTypeStruct %[[#vec2_int_32]] %[[#vec2_int_32]] +; CHECK-DAG: %[[#func_v4i32_v4i32_v4i32:]] = OpTypeFunction %[[#vec4_int_32]] %[[#vec4_int_32]] %[[#vec4_int_32]] +; CHECK-DAG: %[[#undef_v4i32:]] = OpUndef %[[#vec4_int_32]] + + +define spir_func <2 x i32> @test_AddUint64_uint2(<2 x i32> %a, <2 x i32> %b) { +entry: +; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec2_int_32]] +; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec2_int_32]] +; CHECK: %[[#a_low:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 0 +; CHECK: %[[#a_high:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 1 +; CHECK: %[[#b_low:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 0 +; CHECK: %[[#b_high:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 1 +; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_i32_i32]] %[[#a_low]] %[[#b_low]] +; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 0 +; CHECK: %[[#carry:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 1 +; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#bool]] %[[#carry]] %[[#const_i32_0]] 
+; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#int_32]] %[[#carry_ne0]] %[[#const_i32_1]] %[[#const_i32_0]] +; CHECK: %[[#highsum:]] = OpIAdd %[[#int_32]] %[[#a_high]] %[[#b_high]] +; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#int_32]] %[[#highsum]] %[[#select_1_or_0]] +; CHECK: %[[#adduint64_upto0:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#lowsum]] %[[#undef_v2i32]] 0 +; CHECK: %[[#adduint64:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#highsumpluscarry]] %[[#adduint64_upto0]] 1 +; CHECK: OpReturnValue %[[#adduint64]] +; + %LowA = extractelement <2 x i32> %a, i64 0 + %HighA = extractelement <2 x i32> %a, i64 1 + %LowB = extractelement <2 x i32> %b, i64 0 + %HighB = extractelement <2 x i32> %b, i64 1 + %3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %LowA, i32 %LowB) + %4 = extractvalue { i32, i1 } %3, 1 + %5 = extractvalue { i32, i1 } %3, 0 + %CarryZExt = zext i1 %4 to i32 + %HighSum = add i32 %HighA, %HighB + %HighSumPlusCarry = add i32 %HighSum, %CarryZExt + %hlsl.AddUint64.upto0 = insertelement <2 x i32> poison, i32 %5, i64 0 + %hlsl.AddUint64 = insertelement <2 x i32> %hlsl.AddUint64.upto0, i32 %HighSumPlusCarry, i64 1 + ret <2 x i32> %hlsl.AddUint64 +} + +define spir_func <4 x i32> @test_AddUint64_uint4(<4 x i32> %a, <4 x i32> %b) #0 { +entry: +; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec4_int_32]] +; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec4_int_32]] +; CHECK: %[[#a_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 0 2 +; CHECK: %[[#a_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 1 3 +; CHECK: %[[#b_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 0 2 +; CHECK: %[[#b_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 1 3 +; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_v2i32_v2i32]] %[[#a_low]] %[[#b_low]] +; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 0 +; CHECK: %[[#carry:]] = OpCompositeExtract %[[#vec2_int_32]] 
%[[#iaddcarry]] 1 +; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#vec2_bool]] %[[#carry]] %[[#const_v2i32_0_0]] +; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#vec2_int_32]] %[[#carry_ne0]] %[[#const_v2i32_1_1]] %[[#const_v2i32_0_0]] +; CHECK: %[[#highsum:]] = OpIAdd %[[#vec2_int_32]] %[[#a_high]] %[[#b_high]] +; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#vec2_int_32]] %[[#highsum]] %[[#select_1_or_0]] +; CHECK: %[[#adduint64:]] = OpVectorShuffle %[[#vec4_int_32]] %[[#lowsum]] %[[#highsumpluscarry]] 0 2 1 3 +; CHECK: OpReturnValue %[[#adduint64]] +; + %LowA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2> + %HighA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3> + %LowB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 0, i32 2> + %HighB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 1, i32 3> + %3 = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %LowA, <2 x i32> %LowB) + %4 = extractvalue { <2 x i32>, <2 x i1> } %3, 1 + %5 = extractvalue { <2 x i32>, <2 x i1> } %3, 0 + %CarryZExt = zext <2 x i1> %4 to <2 x i32> + %HighSum = add <2 x i32> %HighA, %HighB + %HighSumPlusCarry = add <2 x i32> %HighSum, %CarryZExt + %hlsl.AddUint64 = shufflevector <2 x i32> %5, <2 x i32> %HighSumPlusCarry, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + ret <4 x i32> %hlsl.AddUint64 +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits