[clang] [llvm] [HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op (PR #127137)

Deric Cheung via cfe-commits Fri, 14 Feb 2025 14:32:24 -0800

https://github.com/Icohedron updated 
https://github.com/llvm/llvm-project/pull/127137


>From 4fae5642c6e8e305cdc687b4968ba5eabaa44b50 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.de...@gmail.com>
Date: Mon, 27 Jan 2025 11:18:09 -0800
Subject: [PATCH 1/3] Add the AddUint64 HLSL builtin function

- Defines the AddUint64 HLSL builtin function
- Implements the UAddc DXIL op to lower AddUint64 to DXIL
---
 clang/include/clang/Basic/Builtins.td         |  6 ++
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 45 ++++++++++++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 21 ++++++
 clang/lib/Sema/SemaHLSL.cpp                   | 47 ++++++++++++
 .../test/CodeGenHLSL/builtins/AddUint64.hlsl  | 71 +++++++++++++++++++
 .../SemaHLSL/BuiltIns/AddUint64-errors.hlsl   | 41 +++++++++++
 llvm/lib/Target/DirectX/DXIL.td               | 13 ++++
 llvm/lib/Target/DirectX/DXILOpBuilder.cpp     | 14 ++++
 llvm/lib/Target/DirectX/DXILOpBuilder.h       |  3 +
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 22 ++++--
 llvm/test/CodeGen/DirectX/UAddc.ll            | 40 +++++++++++
 llvm/test/CodeGen/DirectX/UAddc_errors.ll     | 30 ++++++++
 13 files changed, 348 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/UAddc.ll
 create mode 100644 llvm/test/CodeGen/DirectX/UAddc_errors.ll

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 29939242596ba..2433427a89429 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
 }
 
 // HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_adduint64"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_resource_getpointer"];
   let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2fce5e88ba8a0..e78339ee924ff 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10655,6 +10655,8 @@ def err_second_argument_to_cwsc_not_pointer : Error<
 
 def err_vector_incorrect_num_elements : Error<
   "%select{too many|too few}0 elements in vector 
%select{initialization|operand}3 (expected %1 elements, have %2)">;
+def err_invalid_even_odd_vector_element_count : Error<
+  "invalid element count of %0 in vector %select{initialization|operand}4 
(expected an %select{even|odd}3 element count in the range of %1 and %2)">;
 def err_altivec_empty_initializer : Error<"expected initializer">;
 
 def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 361e4c4bf2e2e..5322b38458b26 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19445,6 +19445,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     return nullptr;
 
   switch (BuiltinID) {
+  case Builtin::BI__builtin_hlsl_adduint64: {
+    Value *OpA = EmitScalarExpr(E->getArg(0));
+    Value *OpB = EmitScalarExpr(E->getArg(1));
+    assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+           E->getArg(1)->getType()->hasIntegerRepresentation() &&
+           "AddUint64 operands must have an integer representation");
+    assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+                 2 &&
+             E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+                 2) ||
+            (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+                 4 &&
+             E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+                 4)) &&
+           "input vectors must have 2 or 4 elements each");
+
+    llvm::Value *Result = PoisonValue::get(OpA->getType());
+    uint64_t NumElements =
+        E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+    for (uint64_t i = 0; i < NumElements / 2; ++i) {
+
+      // Obtain low and high words of inputs A and B
+      llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
+      llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
+      llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
+      llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
+
+      // Use an uadd_with_overflow to compute the sum of low words and obtain a
+      // carry value
+      llvm::Value *Carry;
+      llvm::Value *LowSum = EmitOverflowIntrinsic(
+          *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+      llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
+
+      // Sum the high words and the carry
+      llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
+      llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
+
+      // Insert the low and high word sums into the result vector
+      Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
+      Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+                                           "hlsl.AddUint64");
+    }
+    return Result;
+  }
   case Builtin::BI__builtin_hlsl_resource_getpointer: {
     Value *HandleOp = EmitScalarExpr(E->getArg(0));
     Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h 
b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1f5fdff8b600..513639ed1b81d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
 float4 acos(float4);
 
 
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param x [in] The first unsigned 32-bit integer pair(s)
+/// \param y [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+// 
//===----------------------------------------------------------------------===//
 // all builtins
 
//===----------------------------------------------------------------------===//
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4abd870ad6aaa..99eb5360ec356 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, 
CallExpr *TheCall) {
                                     checkAllFloatTypes);
 }
 
+static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
+  auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+    clang::QualType BaseType =
+        PassedType->isVectorType()
+            ? PassedType->getAs<clang::VectorType>()->getElementType()
+            : PassedType;
+    return !BaseType->isUnsignedIntegerType();
+  };
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+                                    checkUnsignedInteger);
+}
+
 static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
   auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
     clang::QualType BaseType =
@@ -2229,6 +2241,41 @@ static bool CheckResourceHandle(
 // returning an ExprError
 bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) 
{
   switch (BuiltinID) {
+  case Builtin::BI__builtin_hlsl_adduint64: {
+    if (SemaRef.checkArgCount(TheCall, 2))
+      return true;
+    if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+      return true;
+    if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
+      return true;
+
+    // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+    assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+           "Both args must be of the same type");
+
+    // ensure both args are vectors
+    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+    if (!VTy) {
+      SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
+          << "AddUint64" << /*all*/ 1;
+      return true;
+    }
+
+    // ensure both args have 2 elements, or both args have 4 elements
+    int NumElementsArg = VTy->getNumElements();
+    if (NumElementsArg != 2 && NumElementsArg != 4) {
+      SemaRef.Diag(TheCall->getBeginLoc(),
+                   diag::err_invalid_even_odd_vector_element_count)
+          << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1;
+      return true;
+    }
+
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    // return type is the same as the input type
+    TheCall->setType(ArgTyA);
+    break;
+  }
   case Builtin::BI__builtin_hlsl_resource_getpointer: {
     if (SemaRef.checkArgCount(TheCall, 2) ||
         CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl 
b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 0000000000000..4141aef69323d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT:    [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT:    [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT:    [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT:    [[UAddc:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT:    [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT:    [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT:    [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT:    [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> 
poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT:    ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+  return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT:    [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT:    [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT:    [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT:    [[UAddc:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT:    [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT:    [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT:    [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT:    [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> 
poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT:    [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
+// CHECK-NEXT:    [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
+// CHECK-NEXT:    [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
+// CHECK-NEXT:    [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
+// CHECK-NEXT:    [[UAddc1:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
+// CHECK-NEXT:    [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
+// CHECK-NEXT:    [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
+// CHECK-NEXT:    [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
+// CHECK-NEXT:    [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
+// CHECK-NEXT:    [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], 
[[CarryZExt1]]
+// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
+// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT:    ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+  return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 0000000000000..ec9d026bb6fe7
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only 
-disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+  return __builtin_hlsl_adduint64();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 
0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+  return __builtin_hlsl_adduint64(a, a, a);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 
3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+  return __builtin_hlsl_adduint64(a, b);
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have 
the same type}}
+}
+
+uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
+  return __builtin_hlsl_adduint64(a, b);
+  // expected-error@-1 {{invalid element count of 3 in vector operand 
(expected an even element count in the range of 2 and 4)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+  return __builtin_hlsl_adduint64(a, a);
+  // expected-error@-1 {{all arguments to AddUint64 must be vectors}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+  return __builtin_hlsl_adduint64(a, b);
+// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of 
incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) 
unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+  uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+  return __builtin_hlsl_adduint64(a, a);
+  // expected-error@-1 {{passing 'S' to parameter of incompatible type 
'unsigned int'}}
+}
+
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7cb841d9bd5b5..2f6b4d676edfd 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
 def ResBindTy : DXILOpParamType;
 def ResPropsTy : DXILOpParamType;
 def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
 
 class DXILOpClass;
 
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+  let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = 
(a+b, a+b overflowed ? 1 : 0)";
+  // TODO: This `let intrinsics = ...` line may be uncommented when 
+  // https://github.com/llvm/llvm-project/issues/113192 is fixed
+  // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+  let arguments = [OverloadTy, OverloadTy];
+  let result = BinaryWithCarryTy;
+  let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def FMad : DXILOp<46, tertiary> {
   let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m 
"
             "* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp 
b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432..f0f1bbabb6b23 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext 
&Context) {
   return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
 }
 
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+  if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+    return ST;
+  Type *Int32Ty = Type::getInt32Ty(Context);
+  Type *Int1Ty = Type::getInt1Ty(Context);
+  return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
 static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
                                     Type *OverloadTy) {
   switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, 
LLVMContext &Ctx,
     return getResPropsType(Ctx);
   case OpParamType::SplitDoubleTy:
     return getSplitDoubleType(Ctx);
+  case OpParamType::BinaryWithCarryTy:
+    return getBinaryWithCarryType(Ctx);
   }
   llvm_unreachable("Invalid parameter kind");
   return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext 
&Context) {
   return ::getSplitDoubleType(Context);
 }
 
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+  return ::getBinaryWithCarryType(Context);
+}
+
 StructType *DXILOpBuilder::getHandleType() {
   return ::getHandleType(IRB.getContext());
 }
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h 
b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4..8e13b87a2be10 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
   /// Get the `%dx.types.splitdouble` type.
   StructType *getSplitDoubleType(LLVMContext &Context);
 
+  /// Get the `%dx.types.i32c` type.
+  StructType *getBinaryWithCarryType(LLVMContext &Context);
+
   /// Get the `%dx.types.Handle` type.
   StructType *getHandleType();
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31..c9e3d7e284963 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
     return lowerToBindAndAnnotateHandle(F);
   }
 
-  Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+  Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
     for (Use &U : make_early_inc_range(Intrin->uses())) {
       if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
-
-        if (EVI->getNumIndices() != 1)
-          return createStringError(std::errc::invalid_argument,
-                                   "Splitdouble has only 2 elements");
         EVI->setOperand(0, Op);
+      } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) {
+        IVI->setOperand(0, Op);
       } else {
         return make_error<StringError>(
-            "Splitdouble use is not ExtractValueInst",
+            (Intrin->getCalledFunction()->getName() +
+             " use is not a ExtractValueInst or InsertValueInst"),
             inconvertibleErrorCode());
       }
     }
@@ -821,7 +820,16 @@ class OpLowerer {
             F, OpCode::SplitDouble,
             OpBuilder.getSplitDoubleType(M.getContext()),
             [&](CallInst *CI, CallInst *Op) {
-              return replaceSplitDoubleCallUsages(CI, Op);
+              return replaceAggregateTypeOfCallUsages(CI, Op);
+            });
+        break;
+      // TODO: this can be removed when
+      // https://github.com/llvm/llvm-project/issues/113192 is fixed
+      case Intrinsic::uadd_with_overflow:
+        HasErrors |= replaceFunctionWithNamedStructOp(
+            F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()),
+            [&](CallInst *CI, CallInst *Op) {
+              return replaceAggregateTypeOfCallUsages(CI, Op);
             });
         break;
       case Intrinsic::ctpop:
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll 
b/llvm/test/CodeGen/DirectX/UAddc.ll
new file mode 100644
index 0000000000000..abafa40bf2306
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | 
FileCheck %s
+
+; CHECK: %dx.types.i32c = type { i32, i1 }
+
+define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @test_UAddc(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT:    [[UAddc:%.*]] = call %dx.types.i32c 
@dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1
+; CHECK-NEXT:    [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
+; CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+; CHECK-NEXT:    [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]]
+; CHECK-NEXT:    ret i32 [[Result]]
+; 
+  %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %carry = extractvalue { i32, i1 } %uaddc, 1
+  %sum = extractvalue { i32, i1 } %uaddc, 0
+  %carry_zext = zext i1 %carry to i32
+  %result = add i32 %sum, %carry_zext
+  ret i32 %result
+}
+
+
+define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT:    [[UAddc:%.*]] = call %dx.types.i32c 
@dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0
+; CHECK-NEXT:    [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
+; CHECK-NEXT:    ret i32 [[Result]]
+; 
+  %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  insertvalue { i32, i1 } %uaddc, i32 %a, 0
+  %result = extractvalue { i32, i1 } %uaddc, 0
+  ret i32 %result
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, 
i32)
+
diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll 
b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
new file mode 100644
index 0000000000000..72c676db9bba4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
@@ -0,0 +1,30 @@
+; We use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.3-library"
+
+; DXIL operation UAddc only supports i32. Other integer types are unsupported.
+; CHECK: error:
+; CHECK-SAME: in function uaddc_i16
+; CHECK-SAME: Cannot create UAddc operation: Invalid overload type
+
+define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" {
+  %uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+  %carry = extractvalue { i16, i1 } %uaddc, 1
+  %sum = extractvalue { i16, i1 } %uaddc, 0
+  %carry_zext = zext i1 %carry to i16
+  %result = add i16 %sum, %carry_zext
+  ret i16 %result
+}
+
+; CHECK: error:
+; CHECK-SAME: in function uaddc_return
+; CHECK-SAME: llvm.uadd.with.overflow.i32 use is not a ExtractValueInst or 
InsertValueInst
+
+define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) 
"hlsl.export" {
+  %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  ret { i32, i1 } %uaddc
+}
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16)
+

>From 45afa2f3806b2f869d02aa68e2e1d8dddc6cba3b Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.de...@gmail.com>
Date: Tue, 11 Feb 2025 22:56:23 +0000
Subject: [PATCH 2/3] Make AddUint64 use llvm.uadd.with.overflow.v2i32

When the input args are of type uint4, uses the vec2 variant of 
llvm.uadd.with.overflow to sum the low words of both args.
---
 clang/lib/CodeGen/CGBuiltin.cpp               | 65 +++++++++++-------
 .../test/CodeGenHLSL/builtins/AddUint64.hlsl  | 67 ++++++++-----------
 llvm/test/CodeGen/DirectX/UAddc.ll            | 60 +++++++++++++----
 3 files changed, 115 insertions(+), 77 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5322b38458b26..0fe8cf5179b53 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19461,31 +19461,50 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
                  4)) &&
            "input vectors must have 2 or 4 elements each");
 
-    llvm::Value *Result = PoisonValue::get(OpA->getType());
     uint64_t NumElements =
         E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
-    for (uint64_t i = 0; i < NumElements / 2; ++i) {
-
-      // Obtain low and high words of inputs A and B
-      llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
-      llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
-      llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
-      llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
-
-      // Use an uadd_with_overflow to compute the sum of low words and obtain a
-      // carry value
-      llvm::Value *Carry;
-      llvm::Value *LowSum = EmitOverflowIntrinsic(
-          *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
-      llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
-
-      // Sum the high words and the carry
-      llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
-      llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
-
-      // Insert the low and high word sums into the result vector
-      Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
-      Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+
+    llvm::Value *Result = PoisonValue::get(OpA->getType());
+    llvm::Value *LowA;
+    llvm::Value *HighA;
+    llvm::Value *LowB;
+    llvm::Value *HighB;
+
+    // Obtain low and high words of inputs A and B
+    if (NumElements == 2) {
+      LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
+      HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
+      LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
+      HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
+    } else {
+      LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
+      HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
+      LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
+      HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
+    }
+
+    // Use an uadd_with_overflow to compute the sum of low words and obtain a
+    // carry value
+    llvm::Value *Carry;
+    llvm::Value *LowSum = EmitOverflowIntrinsic(
+        *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+    llvm::Value *ZExtCarry =
+        Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
+
+    // Sum the high words and the carry
+    llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
+    llvm::Value *HighSumPlusCarry =
+        Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
+
+    // Insert the low and high word sums into the result vector
+    if (NumElements == 2) {
+      Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
+                                           "hlsl.AddUint64.upto0");
+      Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
+                                           (uint64_t)1, "hlsl.AddUint64");
+    } else { /* NumElements == 4 */
+      Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
+                                           ArrayRef<int>{0, 2, 1, 3},
                                            "hlsl.AddUint64");
     }
     return Result;
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl 
b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
index 4141aef69323d..e1832bdbbf33f 100644
--- a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -11,20 +11,20 @@
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
 // CHECK-NEXT:    store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
 // CHECK-NEXT:    store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT:    [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
-// CHECK-NEXT:    [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
-// CHECK-NEXT:    [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
-// CHECK-NEXT:    [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
-// CHECK-NEXT:    [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
-// CHECK-NEXT:    [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
-// CHECK-NEXT:    [[UAddc:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
-// CHECK-NEXT:    [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
-// CHECK-NEXT:    [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
-// CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-// CHECK-NEXT:    [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
-// CHECK-NEXT:    [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
-// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> 
poison, i32 [[LowSum]], i64 0
-// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
+// CHECK-NEXT:    [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
+// CHECK-NEXT:    [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
+// CHECK-NEXT:    [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT:    [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
+// CHECK-NEXT:    [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT:    [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> 
poison, i32 [[TMP4]], i64 0
+// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
 // CHECK-NEXT:    ret <2 x i32> [[HLSL_ADDUINT64]]
 //
 uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
@@ -38,32 +38,19 @@ uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
 // CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
 // CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
-// CHECK-NEXT:    [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
-// CHECK-NEXT:    [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
-// CHECK-NEXT:    [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
-// CHECK-NEXT:    [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
-// CHECK-NEXT:    [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
-// CHECK-NEXT:    [[UAddc:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
-// CHECK-NEXT:    [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
-// CHECK-NEXT:    [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
-// CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-// CHECK-NEXT:    [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
-// CHECK-NEXT:    [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
-// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> 
poison, i32 [[LowSum]], i64 0
-// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
-// CHECK-NEXT:    [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
-// CHECK-NEXT:    [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
-// CHECK-NEXT:    [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
-// CHECK-NEXT:    [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
-// CHECK-NEXT:    [[UAddc1:%.*]] = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
-// CHECK-NEXT:    [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
-// CHECK-NEXT:    [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
-// CHECK-NEXT:    [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
-// CHECK-NEXT:    [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
-// CHECK-NEXT:    [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], 
[[CarryZExt1]]
-// CHECK-NEXT:    [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
-// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> 
[[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> 
poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT:    [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> 
poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT:    [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> 
poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT:    [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> 
poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT:    [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } 
@llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } 
[[TMP2]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } 
[[TMP2]], 0
+// CHECK-NEXT:    [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT:    [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT:    [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], 
[[CARRYZEXT]]
+// CHECK-NEXT:    [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], 
<2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 // CHECK-NEXT:    ret <4 x i32> [[HLSL_ADDUINT64]]
 //
 uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll 
b/llvm/test/CodeGen/DirectX/UAddc.ll
index abafa40bf2306..c16a3f6a5b5fe 100644
--- a/llvm/test/CodeGen/DirectX/UAddc.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -1,17 +1,18 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | 
FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -scalarizer -dxil-op-lower 
-mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; CHECK: %dx.types.i32c = type { i32, i1 }
 
 define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
 ; CHECK-LABEL: define noundef i32 @test_UAddc(
 ; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT:    [[UAddc:%.*]] = call %dx.types.i32c 
@dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
-; CHECK-NEXT:    [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1
-; CHECK-NEXT:    [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
-; CHECK-NEXT:    [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-; CHECK-NEXT:    [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]]
-; CHECK-NEXT:    ret i32 [[Result]]
-; 
+; CHECK-NEXT:    [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] 
@[[DX_OP_BINARYWITHCARRYORBORROW_I32:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32
 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 1
+; CHECK-NEXT:    [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT:    [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
   %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
   %carry = extractvalue { i32, i1 } %uaddc, 1
   %sum = extractvalue { i32, i1 } %uaddc, 0
@@ -20,15 +21,47 @@ define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef 
%b) {
   ret i32 %result
 }
 
+define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> 
noundef %b) {
+; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2(
+; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%.*]] 
@[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I0]], i32 [[B_I0]]) 
#[[ATTR0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] 
@[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I1]], i32 [[B_I1]]) 
#[[ATTR0]]
+; CHECK-NEXT:    [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] 
[[UADDC_I09]], 1
+; CHECK-NEXT:    [[CARRY_ELEM11:%.*]] = extractvalue [[DX_TYPES_I32C]] 
[[UADDC_I18]], 1
+; CHECK-NEXT:    [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 
[[CARRY_ELEM1]], i64 0
+; CHECK-NEXT:    [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 
[[CARRY_ELEM11]], i64 1
+; CHECK-NEXT:    [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0
+; CHECK-NEXT:    [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1
+; CHECK-NEXT:    [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] 
[[UADDC_I09]], 0
+; CHECK-NEXT:    [[SUM_ELEM02:%.*]] = extractvalue [[DX_TYPES_I32C]] 
[[UADDC_I18]], 0
+; CHECK-NEXT:    [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32
+; CHECK-NEXT:    [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32
+; CHECK-NEXT:    [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]]
+; CHECK-NEXT:    [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM02]], [[CARRY_ZEXT_I1]]
+; CHECK-NEXT:    [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 
[[RESULT_I0]], i64 0
+; CHECK-NEXT:    [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], 
i32 [[RESULT_I1]], i64 1
+; CHECK-NEXT:    ret <2 x i32> [[RESULT]]
+;
+  %uaddc = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x 
i32> %a, <2 x i32> %b)
+  %carry = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 1
+  %sum = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 0
+  %carry_zext = zext <2 x i1> %carry to <2 x i32>
+  %result = add <2 x i32> %sum, %carry_zext
+  ret <2 x i32> %result
+}
 
 define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
 ; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
 ; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT:    [[UAddc:%.*]] = call %dx.types.i32c 
@dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
-; CHECK-NEXT:    insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0
-; CHECK-NEXT:    [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
-; CHECK-NEXT:    ret i32 [[Result]]
-; 
+; CHECK-NEXT:    [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] 
@[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC1]], i32 
[[A]], 0
+; CHECK-NEXT:    [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
   %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
   insertvalue { i32, i1 } %uaddc, i32 %a, 0
   %result = extractvalue { i32, i1 } %uaddc, 0
@@ -36,5 +69,4 @@ define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 
noundef %b) {
 }
 
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
-; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, 
i32)
 

>From 7714dc5686059f827d53b83d9472f02934aa88de Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.de...@gmail.com>
Date: Fri, 14 Feb 2025 22:31:14 +0000
Subject: [PATCH 3/3] Report name of function called in source

---
 clang/lib/Sema/SemaHLSL.cpp                        | 2 +-
 clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 99eb5360ec356..5773329101f27 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2257,7 +2257,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
     auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     if (!VTy) {
       SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
-          << "AddUint64" << /*all*/ 1;
+          << TheCall->getDirectCallee() << /*all*/ 1;
       return true;
     }
 
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
index ec9d026bb6fe7..9dd2aa1457f8f 100644
--- a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -22,7 +22,7 @@ uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
 
 uint2 test_scalar_arg_type(uint a) {
   return __builtin_hlsl_adduint64(a, a);
-  // expected-error@-1 {{all arguments to AddUint64 must be vectors}}
+  // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be 
vectors}}
 }
 
 uint2 test_signed_integer_args(int2 a, int2 b) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op (PR #127137)

Reply via email to