https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/119423

>From 3f007d702922db63e128e3c0f72dff2f600e0879 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 10 Dec 2024 17:41:07 +0000
Subject: [PATCH 1/6] [clang] Fix sub-integer
 __builtin_elementwise_(add|sub)_sat

These builtins would unconditionally perform the usual arithmetic
conversions on promotable scalar integer arguments. In practice this
meant that char and short arguments were promoted to int, and the
result was truncated back down afterwards. This in effect silently
replaced a saturating add/sub with a regular add/sub, which is neither
intuitive nor intended behaviour.

With this patch, promotable scalar integer types are not promoted to
int, but are kept intact. If the types differ, the smaller integer is
promoted to the larger one. The signedness of the operation matches the
larger integer type.

No change is made to vector types, which are not promoted and whose
element types must already match.
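
As a hedged illustration (not taken from the patch or its tests, and
assuming a signed 8-bit char), the change in behaviour looks like this:

    char a = 100, b = 100;
    char r = __builtin_elementwise_add_sat(a, b);
    // Before: both arguments were promoted to int, 100 + 100 = 200 never
    // saturated, and converting the result back to char wrapped to -56 on
    // typical targets.
    // After: the operation stays at 8 bits and saturates to CHAR_MAX (127).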
---
 clang/lib/Sema/SemaChecking.cpp               | 38 +++++++
 .../test/CodeGen/builtins-elementwise-math.c  | 98 ++++++++++++++++++-
 2 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ce846ae88c38b4..9d986a22945f78 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2765,6 +2765,44 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   // types only.
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat: {
+    if (checkArgCount(TheCall, 2))
+      return ExprError();
+    ExprResult LHS = TheCall->getArg(0);
+    ExprResult RHS = TheCall->getArg(1);
+    QualType LHSType = LHS.get()->getType().getUnqualifiedType();
+    QualType RHSType = RHS.get()->getType().getUnqualifiedType();
+    // If both LHS/RHS are promotable integer types, do not perform the usual
+    // conversions - we must keep the saturating operation at the correct
+    // bitwidth.
+    if (Context.isPromotableIntegerType(LHSType) &&
+        Context.isPromotableIntegerType(RHSType)) {
+      // First, convert each argument to an r-value.
+      ExprResult ResLHS = DefaultFunctionArrayLvalueConversion(LHS.get());
+      if (ResLHS.isInvalid())
+        return ExprError();
+      LHS = ResLHS.get();
+
+      ExprResult ResRHS = DefaultFunctionArrayLvalueConversion(RHS.get());
+      if (ResRHS.isInvalid())
+        return ExprError();
+      RHS = ResRHS.get();
+
+      LHSType = LHS.get()->getType().getUnqualifiedType();
+      RHSType = RHS.get()->getType().getUnqualifiedType();
+
+      // If the two integer types are not of equal order, cast the smaller
+      // integer one to the larger one
+      if (int Order = Context.getIntegerTypeOrder(LHSType, RHSType); Order == 1)
+        RHS = ImpCastExprToType(RHS.get(), LHSType, CK_IntegralCast);
+      else if (Order == -1)
+        LHS = ImpCastExprToType(LHS.get(), RHSType, CK_IntegralCast);
+
+      TheCall->setArg(0, LHS.get());
+      TheCall->setArg(1, RHS.get());
+      TheCall->setType(LHS.get()->getType().getUnqualifiedType());
+      break;
+    }
+
     if (BuiltinElementwiseMath(TheCall))
       return ExprError();
 
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 7f6b5f26eb9307..4ac6fe18c4d5a3 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -68,7 +68,10 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                       long long int i2, si8 vi1, si8 vi2,
                                       unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                       _BitInt(31) bi1, _BitInt(31) bi2,
-                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                      char c1, char c2, unsigned char uc1,
+                                      unsigned char uc2, short s1, short s2,
+                                      unsigned short us1, unsigned short us2) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -114,6 +117,50 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
 
   // CHECK: store i64 98, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_add_sat(1, 'a');
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.sadd.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_add_sat(c1, c2);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.uadd.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_add_sat(uc1, uc2);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_add_sat(s1, s2);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_add_sat(us1, us2);
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[C1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_add_sat(c1, s1);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[C1EXT]])
+  s1 = __builtin_elementwise_add_sat(s1, c1);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_add_sat(uc1, s1);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[C1EXT]])
+  us1 = __builtin_elementwise_add_sat(us1, c1);
 }
 
 void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
@@ -121,7 +168,10 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                       long long int i2, si8 vi1, si8 vi2,
                                       unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                       _BitInt(31) bi1, _BitInt(31) bi2,
-                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                      char c1, char c2, unsigned char uc1,
+                                      unsigned char uc2, short s1, short s2,
+                                      unsigned short us1, unsigned short us2) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -167,6 +217,50 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
 
   // CHECK: store i64 -96, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_sub_sat(1, 'a');
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.ssub.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_sub_sat(c1, c2);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.usub.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_sub_sat(uc1, uc2);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_sub_sat(s1, s2);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_sub_sat(us1, us2);
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[C1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_sub_sat(c1, s1);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[C1EXT]])
+  s1 = __builtin_elementwise_sub_sat(s1, c1);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_sub_sat(uc1, s1);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[C1EXT]])
+  us1 = __builtin_elementwise_sub_sat(us1, c1);
 }
 
 void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,

>From 4d39f93bedd6c25de17a1a168c727f5383701740 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Wed, 11 Dec 2024 16:24:53 +0000
Subject: [PATCH 2/6] fix popcount/bitreverse

---
 clang/lib/Sema/SemaChecking.cpp               | 21 ++++++++-
 .../test/CodeGen/builtins-elementwise-math.c  | 46 +++++++++++++++----
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9d986a22945f78..efb3aab8e39f46 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2828,11 +2828,28 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     break;
   case Builtin::BI__builtin_elementwise_popcount:
   case Builtin::BI__builtin_elementwise_bitreverse: {
-    if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
+    if (checkArgCount(TheCall, 1))
       return ExprError();
 
-    const Expr *Arg = TheCall->getArg(0);
+    Expr *Arg = TheCall->getArg(0);
     QualType ArgTy = Arg->getType();
+
+    // If the argument is a promotable integer type, do not perform the usual
+    // conversions - we must keep the operation at the correct bitwidth.
+    if (Context.isPromotableIntegerType(ArgTy)) {
+      // Convert the argument to an r-value - avoid the usual conversions.
+      ExprResult ResLHS = DefaultFunctionArrayLvalueConversion(Arg);
+      if (ResLHS.isInvalid())
+        return ExprError();
+      Arg = ResLHS.get();
+      TheCall->setArg(0, Arg);
+      TheCall->setType(Arg->getType());
+      break;
+    }
+
+    if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
+      return ExprError();
+
     QualType EltTy = ArgTy;
 
     if (auto *VecTy = EltTy->getAs<VectorType>())
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 4ac6fe18c4d5a3..872ab4b89e5708 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -512,7 +512,8 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
 
 void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
                                   long long int i1, long long int i2, short si,
-                                  _BitInt(31) bi1, _BitInt(31) bi2) {
+                                  _BitInt(31) bi1, _BitInt(31) bi2,
+                                  char ci) {
   
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
@@ -541,10 +542,24 @@ void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
   b = __builtin_elementwise_bitreverse(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.bitreverse.i32(i32 [[SI_EXT]])
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.bitreverse.i16(i16 [[SI]])
   si = __builtin_elementwise_bitreverse(si);
+
+  // CHECK:      store i16 28671, ptr %si.addr, align 2
+  si = __builtin_elementwise_bitreverse((short)-10);
+
+  // CHECK:      store i16 28671, ptr %si.addr, align 2
+  si = __builtin_elementwise_bitreverse((unsigned short)-10);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr, align 1
+  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.bitreverse.i8(i8 [[CI]])
+  ci = __builtin_elementwise_bitreverse(ci);
+
+  // CHECK:      store i8 111, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_bitreverse((unsigned char)-10);
+
+  // CHECK:      store i8 111, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_bitreverse((char)-10);
 }
 
 void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
@@ -762,7 +777,8 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
 
 void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
                                        long long int i2, short si,
-                                       _BitInt(31) bi1, _BitInt(31) bi2) {
+                                       _BitInt(31) bi1, _BitInt(31) bi2,
+                                       char ci) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
   i2 = __builtin_elementwise_popcount(i1);
@@ -789,10 +805,24 @@ void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
   b = __builtin_elementwise_popcount(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.ctpop.i16(i16 [[SI]])
   si = __builtin_elementwise_popcount(si);
+
+  // CHECK:      store i16 3, ptr %si.addr, align 2
+  si = __builtin_elementwise_popcount((unsigned short)32771);
+
+  // CHECK:      store i16 3, ptr %si.addr, align 2
+  si = __builtin_elementwise_popcount((short)32771);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr, align 1
+  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.ctpop.i8(i8 [[CI]])
+  ci = __builtin_elementwise_popcount(ci);
+
+  // CHECK:      store i8 2, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_popcount((unsigned char)192);
+
+  // CHECK:      store i8 2, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_popcount((char)192);
 }
 
 void test_builtin_elementwise_fmod(float f1, float f2, double d1, double d2,

>From 479f38472615b656d6df21aafe6268ef1f153db9 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Wed, 18 Dec 2024 17:21:19 +0000
Subject: [PATCH 3/6] wip: prevent any type mixing

---
 clang/include/clang/Sema/Sema.h               |   8 +-
 clang/lib/Sema/SemaChecking.cpp               | 139 ++++++------------
 clang/lib/Sema/SemaExpr.cpp                   |  41 +++---
 .../test/CodeGen/builtins-elementwise-math.c  |  66 +--------
 .../CodeGenHLSL/builtins/dot-builtin.hlsl     |   6 +-
 .../implicit-norecurse-attrib.hlsl            |   2 +-
 clang/test/Sema/builtins-elementwise-math.c   |  24 ++-
 7 files changed, 103 insertions(+), 183 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index af59b7f38c71aa..585770a5e7f74b 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2323,7 +2323,8 @@ class Sema final : public SemaBase {
                          const FunctionProtoType *Proto);
 
   /// \param FPOnly restricts the arguments to floating-point types.
-  bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
+  std::optional<QualType> BuiltinVectorMath(CallExpr *TheCall,
+                                            bool FPOnly = false);
   bool BuiltinVectorToScalarMath(CallExpr *TheCall);
 
   void checkLifetimeCaptureBy(FunctionDecl *FDecl, bool IsMemberFunction,
@@ -7557,6 +7558,11 @@ class Sema final : public SemaBase {
   ExprResult DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT,
                                               FunctionDecl *FDecl);
 
+  // Check that the usual arithmetic conversions can be performed on this pair
+  // of expressions that might be of enumeration type.
+  void checkEnumArithmeticConversions(Expr *LHS, Expr *RHS, SourceLocation Loc,
+                                      Sema::ArithConvKind ACK);
+
   // UsualArithmeticConversions - performs the UsualUnaryConversions on it's
   // operands and then handles various conversions that are common to binary
   // operators (C99 6.3.1.8). If both operands aren't arithmetic, this
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index efb3aab8e39f46..7cd8bffb57521e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2765,44 +2765,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   // types only.
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat: {
-    if (checkArgCount(TheCall, 2))
-      return ExprError();
-    ExprResult LHS = TheCall->getArg(0);
-    ExprResult RHS = TheCall->getArg(1);
-    QualType LHSType = LHS.get()->getType().getUnqualifiedType();
-    QualType RHSType = RHS.get()->getType().getUnqualifiedType();
-    // If both LHS/RHS are promotable integer types, do not perform the usual
-    // conversions - we must keep the saturating operation at the correct
-    // bitwidth.
-    if (Context.isPromotableIntegerType(LHSType) &&
-        Context.isPromotableIntegerType(RHSType)) {
-      // First, convert each argument to an r-value.
-      ExprResult ResLHS = DefaultFunctionArrayLvalueConversion(LHS.get());
-      if (ResLHS.isInvalid())
-        return ExprError();
-      LHS = ResLHS.get();
-
-      ExprResult ResRHS = DefaultFunctionArrayLvalueConversion(RHS.get());
-      if (ResRHS.isInvalid())
-        return ExprError();
-      RHS = ResRHS.get();
-
-      LHSType = LHS.get()->getType().getUnqualifiedType();
-      RHSType = RHS.get()->getType().getUnqualifiedType();
-
-      // If the two integer types are not of equal order, cast the smaller
-      // integer one to the larger one
-      if (int Order = Context.getIntegerTypeOrder(LHSType, RHSType); Order == 1)
-        RHS = ImpCastExprToType(RHS.get(), LHSType, CK_IntegralCast);
-      else if (Order == -1)
-        LHS = ImpCastExprToType(LHS.get(), RHSType, CK_IntegralCast);
-
-      TheCall->setArg(0, LHS.get());
-      TheCall->setArg(1, RHS.get());
-      TheCall->setType(LHS.get()->getType().getUnqualifiedType());
-      break;
-    }
-
     if (BuiltinElementwiseMath(TheCall))
       return ExprError();
 
@@ -2828,28 +2790,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     break;
   case Builtin::BI__builtin_elementwise_popcount:
   case Builtin::BI__builtin_elementwise_bitreverse: {
-    if (checkArgCount(TheCall, 1))
-      return ExprError();
-
-    Expr *Arg = TheCall->getArg(0);
-    QualType ArgTy = Arg->getType();
-
-    // If the argument is a promotable integer type, do not perform the usual
-    // conversions - we must keep the operation at the correct bitwidth.
-    if (Context.isPromotableIntegerType(ArgTy)) {
-      // Convert the argument to an r-value - avoid the usual conversions.
-      ExprResult ResLHS = DefaultFunctionArrayLvalueConversion(Arg);
-      if (ResLHS.isInvalid())
-        return ExprError();
-      Arg = ResLHS.get();
-      TheCall->setArg(0, Arg);
-      TheCall->setType(Arg->getType());
-      break;
-    }
-
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
       return ExprError();
 
+    const Expr *Arg = TheCall->getArg(0);
+    QualType ArgTy = Arg->getType();
     QualType EltTy = ArgTy;
 
     if (auto *VecTy = EltTy->getAs<VectorType>())
@@ -14649,11 +14594,18 @@ void Sema::CheckAddressOfPackedMember(Expr *rhs) {
                      _2, _3, _4));
 }
 
+static ExprResult UsualUnaryConversionsNoPromoteInt(Sema &S, Expr *E) {
+  // Don't promote integer types
+  if (QualType Ty = E->getType(); S.getASTContext().isPromotableIntegerType(Ty))
+    return S.DefaultFunctionArrayLvalueConversion(E);
+  return S.UsualUnaryConversions(E);
+}
+
 bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
   if (checkArgCount(TheCall, 1))
     return true;
 
-  ExprResult A = UsualUnaryConversions(TheCall->getArg(0));
+  ExprResult A = UsualUnaryConversionsNoPromoteInt(*this, TheCall->getArg(0));
   if (A.isInvalid())
     return true;
 
@@ -14668,57 +14620,63 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
 }
 
 bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
-  QualType Res;
-  if (BuiltinVectorMath(TheCall, Res, FPOnly))
-    return true;
-  TheCall->setType(Res);
-  return false;
+  if (auto Res = BuiltinVectorMath(TheCall, FPOnly); Res.has_value()) {
+    TheCall->setType(*Res);
+    return false;
+  }
+  return true;
 }
 
 bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
-  QualType Res;
-  if (BuiltinVectorMath(TheCall, Res))
+  std::optional<QualType> Res = BuiltinVectorMath(TheCall);
+  if (!Res)
     return true;
 
-  if (auto *VecTy0 = Res->getAs<VectorType>())
+  if (auto *VecTy0 = (*Res)->getAs<VectorType>())
     TheCall->setType(VecTy0->getElementType());
   else
-    TheCall->setType(Res);
+    TheCall->setType(*Res);
 
   return false;
 }
 
-bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
+std::optional<QualType> Sema::BuiltinVectorMath(CallExpr *TheCall,
+                                                bool FPOnly) {
   if (checkArgCount(TheCall, 2))
-    return true;
+    return std::nullopt;
 
-  ExprResult A = TheCall->getArg(0);
-  ExprResult B = TheCall->getArg(1);
-  // Do standard promotions between the two arguments, returning their common
-  // type.
-  Res = UsualArithmeticConversions(A, B, TheCall->getExprLoc(), ACK_Comparison);
-  if (A.isInvalid() || B.isInvalid())
-    return true;
+  checkEnumArithmeticConversions(TheCall->getArg(0), TheCall->getArg(1),
+                                 TheCall->getExprLoc(), ACK_Comparison);
 
-  QualType TyA = A.get()->getType();
-  QualType TyB = B.get()->getType();
+  Expr *Args[2];
+  for (int I = 0; I < 2; ++I) {
+    ExprResult Converted =
+        UsualUnaryConversionsNoPromoteInt(*this, TheCall->getArg(I));
+    if (Converted.isInvalid())
+      return std::nullopt;
+    Args[I] = Converted.get();
+  }
 
-  if (Res.isNull() || TyA.getCanonicalType() != TyB.getCanonicalType())
-    return Diag(A.get()->getBeginLoc(),
-                diag::err_typecheck_call_different_arg_types)
-           << TyA << TyB;
+  SourceLocation LocA = Args[0]->getBeginLoc();
+  QualType TyA = Args[0]->getType();
+  QualType TyB = Args[1]->getType();
+
+  if (TyA.getCanonicalType() != TyB.getCanonicalType()) {
+    Diag(LocA, diag::err_typecheck_call_different_arg_types) << TyA << TyB;
+    return std::nullopt;
+  }
 
   if (FPOnly) {
-    if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
-      return true;
+    if (checkFPMathBuiltinElementType(*this, LocA, TyA, 1))
+      return std::nullopt;
   } else {
-    if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
-      return true;
+    if (checkMathBuiltinElementType(*this, LocA, TyA, 1))
+      return std::nullopt;
   }
 
-  TheCall->setArg(0, A.get());
-  TheCall->setArg(1, B.get());
-  return false;
+  TheCall->setArg(0, Args[0]);
+  TheCall->setArg(1, Args[1]);
+  return TyA;
 }
 
 bool Sema::BuiltinElementwiseTernaryMath(CallExpr *TheCall,
@@ -14728,7 +14686,8 @@ bool Sema::BuiltinElementwiseTernaryMath(CallExpr *TheCall,
 
   Expr *Args[3];
   for (int I = 0; I < 3; ++I) {
-    ExprResult Converted = UsualUnaryConversions(TheCall->getArg(I));
+    ExprResult Converted =
+        UsualUnaryConversionsNoPromoteInt(*this, TheCall->getArg(I));
     if (Converted.isInvalid())
       return true;
     Args[I] = Converted.get();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 562c98c6babe04..0dbabd1f7b5000 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1489,9 +1489,9 @@ static QualType handleFixedPointConversion(Sema &S, QualType LHSTy,
 
/// Check that the usual arithmetic conversions can be performed on this pair of
 /// expressions that might be of enumeration type.
-static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
-                                           SourceLocation Loc,
-                                           Sema::ArithConvKind ACK) {
+void Sema::checkEnumArithmeticConversions(Expr *LHS, Expr *RHS,
+                                          SourceLocation Loc,
+                                          Sema::ArithConvKind ACK) {
   // C++2a [expr.arith.conv]p1:
   //   If one operand is of enumeration type and the other operand is of a
   //   different enumeration type or a floating-point type, this behavior is
@@ -1499,54 +1499,53 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
   //
  // Warn on this in all language modes. Produce a deprecation warning in C++20.
   // Eventually we will presumably reject these cases (in C++23 onwards?).
-  QualType L = LHS->getEnumCoercedType(S.Context),
-           R = RHS->getEnumCoercedType(S.Context);
+  QualType L = LHS->getEnumCoercedType(Context),
+           R = RHS->getEnumCoercedType(Context);
   bool LEnum = L->isUnscopedEnumerationType(),
        REnum = R->isUnscopedEnumerationType();
   bool IsCompAssign = ACK == Sema::ACK_CompAssign;
   if ((!IsCompAssign && LEnum && R->isFloatingType()) ||
       (REnum && L->isFloatingType())) {
-    S.Diag(Loc, S.getLangOpts().CPlusPlus26
-                    ? diag::err_arith_conv_enum_float_cxx26
-                : S.getLangOpts().CPlusPlus20
-                    ? diag::warn_arith_conv_enum_float_cxx20
-                    : diag::warn_arith_conv_enum_float)
+    Diag(Loc, getLangOpts().CPlusPlus26 ? diag::err_arith_conv_enum_float_cxx26
+              : getLangOpts().CPlusPlus20
+                  ? diag::warn_arith_conv_enum_float_cxx20
+                  : diag::warn_arith_conv_enum_float)
         << LHS->getSourceRange() << RHS->getSourceRange() << (int)ACK << LEnum
         << L << R;
   } else if (!IsCompAssign && LEnum && REnum &&
-             !S.Context.hasSameUnqualifiedType(L, R)) {
+             !Context.hasSameUnqualifiedType(L, R)) {
     unsigned DiagID;
     // In C++ 26, usual arithmetic conversions between 2 different enum types
     // are ill-formed.
-    if (S.getLangOpts().CPlusPlus26)
+    if (getLangOpts().CPlusPlus26)
       DiagID = diag::err_conv_mixed_enum_types_cxx26;
     else if (!L->castAs<EnumType>()->getDecl()->hasNameForLinkage() ||
              !R->castAs<EnumType>()->getDecl()->hasNameForLinkage()) {
       // If either enumeration type is unnamed, it's less likely that the
       // user cares about this, but this situation is still deprecated in
       // C++2a. Use a different warning group.
-      DiagID = S.getLangOpts().CPlusPlus20
-                    ? diag::warn_arith_conv_mixed_anon_enum_types_cxx20
-                    : diag::warn_arith_conv_mixed_anon_enum_types;
+      DiagID = getLangOpts().CPlusPlus20
+                   ? diag::warn_arith_conv_mixed_anon_enum_types_cxx20
+                   : diag::warn_arith_conv_mixed_anon_enum_types;
     } else if (ACK == Sema::ACK_Conditional) {
       // Conditional expressions are separated out because they have
       // historically had a different warning flag.
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_conditional_mixed_enum_types_cxx20
                    : diag::warn_conditional_mixed_enum_types;
     } else if (ACK == Sema::ACK_Comparison) {
       // Comparison expressions are separated out because they have
       // historically had a different warning flag.
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_comparison_mixed_enum_types_cxx20
                    : diag::warn_comparison_mixed_enum_types;
     } else {
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_arith_conv_mixed_enum_types_cxx20
                    : diag::warn_arith_conv_mixed_enum_types;
     }
-    S.Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange()
-                        << (int)ACK << L << R;
+    Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange()
+                      << (int)ACK << L << R;
   }
 }
 
@@ -1557,7 +1556,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
 QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           ArithConvKind ACK) {
-  checkEnumArithmeticConversions(*this, LHS.get(), RHS.get(), Loc, ACK);
+  checkEnumArithmeticConversions(LHS.get(), RHS.get(), Loc, ACK);
 
   if (ACK != ACK_CompAssign) {
     LHS = UsualUnaryConversions(LHS.get());
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 872ab4b89e5708..b45c3405f3e09c 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -57,9 +57,7 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   b = __builtin_elementwise_abs(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.abs.i16(i16 [[SI]], i1 false)
   si = __builtin_elementwise_abs(si);
 }
 
@@ -79,7 +77,7 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_add_sat(i1, 10);
+  i1 = __builtin_elementwise_add_sat(i1, (long long int)10);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -137,30 +135,6 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
   // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
   us1 = __builtin_elementwise_add_sat(us1, us2);
-
-  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[C1EXT]], i16 [[S1]])
-  s1 = __builtin_elementwise_add_sat(c1, s1);
-
-  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[C1EXT]])
-  s1 = __builtin_elementwise_add_sat(s1, c1);
-
-  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
-  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
-  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
-  s1 = __builtin_elementwise_add_sat(uc1, s1);
-
-  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
-  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[C1EXT]])
-  us1 = __builtin_elementwise_add_sat(us1, c1);
 }
 
 void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
@@ -179,7 +153,7 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_sub_sat(i1, 10);
+  i1 = __builtin_elementwise_sub_sat(i1, (long long int)10);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -237,30 +211,6 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
   // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[US2]])
   us1 = __builtin_elementwise_sub_sat(us1, us2);
-
-  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[C1EXT]], i16 [[S1]])
-  s1 = __builtin_elementwise_sub_sat(c1, s1);
-
-  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[C1EXT]])
-  s1 = __builtin_elementwise_sub_sat(s1, c1);
-
-  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
-  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
-  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
-  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
-  s1 = __builtin_elementwise_sub_sat(uc1, s1);
-
-  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
-  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
-  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
-  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[C1EXT]])
-  us1 = __builtin_elementwise_sub_sat(us1, c1);
 }
 
 void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
@@ -372,7 +322,7 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_max(i1, 10);
+  i1 = __builtin_elementwise_max(i1, (long long int)10);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -456,7 +406,7 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
-  i1 = __builtin_elementwise_min(-11, i2);
+  i1 = __builtin_elementwise_min((long long int)-11, i2);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -468,12 +418,6 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
   u1 = __builtin_elementwise_min(u1, u2);
 
-  // CHECK:      [[U1:%.+]] = load i32, ptr %u1.addr, align 4
-  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
-  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
-  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
-  u1 = __builtin_elementwise_min(u1, i2);
-
   // CHECK:      [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
   // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
diff --git a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
index 482f089d4770fd..f6a19257d94e26 100644
--- a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
@@ -6,7 +6,7 @@
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
 
 // CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
@@ -16,7 +16,7 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
 
 // CHECK-LABEL: builtin_dot_int_to_float_promotion
@@ -26,5 +26,5 @@ float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_dot_int_to_float_promotion ( float p0, int p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
diff --git a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
index ca01960678175c..a8ab6ce98ae7e9 100644
--- a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
+++ b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
@@ -37,7 +37,7 @@ uint Find(Node SortedTree[MAX], uint key) {
 // Imagine the inout works
 export
bool InitTree(/*inout*/ Node tree[MAX], RWBuffer<uint4> encodedTree, uint maxDepth) {
-  uint size = pow(2.f, maxDepth) - 1;
+  uint size = pow(2.f, (float)maxDepth) - 1;
   if (size > MAX) return false;
   for (uint i = 1; i < size; i++) {
     tree[i].value = encodedTree[i].x;
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 6002e91f8ec6fa..4f591308429296 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -69,6 +69,7 @@ void test_builtin_elementwise_add_sat(int i, short s, double d, float4 v, int3 i
  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 
   s = __builtin_elementwise_add_sat(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
@@ -128,6 +129,7 @@ void test_builtin_elementwise_sub_sat(int i, short s, double d, float4 v, int3 i
  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 
   s = __builtin_elementwise_sub_sat(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
@@ -184,6 +186,7 @@ void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, u
  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
   s = __builtin_elementwise_max(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
@@ -240,6 +243,7 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
   s = __builtin_elementwise_min(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
@@ -273,7 +277,7 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
 }
 
-void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_maximum(p, d);
  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
 
@@ -289,15 +293,19 @@ void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4
   i = __builtin_elementwise_maximum(i, i, i);
  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
 
-  i = __builtin_elementwise_maximum(v, iv);
+  i = __builtin_elementwise_maximum(fv, iv);
  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
 
   i = __builtin_elementwise_maximum(uv, iv);
  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
+  dv = __builtin_elementwise_maximum(fv, dv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
   d = __builtin_elementwise_maximum(f, d);
+  // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
 
-  v = __builtin_elementwise_maximum(v, v);
+  fv = __builtin_elementwise_maximum(fv, fv);
 
   i = __builtin_elementwise_maximum(iv, iv);
  // expected-error@-1 {{1st argument must be a floating point type (was 'int3' (vector of 3 'int' values))}}
@@ -314,7 +322,7 @@ void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4
  // expected-error@-1 {{1st argument must be a floating point type (was '_Complex float')}}
 }
 
-void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_minimum(p, d);
  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
 
@@ -330,15 +338,19 @@ void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4
   i = __builtin_elementwise_minimum(i, i, i);
  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
 
-  i = __builtin_elementwise_minimum(v, iv);
+  i = __builtin_elementwise_minimum(fv, iv);
  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
 
   i = __builtin_elementwise_minimum(uv, iv);
  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
+  dv = __builtin_elementwise_minimum(fv, dv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
   d = __builtin_elementwise_minimum(f, d);
+  // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
 
-  v = __builtin_elementwise_minimum(v, v);
+  fv = __builtin_elementwise_minimum(fv, fv);
 
   i = __builtin_elementwise_minimum(iv, iv);
  // expected-error@-1 {{1st argument must be a floating point type (was 'int3' (vector of 3 'int' values))}}

>From ad8ccbaddcafbc5642316cf57c0a2fc2c0882b13 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 6 Jan 2025 12:37:45 +0000
Subject: [PATCH 4/6] update tests

---
 .../test/CodeGen/builtins-elementwise-math.c  | 27 ++++++++++++++++---
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index b45c3405f3e09c..41b31707e69a27 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -35,6 +35,11 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
   i2 = __builtin_elementwise_abs(i1);
 
+  // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+  // CHECK:      [[S1:%.+]] = trunc i64 [[I1]] to i16
+  // CHECK-NEXT: call i16 @llvm.abs.i16(i16 [[S1]], i1 false)
+  i1 = __builtin_elementwise_abs((short)i1);
+
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
   vi2 = __builtin_elementwise_abs(vi1);
@@ -77,7 +82,7 @@ void test_builtin_elementwise_add_sat(float f1, float f2, 
double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_add_sat(i1, (long long int)10);
+  i1 = __builtin_elementwise_add_sat(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -131,6 +136,13 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[S2]])
   s1 = __builtin_elementwise_add_sat(s1, s2);
 
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK:      [[I1:%.+]] = sext i16 [[S1]] to i32
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK:      [[I2:%.+]] = sext i16 [[S2]] to i32
+  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[I1]], i32 [[I2]])
+  s1 = __builtin_elementwise_add_sat((int)s1, (int)s2);
+
   // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
   // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
   // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
@@ -153,7 +165,7 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_sub_sat(i1, (long long int)10);
+  i1 = __builtin_elementwise_sub_sat(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -322,7 +334,7 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_max(i1, (long long int)10);
+  i1 = __builtin_elementwise_max(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -406,7 +418,14 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
-  i1 = __builtin_elementwise_min((long long int)-11, i2);
+  i1 = __builtin_elementwise_min(-11ll, i2);
+
+  // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+  // CHECK:      [[S1:%.+]] = trunc i64 [[I1]] to i16
+  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
+  // CHECK:      [[S2:%.+]] = trunc i64 [[I2]] to i16
+  // CHECK-NEXT: call i16 @llvm.smin.i16(i16 [[S1]], i16 [[S2]])
+  i1 = __builtin_elementwise_min((short)i1, (short)i2);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16

>From 80a83998d0a3e991fb64b5e07141769cc02f7453 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 6 Jan 2025 15:51:41 +0000
Subject: [PATCH 5/6] update docs

---
 clang/docs/LanguageExtensions.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index e020710c7aa4f5..6a5862978a72b0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -649,7 +649,9 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
 
The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
 ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
-``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
+``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context. No
+implicit promotion of integer types takes place. The mixing of integer types of
+different sizes and signs is forbidden in binary and ternary builtins.
 
============================================== ====================================================================== =========================================
         Name                                   Operation                                                             Supported element types

>From 453460f3e987c7523a4cbb50a6f766c47a950c8f Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 6 Jan 2025 16:30:41 +0000
Subject: [PATCH 6/6] update docs: newline

---
 clang/docs/LanguageExtensions.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6a5862978a72b0..35493c4a934d9b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -649,9 +649,10 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
 
The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
 ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
-``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context. No
-implicit promotion of integer types takes place. The mixing of integer types of
-different sizes and signs is forbidden in binary and ternary builtins.
+``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
+
+No implicit promotion of integer types takes place. The mixing of integer types
+of different sizes and signs is forbidden in binary and ternary builtins.
 
============================================== ====================================================================== =========================================
         Name                                   Operation                                                             Supported element types

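To make the documented rule concrete, here is a small illustrative
sketch (not part of the patch) of the intended final semantics for
__builtin_elementwise_add_sat:

    // Same-typed scalar operands keep their width: a true 16-bit saturating add.
    short add_shorts(short a, short b) {
      return __builtin_elementwise_add_sat(a, b);
    }

    int add_mixed(int a, short b) {
      // return __builtin_elementwise_add_sat(a, b);   // error: arguments are of
      //                                               // different types ('int' vs 'short')
      return __builtin_elementwise_add_sat(a, (int)b); // OK: explicit cast makes both 'int'
    }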