fhahn updated this revision to Diff 382128.
fhahn marked an inline comment as done.
fhahn added a comment.

Addressed the comments from @aaron.ballman, thanks!

The most notable changes are using `UsualArithmeticConversions` for argument 
conversion and checking the canonical types. Also added a bunch of additional 
Sema tests as suggested, and some codegen tests for a couple of conversions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111985/new/

https://reviews.llvm.org/D111985

Files:
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-elementwise-math.c
  clang/test/Sema/builtins-elementwise-math.c

Index: clang/test/Sema/builtins-elementwise-math.c
===================================================================
--- /dev/null
+++ clang/test/Sema/builtins-elementwise-math.c
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 %s -pedantic -verify -triple=x86_64-apple-darwin9
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+
+struct Foo {
+  char *p;
+};
+
+__attribute__((address_space(1))) int int_as_one;
+typedef int bar;
+bar b;
+
+void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, int *p) {
+  i = __builtin_elementwise_max(p, d);
+  // expected-error@-1 {{argument types do not match, 'int *' != 'double'}}
+
+  struct Foo foo = __builtin_elementwise_max(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_max(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_max();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_max(v, iv);
+  // expected-error@-1 {{argument types do not match, 'float4' (vector of 4 'float' values) != 'int3' (vector of 3 'int' values)}}
+
+  s = __builtin_elementwise_max(i, s); // ok: mixed 'int' and 'short' operands
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_max(one, two);
+
+  enum f { three };
+  enum f x = __builtin_elementwise_max(one, three); // ok: enumerators of different enums
+
+  _ExtInt(32) ext;
+  ext = __builtin_elementwise_max(ext, ext);
+
+  const int ci; // a const-qualified operand must not cause a type mismatch
+  i = __builtin_elementwise_max(ci, i);
+  i = __builtin_elementwise_max(i, ci);
+  i = __builtin_elementwise_max(ci, ci);
+
+  i = __builtin_elementwise_max(i, int_as_one); // ok: operands differ only in address space
+  i = __builtin_elementwise_max(i, b);          // ok: operands differ only in typedef sugar
+
+  int A[10];
+  A = __builtin_elementwise_max(A, A);
+  // expected-error@-1 {{argument type 'int *' is not supported}}
+
+  int(ii); // NOTE(review): 'ii' is never used below
+  int j;
+  j = __builtin_elementwise_max(i, j);
+}
+
+void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, int *p) {
+  i = __builtin_elementwise_min(p, d);
+  // expected-error@-1 {{argument types do not match, 'int *' != 'double'}}
+
+  struct Foo foo = __builtin_elementwise_min(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_min(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_min();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_min(v, iv);
+  // expected-error@-1 {{argument types do not match, 'float4' (vector of 4 'float' values) != 'int3' (vector of 3 'int' values)}}
+
+  s = __builtin_elementwise_min(i, s); // ok: mixed 'int' and 'short' operands
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_min(one, two);
+
+  enum f { three };
+  enum f x = __builtin_elementwise_min(one, three); // ok: enumerators of different enums
+
+  _ExtInt(32) ext;
+  ext = __builtin_elementwise_min(ext, ext);
+
+  const int ci; // a const-qualified operand must not cause a type mismatch
+  i = __builtin_elementwise_min(ci, i);
+  i = __builtin_elementwise_min(i, ci);
+  i = __builtin_elementwise_min(ci, ci);
+
+  i = __builtin_elementwise_min(i, int_as_one); // ok: operands differ only in address space
+  i = __builtin_elementwise_min(i, b);          // ok: operands differ only in typedef sugar
+
+  int A[10];
+  A = __builtin_elementwise_min(A, A);
+  // expected-error@-1 {{argument type 'int *' is not supported}}
+
+  int(ii); // NOTE(review): 'ii' is never used below
+  int j;
+  j = __builtin_elementwise_min(i, j);
+}
Index: clang/test/CodeGen/builtins-elementwise-math.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-elementwise-math.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef short int si8 __attribute__((ext_vector_type(8)));
+typedef unsigned int u4 __attribute__((ext_vector_type(4)));
+
+__attribute__((address_space(1))) int int_as_one;
+typedef int bar;
+bar b;
+
+void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2, long long int i1,
+                                  long long int i2, si8 vi1, si8 vi2,
+                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_max(
+
+  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
+  // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_max(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_max(d1, d2);
+
+  // CHECK:      [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
+  d1 = __builtin_elementwise_max(20.0, d2);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_max(vf1, vf2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_max(i1, i2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
+  i1 = __builtin_elementwise_max(i1, 10);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_max(vi1, vi2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_max(u1, u2);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_max(vu1, vu2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_max(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_max(vf2, cvf1);
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_max(int_as_one, b);
+
+  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
+  i1 = __builtin_elementwise_max(1, 'a');
+}
+
+void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2, long long int i1,
+                                  long long int i2, si8 vi1, si8 vi2,
+                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_min(
+  // CHECK:      [[F1:%.+]] = load float, float* %f1.addr, align 4
+  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
+  // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]])
+  f1 = __builtin_elementwise_min(f1, f2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
+  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
+  d1 = __builtin_elementwise_min(d1, d2);
+
+  // CHECK:      [[D1:%.+]] = load double, double* %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
+  d1 = __builtin_elementwise_min(d1, 2.0);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
+  vf1 = __builtin_elementwise_min(vf1, vf2);
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_min(i1, i2);
+
+  // CHECK:      [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
+  i1 = __builtin_elementwise_min(-11, i2);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_min(vi1, vi2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_min(u1, u2);
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
+  u1 = __builtin_elementwise_min(u1, i2);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_min(vu1, vu2);
+
+  // CHECK:      [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
+  const float4 cvf1 = vf1;
+  vf1 = __builtin_elementwise_min(cvf1, vf2);
+
+  // CHECK:      [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
+  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
+  vf1 = __builtin_elementwise_min(vf2, cvf1);
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_min(int_as_one, b);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1976,6 +1976,11 @@
     break;
   }
 
+  case Builtin::BI__builtin_elementwise_min:
+  case Builtin::BI__builtin_elementwise_max:
+    if (SemaBuiltinElementwiseMath(TheCall))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_matrix_transpose:
     return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
 
@@ -16649,6 +16654,48 @@
                      _2, _3, _4));
 }
 
+// Diagnose \p Ty if the elementwise math builtins cannot operate on it.
+// Vector types and types accepted by ConstantMatrixType::isValidElementType
+// pass (returns false); anything else reports an error at \p Loc (returns true).
+static bool checkMathBuiltinElementType(Sema &S, SourceLocation Loc,
+                                        QualType Ty) {
+  if (Ty->getAs<VectorType>() || ConstantMatrixType::isValidElementType(Ty))
+    return false;
+
+  S.Diag(Loc, diag::err_elementwise_math_invalid_arg_type) << Ty;
+  return true;
+}
+
+bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) {
+  if (checkArgCount(*this, TheCall, 2))
+    return true;
+
+  ExprResult A = TheCall->getArg(0);
+  ExprResult B = TheCall->getArg(1);
+  // Do standard promotions between the two arguments, returning their common
+  // type. A null Res is diagnosed below as an argument type mismatch.
+  QualType Res =
+      UsualArithmeticConversions(A, B, TheCall->getExprLoc(), ACK_Comparison);
+  if (A.isInvalid() || B.isInvalid())
+    return true;
+
+  QualType TyA = A.get()->getType();
+  QualType TyB = B.get()->getType();
+
+  if (Res.isNull() || TyA.getCanonicalType() != TyB.getCanonicalType())
+    return Diag(A.get()->getBeginLoc(),
+                diag::err_elementwise_math_arg_types_mismatch)
+           << TyA << TyB;
+
+  if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA))
+    return true;
+
+  TheCall->setArg(0, A.get()); // install the converted operands
+  TheCall->setArg(1, B.get());
+  TheCall->setType(Res); // common type, so independent of operand order
+  return false; // false means the call passed every check above
+}
+
 ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
                                             ExprResult CallResult) {
   if (checkArgCount(*this, TheCall, 1))
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -3101,6 +3101,39 @@
     return RValue::get(V);
   }
 
+  case Builtin::BI__builtin_elementwise_max: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result;
+    if (Op0->getType()->isIntOrIntVectorTy()) {
+      QualType Ty = E->getArg(0)->getType();
+      if (auto *VecTy = Ty->getAs<VectorType>())
+        Ty = VecTy->getElementType();
+      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+                                                 ? llvm::Intrinsic::smax
+                                                 : llvm::Intrinsic::umax,
+                                             Op0, Op1, nullptr, "elt.max");
+    } else
+      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
+    return RValue::get(Result);
+  }
+  case Builtin::BI__builtin_elementwise_min: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result;
+    if (Op0->getType()->isIntOrIntVectorTy()) {
+      QualType Ty = E->getArg(0)->getType();
+      if (auto *VecTy = Ty->getAs<VectorType>())
+        Ty = VecTy->getElementType();
+      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+                                                 ? llvm::Intrinsic::smin
+                                                 : llvm::Intrinsic::umin,
+                                             Op0, Op1, nullptr, "elt.min");
+    } else
+      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__builtin_matrix_transpose: {
     const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
     Value *MatValue = EmitScalarExpr(E->getArg(0));
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -12712,6 +12712,8 @@
 
   bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc);
 
+  bool SemaBuiltinElementwiseMath(CallExpr *TheCall);
+
   // Matrix builtin handling.
   ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
                                         ExprResult CallResult);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8744,6 +8744,12 @@
 
 let CategoryName = "Semantic Issue" in {
 
+def err_elementwise_math_arg_types_mismatch : Error<
+  "argument types do not match, %0 != %1">;
+
+def err_elementwise_math_invalid_arg_type : Error<
+  "argument type %0 is not supported">;
+
 def err_invalid_conversion_between_matrixes : Error<
   "conversion between matrix types%diff{ $ and $|}0,1 of different size is not allowed">;
 
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -643,6 +643,9 @@
 BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")
 BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
 
+BUILTIN(__builtin_elementwise_max, "v.", "nct")
+BUILTIN(__builtin_elementwise_min, "v.", "nct")
+
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_store, "v.", "nFt")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to