[PATCH] D112001: [Clang] Add min/max reduction builtins.

Florian Hahn via Phabricator via cfe-commits Mon, 18 Oct 2021 08:28:57 -0700

fhahn created this revision.
fhahn added reviewers: aaron.ballman, scanon, craig.topper, rjmccall, 
erichkeane.
fhahn requested review of this revision.
Herald added a project: clang.


This patch implements __builtin_reduce_max and __builtin_reduce_min as
specified in D111529 <https://reviews.llvm.org/D111529>.

The order of operations does not matter for min or max reductions and
they can be directly lowered to the corresponding
llvm.vector.reduce.{fmin,fmax,umin,umax,smin,smax} intrinsic calls.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D112001

Files:
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-elementwise-math.c
  clang/test/Sema/builtins-elementwise-math.c

Index: clang/test/Sema/builtins-elementwise-math.c
===================================================================
--- clang/test/Sema/builtins-elementwise-math.c
+++ clang/test/Sema/builtins-elementwise-math.c
@@ -61,3 +61,31 @@
   i = __builtin_elementwise_min(v, iv);
   // expected-error@-1 {{argument types do not match, 'float4' (vector of 4 'float' values) != 'int3' (vector of 3 'int' values)}}
 }
+
+void test_builtin_reduce_max(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_max(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_max(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_max();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_max(i);
+  // expected-error@-1 {{argument must have a vector type, but was 'int'}}
+}
+
+void test_builtin_reduce_min(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_min(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_min(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_min();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_min(i);
+  // expected-error@-1 {{argument must have a vector type, but was 'int'}}
+}
Index: clang/test/CodeGen/builtins-elementwise-math.c
===================================================================
--- clang/test/CodeGen/builtins-elementwise-math.c
+++ clang/test/CodeGen/builtins-elementwise-math.c
@@ -110,3 +110,33 @@
   // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_min(vu1, vu2);
 }
+
+void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_max(
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_max(vf1);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_max(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_max(vu1);
+}
+
+void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
+  // CHECK-LABEL: define void @test_builtin_reduce_min(
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+  // CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]])
+  float r1 = __builtin_reduce_min(vf1);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_min(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_min(vu1);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1978,11 +1978,15 @@
 
   case Builtin::BI__builtin_elementwise_abs:
     return SemaBuiltinElementwiseMathOneArg(TheCall, TheCallResult);
-
   case Builtin::BI__builtin_elementwise_min:
   case Builtin::BI__builtin_elementwise_max:
     return SemaBuiltinElementwiseMathTwoArgs(TheCall, TheCallResult);
 
+  case Builtin::BI__builtin_reduce_max:
+  case Builtin::BI__builtin_reduce_min:
+    return SemaBuiltinReduceMath(TheCall, TheCallResult);
+
+
   case Builtin::BI__builtin_matrix_transpose:
     return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
   case Builtin::BI__builtin_matrix_column_major_load:
@@ -16656,6 +16660,25 @@
                      _2, _3, _4));
 }
 
+ExprResult Sema::SemaBuiltinReduceMath(CallExpr *TheCall,
+                                                  ExprResult CallResult) {
+  if (checkArgCount(*this, TheCall, 1))
+    return ExprError();
+
+  Expr *A = TheCall->getArg(0);
+  QualType TyA = A->getType();
+
+  const VectorType *VecTy = TyA->getAs<VectorType>();
+  if (!VecTy)
+  if (!TyA->getAs<VectorType>())
+      return Diag(A->getBeginLoc(), diag::err_elementwise_math_invalid_arg_type_2)
+             << "vector"
+             << TyA;
+
+  TheCall->setType(VecTy->getElementType());
+  return CallResult;
+}
+
 ExprResult Sema::SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall,
                                                   ExprResult CallResult) {
   if (checkArgCount(*this, TheCall, 1))
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -3145,6 +3145,43 @@
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_reduce_max: {
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      if (IrTy->isIntOrIntVectorTy()) {
+        if (auto *VecTy = QT->getAs<VectorType>())
+          QT = VecTy->getElementType();
+        if (QT->isSignedIntegerType())
+          return llvm::Intrinsic::vector_reduce_smax;
+        else
+          return llvm::Intrinsic::vector_reduce_umax;
+      }
+      return llvm::Intrinsic::vector_reduce_fmax;
+    };
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Result = Builder.CreateUnaryIntrinsic(GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()),
+                                             Op0, nullptr, "rdx.min");
+    return RValue::get(Result);
+  }
+
+  case Builtin::BI__builtin_reduce_min: {
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      if (IrTy->isIntOrIntVectorTy()) {
+        if (auto *VecTy = QT->getAs<VectorType>())
+          QT = VecTy->getElementType();
+        if (QT->isSignedIntegerType())
+          return llvm::Intrinsic::vector_reduce_smin;
+        else
+          return llvm::Intrinsic::vector_reduce_umin;
+      }
+      return llvm::Intrinsic::vector_reduce_fmin;
+    };
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Result = Builder.CreateUnaryIntrinsic(GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()),
+                                             Op0, nullptr, "rdx.min");
+    return RValue::get(Result);
+  }
+
+
   case Builtin::BI__builtin_matrix_transpose: {
     const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
     Value *MatValue = EmitScalarExpr(E->getArg(0));
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -12716,6 +12716,8 @@
                                               ExprResult CallResult);
   ExprResult SemaBuiltinElementwiseMathTwoArgs(CallExpr *TheCall,
                                                ExprResult CallResult);
+  ExprResult SemaBuiltinReduceMath(CallExpr *TheCall,
+                                              ExprResult CallResult);
 
   // Matrix builtin handling.
   ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -646,6 +646,8 @@
 BUILTIN(__builtin_elementwise_abs, "v.", "nct")
 BUILTIN(__builtin_elementwise_max, "v.", "nct")
 BUILTIN(__builtin_elementwise_min, "v.", "nct")
+BUILTIN(__builtin_reduce_max, "v.", "nct")
+BUILTIN(__builtin_reduce_min, "v.", "nct")
 
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D112001: [Clang] Add min/max reduction builtins.

Reply via email to