https://github.com/frasercrmck created 
https://github.com/llvm/llvm-project/pull/131995

These builtins are modeled on the clzg/ctzg builtins, which accept an optional 
second argument. This second argument is returned if the first argument is 0.

From c0eef87f4301a9725b7d81e78875a816d3cdf7cb Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 3 Feb 2025 16:54:17 +0000
Subject: [PATCH] [clang] Introduce elementwise clz/ctz builtins

These builtins are modeled on the clzg/ctzg builtins, which accept an
optional second argument. This second argument is returned if the first
argument is 0.
---
 clang/docs/LanguageExtensions.rst             |  8 ++
 clang/include/clang/Basic/Builtins.td         | 12 +++
 clang/lib/CodeGen/CGBuiltin.cpp               | 18 ++--
 clang/lib/Sema/SemaChecking.cpp               | 13 +++
 .../test/CodeGen/builtins-elementwise-math.c  | 96 +++++++++++++++++++
 clang/test/Sema/builtins-elementwise-math.c   | 44 +++++++++
 6 files changed, 185 insertions(+), 6 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index d4771775c9739..f5d4ec49a1dd2 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -831,6 +831,14 @@ of different sizes and signs is forbidden in binary and ternary builtins.
                                                 semantics, see `LangRef
                                                 <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
                                                 for the comparison.
+ T __builtin_elementwise_clz(T x[, T y])        return the number of leading 0 bits in the first argument. If          integer types
+                                                the first argument is 0 and an optional second argument is provided,
+                                                the second argument is returned. If the first argument is 0 but only
+                                                one argument is provided, the behaviour is undefined.
+ T __builtin_elementwise_ctz(T x[, T y])        return the number of trailing 0 bits in the first argument. If         integer types
+                                                the first argument is 0 and an optional second argument is provided,
+                                                the second argument is returned. If the first argument is 0 but only
+                                                one argument is provided, the behaviour is undefined.
 ============================================== ====================================================================== =========================================
 
 
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 72a5e495c4059..0bc506d977186 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1484,6 +1484,18 @@ def ElementwiseSubSat : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseClz : Builtin {
+  let Spellings = ["__builtin_elementwise_clz"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
+  let Prototype = "void(...)";
+}
+
+def ElementwiseCtz : Builtin {
+  let Spellings = ["__builtin_elementwise_ctz"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
+  let Prototype = "void(...)";
+}
+
 def ReduceMax : Builtin {
   let Spellings = ["__builtin_reduce_max"];
   let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c126f88b9e3a5..c48e1cf54f3f9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3673,9 +3673,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_ctz:
   case Builtin::BI__builtin_ctzl:
   case Builtin::BI__builtin_ctzll:
-  case Builtin::BI__builtin_ctzg: {
-    bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
-                       E->getNumArgs() > 1;
+  case Builtin::BI__builtin_ctzg:
+  case Builtin::BI__builtin_elementwise_ctz: {
+    bool HasFallback =
+        (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg ||
+         BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctz) &&
+        E->getNumArgs() > 1;
 
     Value *ArgValue =
         HasFallback ? EmitScalarExpr(E->getArg(0))
@@ -3705,9 +3708,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_clz:
   case Builtin::BI__builtin_clzl:
   case Builtin::BI__builtin_clzll:
-  case Builtin::BI__builtin_clzg: {
-    bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
-                       E->getNumArgs() > 1;
+  case Builtin::BI__builtin_clzg:
+  case Builtin::BI__builtin_elementwise_clz: {
+    bool HasFallback =
+        (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg ||
+         BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_clz) &&
+        E->getNumArgs() > 1;
 
     Value *ArgValue =
         HasFallback ? EmitScalarExpr(E->getArg(0))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 12a8894cc7f47..d7276ef997c4d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2818,6 +2818,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(Magnitude.get()->getType());
     break;
   }
+  case Builtin::BI__builtin_elementwise_clz:
+  case Builtin::BI__builtin_elementwise_ctz:
+    // These builtins can be unary or binary. Note for empty calls we call the
+    // unary checker in order to not emit an error that says the function
+    // expects 2 arguments, which would be misleading.
+    if (TheCall->getNumArgs() <= 1) {
+      if (PrepareBuiltinElementwiseMathOneArgCall(
+              TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
+        return ExprError();
+    } else if (BuiltinElementwiseMath(
+                   TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_reduce_max:
   case Builtin::BI__builtin_reduce_min: {
     if (PrepareBuiltinReduceMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index ee8345ff51e5e..537e38bb0bd28 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1176,3 +1176,99 @@ void test_builtin_elementwise_fma(float f32, double f64,
   half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
 
 }
+
+void test_builtin_elementwise_clz(si8 vs1, si8 vs2, u4 vu1,
+                                  long long int lli, short si,
+                                  _BitInt(31) bi, int i,
+                                  char ci) {
+  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+  // CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
+  vs1 = __builtin_elementwise_clz(vs1);
+
+  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+  // CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
+  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
+  // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
+  // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[CLZ]]
+  vs1 = __builtin_elementwise_clz(vs1, vs2);
+
+  // CHECK:      [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
+  // CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true)
+  vu1 = __builtin_elementwise_clz(vu1);
+
+  // CHECK:      [[LLI:%.+]] = load i64, ptr %lli.addr
+  // CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true)
+  lli = __builtin_elementwise_clz(lli);
+
+  // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr
+  // CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true)
+  si = __builtin_elementwise_clz(si);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
+  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
+  bi = __builtin_elementwise_clz(bi);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
+  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
+  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
+  // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]]
+  bi = __builtin_elementwise_clz(bi, (_BitInt(31))1);
+
+  // CHECK:      [[I:%.+]] = load i32, ptr %i.addr
+  // CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true)
+  i = __builtin_elementwise_clz(i);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr
+  // CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true)
+  ci = __builtin_elementwise_clz(ci);
+}
+
+void test_builtin_elementwise_ctz(si8 vs1, si8 vs2, u4 vu1,
+                                  long long int lli, short si,
+                                  _BitInt(31) bi, int i,
+                                  char ci) {
+  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+  // CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
+  vs1 = __builtin_elementwise_ctz(vs1);
+
+  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+  // CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
+  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
+  // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
+  // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[ctz]]
+  vs1 = __builtin_elementwise_ctz(vs1, vs2);
+
+  // CHECK:      [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
+  // CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true)
+  vu1 = __builtin_elementwise_ctz(vu1);
+
+  // CHECK:      [[LLI:%.+]] = load i64, ptr %lli.addr
+  // CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true)
+  lli = __builtin_elementwise_ctz(lli);
+
+  // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr
+  // CHECK-NEXT: call i16 @llvm.cttz.i16(i16 [[SI]], i1 true)
+  si = __builtin_elementwise_ctz(si);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
+  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
+  bi = __builtin_elementwise_ctz(bi);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
+  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
+  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
+  // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]]
+  bi = __builtin_elementwise_ctz(bi, (_BitInt(31))1);
+
+  // CHECK:      [[I:%.+]] = load i32, ptr %i.addr
+  // CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true)
+  i = __builtin_elementwise_ctz(i);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr
+  // CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true)
+  ci = __builtin_elementwise_ctz(ci);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 5c54202991a85..e0bb5c12676e7 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -1202,3 +1202,47 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
   c3 = __builtin_elementwise_fma(f32, f32, c3);
   // expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
 }
+
+void test_builtin_elementwise_clz(int i32, int2 v2i32, short i16,
+                                  double f64, double2 v2f64) {
+  f64 = __builtin_elementwise_clz(f64);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
+
+  _Complex float c1;
+  c1 = __builtin_elementwise_clz(c1);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
+
+  v2i32 = __builtin_elementwise_clz(v2i32, i32);
+  // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
+
+  v2i32 = __builtin_elementwise_clz(v2i32, f64);
+  // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
+
+  v2i32 = __builtin_elementwise_clz();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  v2i32 = __builtin_elementwise_clz(v2i32, v2i32, f64);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+void test_builtin_elementwise_ctz(int i32, int2 v2i32, short i16,
+                                  double f64, double2 v2f64) {
+  f64 = __builtin_elementwise_ctz(f64);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
+
+  _Complex float c1;
+  c1 = __builtin_elementwise_ctz(c1);
+  // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
+
+  v2i32 = __builtin_elementwise_ctz(v2i32, i32);
+  // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
+
+  v2i32 = __builtin_elementwise_ctz(v2i32, f64);
+  // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
+
+  v2i32 = __builtin_elementwise_ctz();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  v2i32 = __builtin_elementwise_ctz(v2i32, v2i32, f64);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to