https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/131995
These builtins are modeled on the clzg/ctzg builtins, which accept an optional second argument. This second argument is returned if the first argument is 0. >From c0eef87f4301a9725b7d81e78875a816d3cdf7cb Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 3 Feb 2025 16:54:17 +0000 Subject: [PATCH] [clang] Introduce elementwise clz/ctz builtins These builtins are modeled on the clzg/ctzg builtins, which accept an optional second argument. This second argument is returned if the first argument is 0. --- clang/docs/LanguageExtensions.rst | 8 ++ clang/include/clang/Basic/Builtins.td | 12 +++ clang/lib/CodeGen/CGBuiltin.cpp | 18 ++-- clang/lib/Sema/SemaChecking.cpp | 13 +++ .../test/CodeGen/builtins-elementwise-math.c | 96 +++++++++++++++++++ clang/test/Sema/builtins-elementwise-math.c | 44 +++++++++ 6 files changed, 185 insertions(+), 6 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d4771775c9739..f5d4ec49a1dd2 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -831,6 +831,14 @@ of different sizes and signs is forbidden in binary and ternary builtins. semantics, see `LangRef <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ for the comparison. + T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types + the first argument is 0 and an optional second argument is provided, + the second argument is returned. If the first argument is 0 but only + one argument is provided, the behaviour is undefined. + T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types + the first argument is 0 and an optional second argument is provided, + the second argument is returned. If the first argument is 0 but only + one argument is provided, the behaviour is undefined. ============================================== ====================================================================== ========================================= diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 72a5e495c4059..0bc506d977186 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1484,6 +1484,18 @@ def ElementwiseSubSat : Builtin { let Prototype = "void(...)"; } +def ElementwiseClz : Builtin { + let Spellings = ["__builtin_elementwise_clz"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + +def ElementwiseCtz : Builtin { + let Spellings = ["__builtin_elementwise_ctz"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + def ReduceMax : Builtin { let Spellings = ["__builtin_reduce_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c126f88b9e3a5..c48e1cf54f3f9 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3673,9 +3673,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: - case Builtin::BI__builtin_ctzg: { - bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg && - E->getNumArgs() > 1; + case Builtin::BI__builtin_ctzg: + case Builtin::BI__builtin_elementwise_ctz: { + bool HasFallback = + (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg || + BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctz) && + E->getNumArgs() > 1; Value *ArgValue = HasFallback ? EmitScalarExpr(E->getArg(0)) @@ -3705,9 +3708,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: - case Builtin::BI__builtin_clzg: { - bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg && - E->getNumArgs() > 1; + case Builtin::BI__builtin_clzg: + case Builtin::BI__builtin_elementwise_clz: { + bool HasFallback = + (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg || + BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_clz) && + E->getNumArgs() > 1; Value *ArgValue = HasFallback ? EmitScalarExpr(E->getArg(0)) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 12a8894cc7f47..d7276ef997c4d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2818,6 +2818,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, TheCall->setType(Magnitude.get()->getType()); break; } + case Builtin::BI__builtin_elementwise_clz: + case Builtin::BI__builtin_elementwise_ctz: + // These builtins can be unary or binary. Note for empty calls we call the + // unary checker in order to not emit an error that says the function + // expects 2 arguments, which would be misleading. + if (TheCall->getNumArgs() <= 1) { + if (PrepareBuiltinElementwiseMathOneArgCall( + TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) + return ExprError(); + } else if (BuiltinElementwiseMath( + TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) + return ExprError(); + break; case Builtin::BI__builtin_reduce_max: case Builtin::BI__builtin_reduce_min: { if (PrepareBuiltinReduceMathOneArgCall(TheCall)) diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index ee8345ff51e5e..537e38bb0bd28 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -1176,3 +1176,99 @@ void test_builtin_elementwise_fma(float f32, double f64, half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0); } + +void test_builtin_elementwise_clz(si8 vs1, si8 vs2, u4 vu1, + long long int lli, short si, + _BitInt(31) bi, int i, + char ci) { + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true) + vs1 = __builtin_elementwise_clz(vs1); + + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer + // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr + // select <8 x i1> [[ISZERO]], <8 x i16> [[CLZ]], <8 x i16> [[V8S2]] + vs1 = __builtin_elementwise_clz(vs1, vs2); + + // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr + // CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true) + vu1 = __builtin_elementwise_clz(vu1); + + // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr + // CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true) + lli = __builtin_elementwise_clz(lli); + + // CHECK: [[SI:%.+]] = load i16, ptr %si.addr + // CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true) + si = __builtin_elementwise_clz(si); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true) + bi = __builtin_elementwise_clz(bi); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0 + // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]] + bi = __builtin_elementwise_clz(bi, (_BitInt(31))1); + + // CHECK: [[I:%.+]] = load i32, ptr %i.addr + // CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true) + i = __builtin_elementwise_clz(i); + + // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr + // CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true) + ci = __builtin_elementwise_clz(ci); +} + +void test_builtin_elementwise_ctz(si8 vs1, si8 vs2, u4 vu1, + long long int lli, short si, + _BitInt(31) bi, int i, + char ci) { + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true) + vs1 = __builtin_elementwise_ctz(vs1); + + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer + // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr + // select <8 x i1> [[ISZERO]], <8 x i16> [[ctz]], <8 x i16> [[V8S2]] + vs1 = __builtin_elementwise_ctz(vs1, vs2); + + // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr + // CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true) + vu1 = __builtin_elementwise_ctz(vu1); + + // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr + // CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true) + lli = __builtin_elementwise_ctz(lli); + + // CHECK: [[SI:%.+]] = load i16, ptr %si.addr + // CHECK-NEXT: call i16 @llvm.cttz.i16(i16 [[SI]], i1 true) + si = __builtin_elementwise_ctz(si); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true) + bi = __builtin_elementwise_ctz(bi); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0 + // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]] + bi = __builtin_elementwise_ctz(bi, (_BitInt(31))1); + + // CHECK: [[I:%.+]] = load i32, ptr %i.addr + // CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true) + i = __builtin_elementwise_ctz(i); + + // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr + // CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true) + ci = __builtin_elementwise_ctz(ci); +} diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 5c54202991a85..e0bb5c12676e7 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -1202,3 +1202,47 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16, c3 = __builtin_elementwise_fma(f32, f32, c3); // expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}} } + +void test_builtin_elementwise_clz(int i32, int2 v2i32, short i16, + double f64, double2 v2f64) { + f64 = __builtin_elementwise_clz(f64); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}} + + _Complex float c1; + c1 = __builtin_elementwise_clz(c1); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}} + + v2i32 = __builtin_elementwise_clz(v2i32, i32); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}} + + v2i32 = __builtin_elementwise_clz(v2i32, f64); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}} + + v2i32 = __builtin_elementwise_clz(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + v2i32 = __builtin_elementwise_clz(v2i32, v2i32, f64); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +void test_builtin_elementwise_ctz(int i32, int2 v2i32, short i16, + double f64, double2 v2f64) { + f64 = __builtin_elementwise_ctz(f64); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}} + + _Complex float c1; + c1 = __builtin_elementwise_ctz(c1); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}} + + v2i32 = __builtin_elementwise_ctz(v2i32, i32); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}} + + v2i32 = __builtin_elementwise_ctz(v2i32, f64); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}} + + v2i32 = __builtin_elementwise_ctz(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + v2i32 = __builtin_elementwise_ctz(v2i32, v2i32, f64); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits