https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/131995
>From 93112f0d5f0a8f6c8b1803a61d549701ba476f20 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 3 Feb 2025 16:54:17 +0000 Subject: [PATCH 1/6] [clang] Introduce elementwise clz/ctz builtins These builtins are modeled on the clzg/ctzg builtins, which accept an optional second argument. This second argument is returned if the first argument is 0. --- clang/docs/LanguageExtensions.rst | 8 ++ clang/include/clang/Basic/Builtins.td | 12 +++ clang/lib/CodeGen/CGBuiltin.cpp | 18 ++-- clang/lib/Sema/SemaChecking.cpp | 13 +++ .../test/CodeGen/builtins-elementwise-math.c | 96 +++++++++++++++++++ clang/test/Sema/builtins-elementwise-math.c | 44 +++++++++ 6 files changed, 185 insertions(+), 6 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f448a9a8db172..c938fc64c129b 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -848,6 +848,14 @@ of different sizes and signs is forbidden in binary and ternary builtins. semantics, see `LangRef <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ for the comparison. + T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types + the first argument is 0 and an optional second argument is provided, + the second argument is returned. If the first argument is 0 but only + one argument is provided, the behaviour is undefined. + T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types + the first argument is 0 and an optional second argument is provided, + the second argument is returned. If the first argument is 0 but only + one argument is provided, the behaviour is undefined. 
============================================== ====================================================================== ========================================= diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 5ebb82180521d..e0f78ea2ed2aa 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1502,6 +1502,18 @@ def ElementwiseSubSat : Builtin { let Prototype = "void(...)"; } +def ElementwiseClz : Builtin { + let Spellings = ["__builtin_elementwise_clz"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + +def ElementwiseCtz : Builtin { + let Spellings = ["__builtin_elementwise_ctz"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + def ReduceMax : Builtin { let Spellings = ["__builtin_reduce_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5f2eb76e7bacb..4c341cb2a42cc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3322,9 +3322,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: - case Builtin::BI__builtin_ctzg: { - bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg && - E->getNumArgs() > 1; + case Builtin::BI__builtin_ctzg: + case Builtin::BI__builtin_elementwise_ctz: { + bool HasFallback = + (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg || + BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctz) && + E->getNumArgs() > 1; Value *ArgValue = HasFallback ? 
EmitScalarExpr(E->getArg(0)) @@ -3354,9 +3357,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: - case Builtin::BI__builtin_clzg: { - bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg && - E->getNumArgs() > 1; + case Builtin::BI__builtin_clzg: + case Builtin::BI__builtin_elementwise_clz: { + bool HasFallback = + (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg || + BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_clz) && + E->getNumArgs() > 1; Value *ArgValue = HasFallback ? EmitScalarExpr(E->getArg(0)) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index dd5b710d7e1d4..e60ae9ebae4d4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3070,6 +3070,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, TheCall->setType(Magnitude.get()->getType()); break; } + case Builtin::BI__builtin_elementwise_clz: + case Builtin::BI__builtin_elementwise_ctz: + // These builtins can be unary or binary. Note for empty calls we call the + // unary checker in order to not emit an error that says the function + // expects 2 arguments, which would be misleading. 
+ if (TheCall->getNumArgs() <= 1) { + if (PrepareBuiltinElementwiseMathOneArgCall( + TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) + return ExprError(); + } else if (BuiltinElementwiseMath( + TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) + return ExprError(); + break; case Builtin::BI__builtin_reduce_max: case Builtin::BI__builtin_reduce_min: { if (PrepareBuiltinReduceMathOneArgCall(TheCall)) diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index ee8345ff51e5e..537e38bb0bd28 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -1176,3 +1176,99 @@ void test_builtin_elementwise_fma(float f32, double f64, half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0); } + +void test_builtin_elementwise_clz(si8 vs1, si8 vs2, u4 vu1, + long long int lli, short si, + _BitInt(31) bi, int i, + char ci) { + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true) + vs1 = __builtin_elementwise_clz(vs1); + + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer + // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr + // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[CLZ]] + vs1 = __builtin_elementwise_clz(vs1, vs2); + + // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr + // CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true) + vu1 = __builtin_elementwise_clz(vu1); + + // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr + // CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true) + lli = __builtin_elementwise_clz(lli); + + // CHECK: [[SI:%.+]] = load i16, ptr %si.addr + // CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true) + si = 
__builtin_elementwise_clz(si); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true) + bi = __builtin_elementwise_clz(bi); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0 + // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]] + bi = __builtin_elementwise_clz(bi, (_BitInt(31))1); + + // CHECK: [[I:%.+]] = load i32, ptr %i.addr + // CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true) + i = __builtin_elementwise_clz(i); + + // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr + // CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true) + ci = __builtin_elementwise_clz(ci); +} + +void test_builtin_elementwise_ctz(si8 vs1, si8 vs2, u4 vu1, + long long int lli, short si, + _BitInt(31) bi, int i, + char ci) { + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true) + vs1 = __builtin_elementwise_ctz(vs1); + + // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr + // CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer + // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr + // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[ctz]] + vs1 = __builtin_elementwise_ctz(vs1, vs2); + + // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr + // CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true) + vu1 = __builtin_elementwise_ctz(vu1); + + // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr + // CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true) + lli = __builtin_elementwise_ctz(lli); + + // CHECK: [[SI:%.+]] = load i16, ptr %si.addr + // CHECK-NEXT: 
call i16 @llvm.cttz.i16(i16 [[SI]], i1 true) + si = __builtin_elementwise_ctz(si); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true) + bi = __builtin_elementwise_ctz(bi); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr + // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true) + // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0 + // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]] + bi = __builtin_elementwise_ctz(bi, (_BitInt(31))1); + + // CHECK: [[I:%.+]] = load i32, ptr %i.addr + // CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true) + i = __builtin_elementwise_ctz(i); + + // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr + // CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true) + ci = __builtin_elementwise_ctz(ci); +} diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 01057b3f8d083..94b987c09b9e5 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -1213,3 +1213,47 @@ float3 foo(float3 a,const struct_float3* hi) { float3 b = __builtin_elementwise_max((float3)(0.0f), a); return __builtin_elementwise_pow(b, hi->b.yyy); } + +void test_builtin_elementwise_clz(int i32, int2 v2i32, short i16, + double f64, double2 v2f64) { + f64 = __builtin_elementwise_clz(f64); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}} + + _Complex float c1; + c1 = __builtin_elementwise_clz(c1); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}} + + v2i32 = __builtin_elementwise_clz(v2i32, i32); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}} + + v2i32 = __builtin_elementwise_clz(v2i32, f64); + // expected-error@-1 
{{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}} + + v2i32 = __builtin_elementwise_clz(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + v2i32 = __builtin_elementwise_clz(v2i32, v2i32, f64); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +void test_builtin_elementwise_ctz(int i32, int2 v2i32, short i16, + double f64, double2 v2f64) { + f64 = __builtin_elementwise_ctz(f64); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}} + + _Complex float c1; + c1 = __builtin_elementwise_ctz(c1); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}} + + v2i32 = __builtin_elementwise_ctz(v2i32, i32); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}} + + v2i32 = __builtin_elementwise_ctz(v2i32, f64); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}} + + v2i32 = __builtin_elementwise_ctz(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + v2i32 = __builtin_elementwise_ctz(v2i32, v2i32, f64); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} >From 898d62d9c81f678cf0be4a47e9c90d9d301d3c3d Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Wed, 19 Mar 2025 13:53:50 +0000 Subject: [PATCH 2/6] support constexpr --- clang/lib/AST/ExprConstant.cpp | 55 ++++++++++++++++++-- clang/test/Sema/constant-builtins-vector.cpp | 34 ++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1b33b6706e204..645905528cb79 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11551,6 +11551,49 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return 
Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case Builtin::BI__builtin_elementwise_clz: + case Builtin::BI__builtin_elementwise_ctz: { + APValue SourceLHS; + std::optional<APValue> Fallback; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS)) + return false; + if (E->getNumArgs() > 1) { + APValue FallbackTmp; + if (!EvaluateAsRValue(Info, E->getArg(1), FallbackTmp)) + return false; + Fallback = FallbackTmp; + } + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceLHS.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(SourceLen); + + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt(); + if (!LHS) { + // Without a fallback, a zero element is undefined + if (!Fallback) + return false; + ResultElements.push_back(Fallback->getVectorElt(EltNum)); + continue; + } + switch (E->getBuiltinCallee()) { + case Builtin::BI__builtin_elementwise_clz: + ResultElements.push_back(APValue( + APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countl_zero()), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + case Builtin::BI__builtin_elementwise_ctz: + ResultElements.push_back(APValue( + APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countr_zero()), + DestEltTy->isUnsignedIntegerOrEnumerationType()))); + break; + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } } } @@ -13103,6 +13146,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_clzll: case Builtin::BI__builtin_clzs: case Builtin::BI__builtin_clzg: + case Builtin::BI__builtin_elementwise_clz: case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes case Builtin::BI__lzcnt: case Builtin::BI__lzcnt64: { @@ -13111,7 +13155,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; std::optional<APSInt> Fallback; - if 
(BuiltinOp == Builtin::BI__builtin_clzg && E->getNumArgs() > 1) { + if ((BuiltinOp == Builtin::BI__builtin_clzg || + BuiltinOp == Builtin::BI__builtin_elementwise_clz) && + E->getNumArgs() > 1) { APSInt FallbackTemp; if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info)) return false; @@ -13183,13 +13229,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: case Builtin::BI__builtin_ctzs: - case Builtin::BI__builtin_ctzg: { + case Builtin::BI__builtin_ctzg: + case Builtin::BI__builtin_elementwise_ctz: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; std::optional<APSInt> Fallback; - if (BuiltinOp == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1) { + if ((BuiltinOp == Builtin::BI__builtin_ctzg || + BuiltinOp == Builtin::BI__builtin_elementwise_ctz) && + E->getNumArgs() > 1) { APSInt FallbackTemp; if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info)) return false; diff --git a/clang/test/Sema/constant-builtins-vector.cpp b/clang/test/Sema/constant-builtins-vector.cpp index bde5c478b2b6f..3d988cc655099 100644 --- a/clang/test/Sema/constant-builtins-vector.cpp +++ b/clang/test/Sema/constant-builtins-vector.cpp @@ -860,3 +860,37 @@ static_assert(__builtin_elementwise_sub_sat(0U, 1U) == 0U); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4char){5, 4, 3, 2}, (vector4char){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304 : 0x04030201)); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4uchar){5, 4, 3, 2}, (vector4uchar){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304U : 0x04030201U)); static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_sub_sat((vector4short){(short)0x8000, (short)0x8001, (short)0x8002, (short)0x8003}, (vector4short){7, 8, 9, 10}) == (LITTLE_END ? 
0x8000800080008000 : 0x8000800080008000))); + +static_assert(__builtin_elementwise_clz(2) == 30); +static_assert(__builtin_elementwise_clz(2, 8) == 30); +static_assert(__builtin_elementwise_clz(0, 8) == 8); +static_assert(__builtin_elementwise_clz((char)2) == 6); +static_assert(__builtin_elementwise_clz((short)2) == 14); +static_assert(__builtin_elementwise_clz((char)1) == 0x7); +static_assert(__builtin_elementwise_clz((char)4) == 0x5); +static_assert(__builtin_elementwise_clz((char)127) == 0x1); +static_assert(__builtin_elementwise_clz((char)128) == 0x0); +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 0x00010507 : 0x07050100)); + +constexpr int clz0 = __builtin_elementwise_clz(0); +// expected-error@-1 {{must be initialized by a constant expression}} +constexpr vector4char clz1 = __builtin_elementwise_clz((vector4char){1, 0, 3, 4}); +// expected-error@-1 {{must be initialized by a constant expression}} +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE)); + +static_assert(__builtin_elementwise_ctz(2) == 1); +static_assert(__builtin_elementwise_ctz(2, 8) == 1); +static_assert(__builtin_elementwise_ctz(0, 8) == 8); +static_assert(__builtin_elementwise_ctz((char)2) == 1); +static_assert(__builtin_elementwise_ctz((short)2) == 1); +static_assert(__builtin_elementwise_ctz((char)8) == 0x3); +static_assert(__builtin_elementwise_ctz((char)32) == 0x5); +static_assert(__builtin_elementwise_ctz((char)127) == 0x0); +static_assert(__builtin_elementwise_ctz((char)128) == 0x7); +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 
0x07000503 : 0x03050007)); + +constexpr int ctz0 = __builtin_elementwise_ctz(0); +// expected-error@-1 {{must be initialized by a constant expression}} +constexpr vector4char ctz1 = __builtin_elementwise_ctz((vector4char){1, 0, 3, 4}); +// expected-error@-1 {{must be initialized by a constant expression}} +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE00FF03 : 0x03FF00FE)); >From 7369848f5922c7ecdb9d30ed4d6c670493bd7670 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Wed, 19 Mar 2025 14:01:58 +0000 Subject: [PATCH 3/6] update docs --- clang/docs/LanguageExtensions.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index c938fc64c129b..fe0567ee73c78 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -756,7 +756,8 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``, ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``, -``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context. +``__builtin_elementwise_sub_sat``, ``__builtin_elementwise_clz``, +``__builtin_elementwise_ctz`` can be called in a ``constexpr`` context. No implicit promotion of integer types takes place. The mixing of integer types of different sizes and signs is forbidden in binary and ternary builtins. 
>From 055b73e863a079647095b37949c8c71826530522 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 15 Apr 2025 10:35:54 +0100 Subject: [PATCH 4/6] docs: result is undefined; not behaviour --- clang/docs/LanguageExtensions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index fe0567ee73c78..96adf955e4a5f 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -852,11 +852,11 @@ of different sizes and signs is forbidden in binary and ternary builtins. T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types the first argument is 0 and an optional second argument is provided, the second argument is returned. If the first argument is 0 but only - one argument is provided, the behaviour is undefined. + one argument is provided, the result is undefined. T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types the first argument is 0 and an optional second argument is provided, the second argument is returned. If the first argument is 0 but only - one argument is provided, the behaviour is undefined. + one argument is provided, the result is undefined. 
============================================== ====================================================================== ========================================= >From 2218d319990eaa04ea68080c9e005c079834bc81 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Wed, 16 Jul 2025 12:11:24 +0100 Subject: [PATCH 5/6] add support for new constexpr interpreter --- .../include/clang/Basic/DiagnosticASTKinds.td | 3 + clang/lib/AST/ByteCode/InterpBuiltin.cpp | 92 +++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 15 ++- clang/test/Sema/constant-builtins-vector.cpp | 12 ++- 4 files changed, 117 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index a67b9995d3b54..39b30e4ea456e 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -396,6 +396,9 @@ def note_constexpr_non_const_vectorelements : Note< "cannot determine number of elements for sizeless vectors in a constant expression">; def note_constexpr_assumption_failed : Note< "assumption evaluated to false">; +def note_constexpr_countzeroes_zero : Note< + "evaluation of %select{__builtin_elementwise_clz|__builtin_elementwise_ctz}0 " + "with a zero value is undefined">; def err_experimental_clang_interp_failed : Error< "the experimental clang interpreter failed to evaluate an expression">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index de0b97fd93c76..06c53f17a7065 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1737,6 +1737,93 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC, return true; } +/// Takes an integer or vector and an optional fallback used for zero input. 
+static bool interp__builtin_elementwise_countzeroes(InterpState &S, + CodePtr OpPC, + const InterpFrame *Frame, + const CallExpr *Call, + unsigned BuiltinID) { + const bool HasZeroArg = Call->getNumArgs() == 2; + const bool IsCTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctz; + assert(Call->getNumArgs() == 1 || HasZeroArg); + if (Call->getArg(0)->getType()->isIntegerType()) { + PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); + APSInt Val = popToAPSInt(S.Stk, ArgT); + std::optional<APSInt> ZeroVal; + if (HasZeroArg) { + ZeroVal = Val; + Val = popToAPSInt(S.Stk, ArgT); + } + + if (Val.isZero()) { + if (ZeroVal) { + pushInteger(S, *ZeroVal, Call->getType()); + return true; + } + // If we haven't been provided the second argument, the result is + // undefined + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_countzeroes_zero) + << /*IsTrailing=*/IsCTZ; + return false; + } + + if (BuiltinID == Builtin::BI__builtin_elementwise_clz) { + pushInteger(S, Val.countLeadingZeros(), Call->getType()); + } else { + pushInteger(S, Val.countTrailingZeros(), Call->getType()); + } + return true; + } + // Otherwise, the argument must be a vector. 
+ const ASTContext &ASTCtx = S.getASTContext(); + Pointer ZeroArg; + if (HasZeroArg) { + assert(Call->getArg(1)->getType()->isVectorType() && + ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(), + Call->getArg(1)->getType())); + ZeroArg = S.Stk.pop<Pointer>(); + assert(ZeroArg.getFieldDesc()->isPrimitiveArray()); + } + assert(Call->getArg(0)->getType()->isVectorType()); + const Pointer &Arg = S.Stk.pop<Pointer>(); + assert(Arg.getFieldDesc()->isPrimitiveArray()); + const Pointer &Dst = S.Stk.peek<Pointer>(); + assert(Dst.getFieldDesc()->isPrimitiveArray()); + assert(Arg.getFieldDesc()->getNumElems() == + Dst.getFieldDesc()->getNumElems()); + + QualType ElemType = Arg.getFieldDesc()->getElemQualType(); + PrimType ElemT = *S.getContext().classify(ElemType); + unsigned NumElems = Arg.getNumElems(); + + // FIXME: Reading from uninitialized vector elements? + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt(); + if (EltVal.isZero()) { + if (HasZeroArg) { + Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>(); + } else { + // If we haven't been provided the second argument, the result is + // undefined + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_countzeroes_zero) + << /*IsTrailing=*/IsCTZ; + return false; + } + } else if (IsCTZ) { + Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros()); + } else { + Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros()); + } + Dst.atIndex(I).initialize(); + }); + } + + return true; +} + static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call, unsigned ID) { @@ -2600,6 +2687,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_ctzg: return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID); + case Builtin::BI__builtin_elementwise_clz: + case Builtin::BI__builtin_elementwise_ctz: + return 
interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call, + BuiltinID); + case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 645905528cb79..a503752860e66 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11573,8 +11573,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt(); if (!LHS) { // Without a fallback, a zero element is undefined - if (!Fallback) + if (!Fallback) { + Info.FFDiag(E, diag::note_constexpr_countzeroes_zero) + << /*IsTrailing=*/(E->getBuiltinCallee() == + Builtin::BI__builtin_elementwise_ctz); return false; + } ResultElements.push_back(Fallback->getVectorElt(EltNum)); continue; } @@ -13175,6 +13179,11 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, BuiltinOp != Builtin::BI__lzcnt && BuiltinOp != Builtin::BI__lzcnt64; + if (BuiltinOp == Builtin::BI__builtin_elementwise_clz) { + Info.FFDiag(E, diag::note_constexpr_countzeroes_zero) + << /*IsTrailing=*/false; + } + if (ZeroIsUndefined) return Error(E); } @@ -13249,6 +13258,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, if (Fallback) return Success(*Fallback, E); + if (BuiltinOp == Builtin::BI__builtin_elementwise_ctz) { + Info.FFDiag(E, diag::note_constexpr_countzeroes_zero) + << /*IsTrailing=*/true; + } return Error(E); } diff --git a/clang/test/Sema/constant-builtins-vector.cpp b/clang/test/Sema/constant-builtins-vector.cpp index 3d988cc655099..50b6959cefa4a 100644 --- a/clang/test/Sema/constant-builtins-vector.cpp +++ b/clang/test/Sema/constant-builtins-vector.cpp @@ -873,9 +873,11 @@ static_assert(__builtin_elementwise_clz((char)128) == 0x0); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 
0x00010507 : 0x07050100)); constexpr int clz0 = __builtin_elementwise_clz(0); -// expected-error@-1 {{must be initialized by a constant expression}} +// expected-error@-1 {{must be initialized by a constant expression}} \ +// expected-note@-1 {{evaluation of __builtin_elementwise_clz with a zero value is undefined}} constexpr vector4char clz1 = __builtin_elementwise_clz((vector4char){1, 0, 3, 4}); -// expected-error@-1 {{must be initialized by a constant expression}} +// expected-error@-1 {{must be initialized by a constant expression}} \ +// expected-note@-1 {{evaluation of __builtin_elementwise_clz with a zero value is undefined}} static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE)); static_assert(__builtin_elementwise_ctz(2) == 1); @@ -890,7 +892,9 @@ static_assert(__builtin_elementwise_ctz((char)128) == 0x7); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 0x07000503 : 0x03050007)); constexpr int ctz0 = __builtin_elementwise_ctz(0); -// expected-error@-1 {{must be initialized by a constant expression}} +// expected-error@-1 {{must be initialized by a constant expression}} \ +// expected-note@-1 {{evaluation of __builtin_elementwise_ctz with a zero value is undefined}} constexpr vector4char ctz1 = __builtin_elementwise_ctz((vector4char){1, 0, 3, 4}); -// expected-error@-1 {{must be initialized by a constant expression}} +// expected-error@-1 {{must be initialized by a constant expression}} \ +// expected-note@-1 {{evaluation of __builtin_elementwise_ctz with a zero value is undefined}} static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 
0xFE00FF03 : 0x03FF00FE)); >From 017971c8c252bd78dba20b5d14d6996e47173e97 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Wed, 16 Jul 2025 16:46:47 +0100 Subject: [PATCH 6/6] change ub wording --- clang/docs/LanguageExtensions.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 96adf955e4a5f..31138d4905212 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -851,12 +851,12 @@ of different sizes and signs is forbidden in binary and ternary builtins. for the comparison. T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types the first argument is 0 and an optional second argument is provided, - the second argument is returned. If the first argument is 0 but only - one argument is provided, the result is undefined. + the second argument is returned. It is undefined behaviour if the + first argument is 0 and no second argument is provided. T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types the first argument is 0 and an optional second argument is provided, - the second argument is returned. If the first argument is 0 but only - one argument is provided, the result is undefined. + the second argument is returned. It is undefined behaviour if the + first argument is 0 and no second argument is provided. ============================================== ====================================================================== ========================================= _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits