https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/154203
>From c2f86a3591a4e44e875cd00967ed8679876de287 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Mon, 18 Aug 2025 15:59:46 -0500
Subject: [PATCH] [Clang] Support generic bit counting builtins on fixed
 boolean vectors

Summary:
Boolean vectors as implemented in Clang can be bit-cast to an integer
whose width is rounded up to the next primitive integer size. Users can
perform this cast themselves, but since the bit counting builtins are
very likely to be used on bitmasks like this, and the generic forms are
expected to be generic, it seems reasonable to handle this case
directly.
---
 clang/docs/LanguageExtensions.rst |   4 +-
 clang/docs/ReleaseNotes.rst       |   4 +-
 clang/lib/AST/ExprConstant.cpp    |  12 +
 clang/lib/CodeGen/CGBuiltin.cpp   |  24 +-
 clang/lib/Sema/SemaChecking.cpp   |   4 +-
 clang/test/CodeGen/builtins.c     | 423 ++++++++++++++++--------------
 6 files changed, 271 insertions(+), 200 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 12ca4cf42f7cc..e26a1ca58b20f 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4131,7 +4131,7 @@ builtin, the mangler emits their usual pattern without any special treatment.
 -----------------------
 
 ``__builtin_popcountg`` returns the number of 1 bits in the argument. The
-argument can be of any unsigned integer type.
+argument can be of any unsigned integer type or fixed boolean vector.
 
 **Syntax**:
 
@@ -4163,7 +4163,7 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``.
 
 ``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of
 leading (respectively trailing) 0 bits in the first argument. The first argument
-can be of any unsigned integer type.
+can be of any unsigned integer type or fixed boolean vector.
 
 If the first argument is 0 and an optional second argument of ``int`` type is
 provided, then the second argument is returned. If the first argument is 0, but
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e88d68fa99664..9d82309b4484c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -140,10 +140,12 @@ Non-comprehensive list of changes in this release
 - A vector of booleans is now a valid condition for the ternary ``?:``
   operator. This binds to a simple vector select operation.
 
+- The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
+  functions now accept fixed-size boolean vectors.
+
 - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and
   ``ptrauth_intrinsics`` features has been deprecated, and is restricted to the
   arm64e target only. The correct method to check for these features is to test for the ``__PTRAUTH__``
-  macro.
 
 New Compiler Flags
 ------------------
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9c87a88899647..9d70e43685438 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13433,6 +13433,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
   case Builtin::BI__lzcnt:
   case Builtin::BI__lzcnt64: {
+    // TODO: Handle boolean vectors in constexpr contexts.
+    if (E->getArg(0)->getType()->isExtVectorBoolType())
+      return false;
+
     APSInt Val;
     if (!EvaluateInteger(E->getArg(0), Val, Info))
       return false;
@@ -13519,6 +13523,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__builtin_ctzs:
   case Builtin::BI__builtin_ctzg:
   case Builtin::BI__builtin_elementwise_cttz: {
+    // TODO: Handle boolean vectors in constexpr contexts.
+    if (E->getArg(0)->getType()->isExtVectorBoolType())
+      return false;
+
     APSInt Val;
     if (!EvaluateInteger(E->getArg(0), Val, Info))
       return false;
@@ -13735,6 +13743,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__popcnt16: // Microsoft variants of popcount
   case Builtin::BI__popcnt:
   case Builtin::BI__popcnt64: {
+    // TODO: Handle boolean vectors in constexpr contexts.
+    if (E->getArg(0)->getType()->isExtVectorBoolType())
+      return false;
+
     APSInt Val;
     if (!EvaluateInteger(E->getArg(0), Val, Info))
       return false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 59414fe466704..b92c98b9f5ce2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1693,6 +1693,22 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
   llvm_unreachable("invalid interlocking");
 }
 
+static llvm::Value *EmitIntegerExpr(CodeGenFunction &CGF, const Expr *E) {
+  llvm::Value *ArgValue = CGF.EmitScalarExpr(E);
+  llvm::Type *ArgType = ArgValue->getType();
+
+  if (auto *VT = dyn_cast<llvm::FixedVectorType>(ArgType);
+      VT && VT->getElementType()->isIntegerTy(1)) {
+    llvm::Type *StorageType = CGF.ConvertTypeForMem(E->getType());
+    ArgValue = CGF.emitBoolVecConversion(
+        ArgValue, StorageType->getPrimitiveSizeInBits(), "insertvec");
+    ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType);
+    ArgType = ArgValue->getType();
+  }
+
+  return ArgValue;
+}
+
 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
 /// bits and a bit position and read and optionally modify the bit at that
 /// position. The position index can be arbitrarily large, i.e. it can be larger
@@ -2020,7 +2036,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
          "Unsupported builtin check kind");
 
-  Value *ArgValue = EmitScalarExpr(E);
+  Value *ArgValue = EmitIntegerExpr(*this, E);
   if (!SanOpts.has(SanitizerKind::Builtin))
     return ArgValue;
 
@@ -3334,7 +3350,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         E->getNumArgs() > 1;
 
     Value *ArgValue =
-        HasFallback ? EmitScalarExpr(E->getArg(0))
+        HasFallback ? EmitIntegerExpr(*this, E->getArg(0))
                     : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
@@ -3371,7 +3387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         E->getNumArgs() > 1;
 
     Value *ArgValue =
-        HasFallback ? EmitScalarExpr(E->getArg(0))
+        HasFallback ? EmitIntegerExpr(*this, E->getArg(0))
                     : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
@@ -3456,7 +3472,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_popcountl:
   case Builtin::BI__builtin_popcountll:
   case Builtin::BI__builtin_popcountg: {
-    Value *ArgValue = EmitScalarExpr(E->getArg(0));
+    Value *ArgValue = EmitIntegerExpr(*this, E->getArg(0));
 
     llvm::Type *ArgType = ArgValue->getType();
     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c21c40e707008..fcee7a8220988 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) {
 
   QualType ArgTy = Arg->getType();
 
-  if (!ArgTy->isUnsignedIntegerType()) {
+  if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) {
     S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
         << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
         << ArgTy;
@@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
 
   QualType Arg0Ty = Arg0->getType();
 
-  if (!Arg0Ty->isUnsignedIntegerType()) {
+  if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) {
     S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type)
         << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
         << Arg0Ty;
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index aa9965b815983..7ad143ed165c8 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -991,247 +991,288 @@ void test_builtin_os_log_long_double(void *buf, long double ld) {
 void test_builtin_popcountg(unsigned char uc, unsigned short us,
                             unsigned int ui, unsigned long ul,
                             unsigned long long ull, unsigned __int128 ui128,
-                            unsigned _BitInt(128) ubi128) {
+                            unsigned _BitInt(128) ubi128,
+                            _Bool __attribute__((ext_vector_type(8))) vb8) {
   volatile int pop;
-  pop = __builtin_popcountg(uc);
-  // CHECK: %1 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1)
-  // CHECK-NEXT: %cast = zext i8 %2 to i32
+  // CHECK: %2 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %3 = call i8 @llvm.ctpop.i8(i8 %2)
+  // CHECK-NEXT: %cast = zext i8 %3 to i32
   // CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4
+  pop = __builtin_popcountg(uc);
+  // CHECK: %4 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %5 = call i16 @llvm.ctpop.i16(i16 %4)
+  // CHECK-NEXT: %cast2 = zext i16 %5 to i32
+  // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4
   pop = __builtin_popcountg(us);
-  // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3)
-  // CHECK-NEXT: %cast1 = zext i16 %4 to i32
-  // CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4
+  // CHECK: %6 = load i32, ptr %ui.addr, align 4
+  // CHECK-NEXT: %7 = call i32 @llvm.ctpop.i32(i32 %6)
+  // CHECK-NEXT: store volatile i32 %7, ptr %pop, align 4
   pop = __builtin_popcountg(ui);
-  // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
-  // CHECK-NEXT: %6 = call i32 @llvm.ctpop.i32(i32 %5)
-  // CHECK-NEXT: store volatile i32 %6, ptr %pop, align 4
+  // CHECK: %8 = load i64, ptr %ul.addr, align 8
+  // CHECK-NEXT: %9 = call i64 @llvm.ctpop.i64(i64 %8)
+  // CHECK-NEXT: %cast3 = trunc i64 %9 to i32
+  // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4
   pop = __builtin_popcountg(ul);
-  // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8
-  // CHECK-NEXT: %8 = call i64 @llvm.ctpop.i64(i64 %7)
-  // CHECK-NEXT: %cast2 = trunc i64 %8 to i32
-  // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4
+  // CHECK: %10 = load i64, ptr %ull.addr, align 8
+  // CHECK-NEXT: %11 = call i64 @llvm.ctpop.i64(i64 %10)
+  // CHECK-NEXT: %cast4 = trunc i64 %11 to i32
+  // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4
   pop = __builtin_popcountg(ull);
-  // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8
-  // CHECK-NEXT: %10 = call i64 @llvm.ctpop.i64(i64 %9)
-  // CHECK-NEXT: %cast3 = trunc i64 %10 to i32
-  // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4
+  // CHECK: %12 = load i128, ptr %ui128.addr, align 16
+  // CHECK-NEXT: %13 = call i128 @llvm.ctpop.i128(i128 %12)
+  // CHECK-NEXT: %cast5 = trunc i128 %13 to i32
+  // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4
   pop = __builtin_popcountg(ui128);
-  // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16
-  // CHECK-NEXT: %12 = call i128 @llvm.ctpop.i128(i128 %11)
-  // CHECK-NEXT: %cast4 = trunc i128 %12 to i32
-  // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4
+  // CHECK: %14 = load i128, ptr %ubi128.addr, align 8
+  // CHECK-NEXT: %15 = call i128 @llvm.ctpop.i128(i128 %14)
+  // CHECK-NEXT: %cast6 = trunc i128 %15 to i32
+  // CHECK-NEXT: store volatile i32 %cast6, ptr %pop, align 4
   pop = __builtin_popcountg(ubi128);
-  // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8
-  // CHECK-NEXT: %14 = call i128 @llvm.ctpop.i128(i128 %13)
-  // CHECK-NEXT: %cast5 = trunc i128 %14 to i32
-  // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4
-  // CHECK-NEXT: ret void
+  // CHECK: %load_bits7 = load i8, ptr %vb8.addr, align 1
+  // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1>
+  // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8
+  // CHECK-NEXT: %18 = call i8 @llvm.ctpop.i8(i8 %17)
+  // CHECK-NEXT: %cast8 = zext i8 %18 to i32
+  // CHECK-NEXT: store volatile i32 %cast8, ptr %pop, align 4
+  pop = __builtin_popcountg(vb8);
 }
 
 // CHECK-LABEL: define{{.*}} void @test_builtin_clzg
 void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
                        unsigned long ul, unsigned long long ull,
                        unsigned __int128 ui128, unsigned _BitInt(128) ubi128,
-                       signed char sc, short s, int i) {
+                       signed char sc, short s, int i,
+                       _Bool __attribute__((ext_vector_type(8))) vb8) {
   volatile int lz;
+  // CHECK: %2 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %3 = call i8 @llvm.ctlz.i8(i8 %2, i1 true)
+  // CHECK-NEXT: %cast = zext i8 %3 to i32
+  // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
   lz = __builtin_clzg(uc);
-  // CHECK: %1 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true)
-  // CHECK-NEXT: %cast = zext i8 %2 to i32
-  // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
+  // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %5 = call i16 @llvm.ctlz.i16(i16 %4, i1 true)
+  // CHECK-NEXT: %cast2 = zext i16 %5 to i32
+  // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4
   lz = __builtin_clzg(us);
-  // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true)
-  // CHECK-NEXT: %cast1 = zext i16 %4 to i32
-  // CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4
+  // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4
+  // CHECK-NEXT: %7 = call i32 @llvm.ctlz.i32(i32 %6, i1 true)
+  // CHECK-NEXT: store volatile i32 %7, ptr %lz, align 4
   lz = __builtin_clzg(ui);
-  // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
-  // CHECK-NEXT: %6 = call i32 @llvm.ctlz.i32(i32 %5, i1 true)
-  // CHECK-NEXT: store volatile i32 %6, ptr %lz, align 4
+  // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8
+  // CHECK-NEXT: %9 = call i64 @llvm.ctlz.i64(i64 %8, i1 true)
+  // CHECK-NEXT: %cast3 = trunc i64 %9 to i32
+  // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4
   lz = __builtin_clzg(ul);
-  // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8
-  // CHECK-NEXT: %8 = call i64 @llvm.ctlz.i64(i64 %7, i1 true)
-  // CHECK-NEXT: %cast2 = trunc i64 %8 to i32
-  // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4
+  // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8
+  // CHECK-NEXT: %11 = call i64 @llvm.ctlz.i64(i64 %10, i1 true)
+  // CHECK-NEXT: %cast4 = trunc i64 %11 to i32
+  // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4
   lz = __builtin_clzg(ull);
-  // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8
-  // CHECK-NEXT: %10 = call i64 @llvm.ctlz.i64(i64 %9, i1 true)
-  // CHECK-NEXT: %cast3 = trunc i64 %10 to i32
-  // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4
+  // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16
+  // CHECK-NEXT: %13 = call i128 @llvm.ctlz.i128(i128 %12, i1 true)
+  // CHECK-NEXT: %cast5 = trunc i128 %13 to i32
+  // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4
   lz = __builtin_clzg(ui128);
-  // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16
-  // CHECK-NEXT: %12 = call i128 @llvm.ctlz.i128(i128 %11, i1 true)
-  // CHECK-NEXT: %cast4 = trunc i128 %12 to i32
-  // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4
+  // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8
+  // CHECK-NEXT: %15 = call i128 @llvm.ctlz.i128(i128 %14, i1 true)
+  // CHECK-NEXT: %cast6 = trunc i128 %15 to i32
+  // CHECK-NEXT: store volatile i32 %cast6, ptr %lz, align 4
   lz = __builtin_clzg(ubi128);
-  // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8
-  // CHECK-NEXT: %14 = call i128 @llvm.ctlz.i128(i128 %13, i1 true)
-  // CHECK-NEXT: %cast5 = trunc i128 %14 to i32
-  // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4
+  // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1
+  // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1>
+  // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8
+  // CHECK-NEXT: %18 = call i8 @llvm.ctlz.i8(i8 %17, i1 true)
+  // CHECK-NEXT: %cast8 = zext i8 %18 to i32
+  // CHECK-NEXT: store volatile i32 %cast8, ptr %lz, align 4
+  lz = __builtin_clzg(vb8);
+  // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %20 = call i8 @llvm.ctlz.i8(i8 %19, i1 true)
+  // CHECK-NEXT: %cast9 = zext i8 %20 to i32
+  // CHECK-NEXT: %iszero = icmp eq i8 %19, 0
+  // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1
+  // CHECK-NEXT: %conv = sext i8 %21 to i32
+  // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast9
+  // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4
   lz = __builtin_clzg(uc, sc);
-  // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true)
-  // CHECK-NEXT: %cast6 = zext i8 %16 to i32
-  // CHECK-NEXT: %iszero = icmp eq i8 %15, 0
-  // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
-  // CHECK-NEXT: %conv = sext i8 %17 to i32
-  // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast6
-  // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4
+  // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %23 = call i16 @llvm.ctlz.i16(i16 %22, i1 true)
+  // CHECK-NEXT: %cast10 = zext i16 %23 to i32
+  // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0
+  // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %conv12 = zext i8 %24 to i32
+  // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10
+  // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4
   lz = __builtin_clzg(us, uc);
-  // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true)
-  // CHECK-NEXT: %cast7 = zext i16 %19 to i32
-  // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
-  // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %conv9 = zext i8 %20 to i32
-  // CHECK-NEXT: %clzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7
-  // CHECK-NEXT: store volatile i32 %clzg10, ptr %lz, align 4
+  // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4
+  // CHECK-NEXT: %26 = call i32 @llvm.ctlz.i32(i32 %25, i1 true)
+  // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0
+  // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2
+  // CHECK-NEXT: %conv15 = sext i16 %27 to i32
+  // CHECK-NEXT: %clzg16 = select i1 %iszero14, i32 %conv15, i32 %26
+  // CHECK-NEXT: store volatile i32 %clzg16, ptr %lz, align 4
   lz = __builtin_clzg(ui, s);
-  // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4
-  // CHECK-NEXT: %22 = call i32 @llvm.ctlz.i32(i32 %21, i1 true)
-  // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0
-  // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2
-  // CHECK-NEXT: %conv12 = sext i16 %23 to i32
-  // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %22
-  // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4
+  // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8
+  // CHECK-NEXT: %29 = call i64 @llvm.ctlz.i64(i64 %28, i1 true)
+  // CHECK-NEXT: %cast17 = trunc i64 %29 to i32
+  // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0
+  // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %conv19 = zext i16 %30 to i32
+  // CHECK-NEXT: %clzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17
+  // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4
   lz = __builtin_clzg(ul, us);
-  // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8
-  // CHECK-NEXT: %25 = call i64 @llvm.ctlz.i64(i64 %24, i1 true)
-  // CHECK-NEXT: %cast14 = trunc i64 %25 to i32
-  // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0
-  // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %conv16 = zext i16 %26 to i32
-  // CHECK-NEXT: %clzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14
-  // CHECK-NEXT: store volatile i32 %clzg17, ptr %lz, align 4
+  // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8
+  // CHECK-NEXT: %32 = call i64 @llvm.ctlz.i64(i64 %31, i1 true)
+  // CHECK-NEXT: %cast21 = trunc i64 %32 to i32
+  // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0
+  // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %33, i32 %cast21
+  // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4
   lz = __builtin_clzg(ull, i);
-  // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8
-  // CHECK-NEXT: %28 = call i64 @llvm.ctlz.i64(i64 %27, i1 true)
-  // CHECK-NEXT: %cast18 = trunc i64 %28 to i32
-  // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0
-  // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %clzg20 = select i1 %iszero19, i32 %29, i32 %cast18
-  // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4
+  // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16
+  // CHECK-NEXT: %35 = call i128 @llvm.ctlz.i128(i128 %34, i1 true)
+  // CHECK-NEXT: %cast24 = trunc i128 %35 to i32
+  // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0
+  // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %36, i32 %cast24
+  // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4
   lz = __builtin_clzg(ui128, i);
-  // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16
-  // CHECK-NEXT: %31 = call i128 @llvm.ctlz.i128(i128 %30, i1 true)
-  // CHECK-NEXT: %cast21 = trunc i128 %31 to i32
-  // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0
-  // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %32, i32 %cast21
-  // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4
+  // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8
+  // CHECK-NEXT: %38 = call i128 @llvm.ctlz.i128(i128 %37, i1 true)
+  // CHECK-NEXT: %cast27 = trunc i128 %38 to i32
+  // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0
+  // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %clzg29 = select i1 %iszero28, i32 %39, i32 %cast27
+  // CHECK-NEXT: store volatile i32 %clzg29, ptr %lz, align 4
   lz = __builtin_clzg(ubi128, i);
-  // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8
-  // CHECK-NEXT: %34 = call i128 @llvm.ctlz.i128(i128 %33, i1 true)
-  // CHECK-NEXT: %cast24 = trunc i128 %34 to i32
-  // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0
-  // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %35, i32 %cast24
-  // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4
-  // CHECK-NEXT: ret void
+  // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1
+  // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1>
+  // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8
+  // CHECK-NEXT: %42 = call i8 @llvm.ctlz.i8(i8 %41, i1 true)
+  // CHECK-NEXT: %cast31 = zext i8 %42 to i32
+  // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0
+  // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %clzg33 = select i1 %iszero32, i32 %43, i32 %cast31
+  // CHECK-NEXT: store volatile i32 %clzg33, ptr %lz, align 4
+  lz = __builtin_clzg(vb8, i);
 }
 
 // CHECK-LABEL: define{{.*}} void @test_builtin_ctzg
 void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
                        unsigned long ul, unsigned long long ull,
                        unsigned __int128 ui128, unsigned _BitInt(128) ubi128,
-                       signed char sc, short s, int i) {
+                       signed char sc, short s, int i,
+                       _Bool __attribute__((ext_vector_type(8))) vb8) {
   volatile int tz;
-  tz = __builtin_ctzg(uc);
-  // CHECK: %1 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %2 = call i8 @llvm.cttz.i8(i8 %1, i1 true)
-  // CHECK-NEXT: %cast = zext i8 %2 to i32
+  // CHECK: %2 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %3 = call i8 @llvm.cttz.i8(i8 %2, i1 true)
+  // CHECK-NEXT: %cast = zext i8 %3 to i32
   // CHECK-NEXT: store volatile i32 %cast, ptr %tz, align 4
+  tz = __builtin_ctzg(uc);
+  // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %5 = call i16 @llvm.cttz.i16(i16 %4, i1 true)
+  // CHECK-NEXT: %cast2 = zext i16 %5 to i32
+  // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4
   tz = __builtin_ctzg(us);
-  // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %4 = call i16 @llvm.cttz.i16(i16 %3, i1 true)
-  // CHECK-NEXT: %cast1 = zext i16 %4 to i32
-  // CHECK-NEXT: store volatile i32 %cast1, ptr %tz, align 4
+  // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4
+  // CHECK-NEXT: %7 = call i32 @llvm.cttz.i32(i32 %6, i1 true)
+  // CHECK-NEXT: store volatile i32 %7, ptr %tz, align 4
   tz = __builtin_ctzg(ui);
-  // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
-  // CHECK-NEXT: %6 = call i32 @llvm.cttz.i32(i32 %5, i1 true)
-  // CHECK-NEXT: store volatile i32 %6, ptr %tz, align 4
+  // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8
+  // CHECK-NEXT: %9 = call i64 @llvm.cttz.i64(i64 %8, i1 true)
+  // CHECK-NEXT: %cast3 = trunc i64 %9 to i32
+  // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4
   tz = __builtin_ctzg(ul);
-  // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8
-  // CHECK-NEXT: %8 = call i64 @llvm.cttz.i64(i64 %7, i1 true)
-  // CHECK-NEXT: %cast2 = trunc i64 %8 to i32
-  // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4
+  // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8
+  // CHECK-NEXT: %11 = call i64 @llvm.cttz.i64(i64 %10, i1 true)
+  // CHECK-NEXT: %cast4 = trunc i64 %11 to i32
+  // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4
   tz = __builtin_ctzg(ull);
-  // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8
-  // CHECK-NEXT: %10 = call i64 @llvm.cttz.i64(i64 %9, i1 true)
-  // CHECK-NEXT: %cast3 = trunc i64 %10 to i32
-  // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4
+  // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16
+  // CHECK-NEXT: %13 = call i128 @llvm.cttz.i128(i128 %12, i1 true)
+  // CHECK-NEXT: %cast5 = trunc i128 %13 to i32
+  // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4
   tz = __builtin_ctzg(ui128);
-  // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16
-  // CHECK-NEXT: %12 = call i128 @llvm.cttz.i128(i128 %11, i1 true)
-  // CHECK-NEXT: %cast4 = trunc i128 %12 to i32
-  // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4
+  // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8
+  // CHECK-NEXT: %15 = call i128 @llvm.cttz.i128(i128 %14, i1 true)
+  // CHECK-NEXT: %cast6 = trunc i128 %15 to i32
+  // CHECK-NEXT: store volatile i32 %cast6, ptr %tz, align 4
   tz = __builtin_ctzg(ubi128);
-  // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8
-  // CHECK-NEXT: %14 = call i128 @llvm.cttz.i128(i128 %13, i1 true)
-  // CHECK-NEXT: %cast5 = trunc i128 %14 to i32
-  // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4
-  tz = __builtin_ctzg(uc, sc);
-  // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %16 = call i8 @llvm.cttz.i8(i8 %15, i1 true)
-  // CHECK-NEXT: %cast6 = zext i8 %16 to i32
-  // CHECK-NEXT: %iszero = icmp eq i8 %15, 0
-  // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
-  // CHECK-NEXT: %conv = sext i8 %17 to i32
-  // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast6
+  // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1
+  // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1>
+  // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8
+  // CHECK-NEXT: %18 = call i8 @llvm.cttz.i8(i8 %17, i1 true)
+  // CHECK-NEXT: %cast8 = zext i8 %18 to i32
+  // CHECK-NEXT: store volatile i32 %cast8, ptr %tz, align 4
+  tz = __builtin_ctzg(vb8);
+  // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %20 = call i8 @llvm.cttz.i8(i8 %19, i1 true)
+  // CHECK-NEXT: %cast9 = zext i8 %20 to i32
+  // CHECK-NEXT: %iszero = icmp eq i8 %19, 0
+  // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1
+  // CHECK-NEXT: %conv = sext i8 %21 to i32
+  // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast9
   // CHECK-NEXT: store volatile i32 %ctzg, ptr %tz, align 4
+  tz = __builtin_ctzg(uc, sc);
+  // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %23 = call i16 @llvm.cttz.i16(i16 %22, i1 true)
+  // CHECK-NEXT: %cast10 = zext i16 %23 to i32
+  // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0
+  // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1
+  // CHECK-NEXT: %conv12 = zext i8 %24 to i32
+  // CHECK-NEXT: %ctzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10
+  // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4
   tz = __builtin_ctzg(us, uc);
-  // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %19 = call i16 @llvm.cttz.i16(i16 %18, i1 true)
-  // CHECK-NEXT: %cast7 = zext i16 %19 to i32
-  // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
-  // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
-  // CHECK-NEXT: %conv9 = zext i8 %20 to i32
-  // CHECK-NEXT: %ctzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7
-  // CHECK-NEXT: store volatile i32 %ctzg10, ptr %tz, align 4
+  // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4
+  // CHECK-NEXT: %26 = call i32 @llvm.cttz.i32(i32 %25, i1 true)
+  // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0
+  // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2
+  // CHECK-NEXT: %conv15 = sext i16 %27 to i32
+  // CHECK-NEXT: %ctzg16 = select i1 %iszero14, i32 %conv15, i32 %26
+  // CHECK-NEXT: store volatile i32 %ctzg16, ptr %tz, align 4
   tz = __builtin_ctzg(ui, s);
-  // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4
-  // CHECK-NEXT: %22 = call i32 @llvm.cttz.i32(i32 %21, i1 true)
-  // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0
-  // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2
-  // CHECK-NEXT: %conv12 = sext i16 %23 to i32
-  // CHECK-NEXT: %ctzg13 = select i1 %iszero11, i32 %conv12, i32 %22
-  // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4
+  // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8
+  // CHECK-NEXT: %29 = call i64 @llvm.cttz.i64(i64 %28, i1 true)
+  // CHECK-NEXT: %cast17 = trunc i64 %29 to i32
+  // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0
+  // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2
+  // CHECK-NEXT: %conv19 = zext i16 %30 to i32
+  // CHECK-NEXT: %ctzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17
+  // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4
   tz = __builtin_ctzg(ul, us);
-  // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8
-  // CHECK-NEXT: %25 = call i64 @llvm.cttz.i64(i64 %24, i1 true)
-  // CHECK-NEXT: %cast14 = trunc i64 %25 to i32
-  // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0
-  // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2
-  // CHECK-NEXT: %conv16 = zext i16 %26 to i32
-  // CHECK-NEXT: %ctzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14
-  // CHECK-NEXT: store volatile i32 %ctzg17, ptr %tz, align 4
+  // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8
+  // CHECK-NEXT: %32 = call i64 @llvm.cttz.i64(i64 %31, i1 true)
+  // CHECK-NEXT: %cast21 = trunc i64 %32 to i32
+  // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0
+  // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %33, i32 %cast21
+  // CHECK-NEXT: store volatile i32 %ctzg23, ptr %tz, align 4
   tz = __builtin_ctzg(ull, i);
-  // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8
-  // CHECK-NEXT: %28 = call i64 @llvm.cttz.i64(i64 %27, i1 true)
-  // CHECK-NEXT: %cast18 = trunc i64 %28 to i32
-  // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0
-  // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %ctzg20 = select i1 %iszero19, i32 %29, i32 %cast18
-  // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4
+  // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16
+  // CHECK-NEXT: %35 = call i128 @llvm.cttz.i128(i128 %34, i1 true)
+  // CHECK-NEXT: %cast24 = trunc i128 %35 to i32
+  // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0
+  // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %36, i32 %cast24
+  // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4
   tz = __builtin_ctzg(ui128, i);
-  // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16
-  // CHECK-NEXT: %31 = call i128 @llvm.cttz.i128(i128 %30, i1 true)
-  // CHECK-NEXT: %cast21 = trunc i128 %31 to i32
-  // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0
-  // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %32, i32 %cast21
-  // CHECK-NEXT: store volatile i32 %ctzg23, ptr %tz, align 4
+  // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8
+  // CHECK-NEXT: %38 = call i128 @llvm.cttz.i128(i128 %37, i1 true)
+  // CHECK-NEXT: %cast27 = trunc i128 %38 to i32
+  // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0
+  // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %ctzg29 = select i1 %iszero28, i32 %39, i32 %cast27
+  // CHECK-NEXT: store volatile i32 %ctzg29, ptr %tz, align 4
   tz = __builtin_ctzg(ubi128, i);
-  // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8
-  // CHECK-NEXT: %34 = call i128 @llvm.cttz.i128(i128 %33, i1 true)
-  // CHECK-NEXT: %cast24 = trunc i128 %34 to i32
-  // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0
-  // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4
-  // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %35, i32 %cast24
-  // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4
-  // CHECK-NEXT: ret void
+  // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1
+  // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1>
+  // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8
+  // CHECK-NEXT: %42 = call i8 @llvm.cttz.i8(i8 %41, i1 true)
+  // CHECK-NEXT: %cast31 = zext i8 %42 to i32
+  // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0
+  // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4
+  // CHECK-NEXT: %ctzg33 = select i1 %iszero32, i32 %43, i32 %cast31
+  // CHECK-NEXT: store volatile i32 %ctzg33, ptr %tz, align 4
+  tz = __builtin_ctzg(vb8, i);
 }
 
 #endif
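For anyone following along: below is a minimal usage sketch (my own, not
part of the patch) of what this change enables. A fixed-size boolean
vector, which Clang stores as a bitmask padded to the next primitive
integer size, can now be passed straight to the generic bit counting
builtins instead of being bit-cast to an unsigned integer by hand. The
`mask8` typedef and the lane values are hypothetical.

  // Sketch only: counting bits of an 8-lane boolean vector with the
  // generic builtins after this change, on a little-endian target where
  // lane i maps to bit i of the underlying i8 bitmask.
  #include <stdio.h>

  typedef _Bool mask8 __attribute__((ext_vector_type(8)));

  int main(void) {
    // Lanes 4..7 are true, so the storage bitmask is 0b11110000.
    mask8 m = {0, 0, 0, 0, 1, 1, 1, 1};

    printf("%d\n", __builtin_popcountg(m)); // 4: number of set lanes
    printf("%d\n", __builtin_ctzg(m, -1));  // 4: index of lowest set lane
    printf("%d\n", __builtin_clzg(m, -1));  // 0: highest lane is set
    return 0;
  }

As the CodeGen tests show, the <8 x i1> value is simply bit-cast to i8
and handed to the usual @llvm.ctpop/@llvm.ctlz/@llvm.cttz intrinsics.
Note the TODO in ExprConstant.cpp: constant evaluation of the boolean
vector forms is not handled yet.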