Author: Sarah Spall Date: 2025-03-11T13:54:09-07:00 New Revision: f9568e8d23b7f38bf60f46adac65b98f9e2b6e4e
URL: https://github.com/llvm/llvm-project/commit/f9568e8d23b7f38bf60f46adac65b98f9e2b6e4e DIFF: https://github.com/llvm/llvm-project/commit/f9568e8d23b7f38bf60f46adac65b98f9e2b6e4e.diff LOG: [HLSL] Make memory representation of boolean vectors in HLSL, vectors of i32. Add support for boolean swizzling. (#123977) Make the memory representation of boolean vectors in HLSL, vectors of i32. Allow boolean swizzling for boolean vectors in HLSL. Add tests for boolean vectors and boolean vector swizzling. Closes #91639 Added: clang/test/CodeGenHLSL/BoolVector.hlsl clang/test/SemaHLSL/Types/BuiltinVector/BooleanVectorConstantExpr.hlsl Modified: clang/include/clang/AST/Type.h clang/lib/AST/ASTContext.cpp clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp clang/lib/AST/ExprConstant.cpp clang/lib/AST/Type.cpp clang/lib/CodeGen/CGDebugInfo.cpp clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CGExprConstant.cpp clang/lib/CodeGen/CodeGenTypes.cpp clang/lib/Sema/SemaExprMember.cpp clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl Removed: ################################################################################ diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index ef59bd1621fb8..3c942f2ed7486 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2568,6 +2568,9 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isVectorType() const; // GCC vector type. bool isExtVectorType() const; // Extended vector type. bool isExtVectorBoolType() const; // Extended vector type with bool element. + // Extended vector type with bool element that is packed. HLSL doesn't pack + // its bool vectors. + bool isPackedVectorBoolType(const ASTContext &ctx) const; bool isSubscriptableVectorType() const; bool isMatrixType() const; // Matrix type. bool isConstantMatrixType() const; // Constant matrix type. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index fce1c34897da7..55022a4bc000a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2005,8 +2005,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case Type::Vector: { const auto *VT = cast<VectorType>(T); TypeInfo EltInfo = getTypeInfo(VT->getElementType()); - Width = VT->isExtVectorBoolType() ? VT->getNumElements() - : EltInfo.Width * VT->getNumElements(); + Width = VT->isPackedVectorBoolType(*this) + ? VT->getNumElements() + : EltInfo.Width * VT->getNumElements(); // Enforce at least byte size and alignment. Width = std::max<unsigned>(8, Width); Align = std::max<unsigned>(8, Width); diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index f4c54551a9a60..6b8860c09167c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -94,7 +94,8 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, Bits Offset, Bits ElemSize = Bits(Ctx.getASTContext().getTypeSize(ElemType)); PrimType ElemT = *Ctx.classify(ElemType); // Special case, since the bools here are packed. - bool PackedBools = FieldDesc->getType()->isExtVectorBoolType(); + bool PackedBools = + FieldDesc->getType()->isPackedVectorBoolType(Ctx.getASTContext()); unsigned NumElems = FieldDesc->getNumElems(); bool Ok = true; for (unsigned I = P.getIndex(); I != NumElems; ++I) { @@ -227,7 +228,7 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, QualType EltTy = VT->getElementType(); unsigned NElts = VT->getNumElements(); unsigned EltSize = - VT->isExtVectorBoolType() ? 1 : ASTCtx.getTypeSize(EltTy); + VT->isPackedVectorBoolType(ASTCtx) ? 1 : ASTCtx.getTypeSize(EltTy); if ((NElts * EltSize) % ASTCtx.getCharWidth() != 0) { // The vector's size in bits is not a multiple of the target's byte size, diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 9ed27c38bc4ea..f8e8aaddbfdbd 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7474,7 +7474,7 @@ class APValueToBufferConverter { QualType EltTy = VTy->getElementType(); unsigned NElts = VTy->getNumElements(); - if (VTy->isExtVectorBoolType()) { + if (VTy->isPackedVectorBoolType(Info.Ctx)) { // Special handling for OpenCL bool vectors: // Since these vectors are stored as packed bits, but we can't write // individual bits to the BitCastBuffer, we'll buffer all of the elements @@ -7737,11 +7737,11 @@ class BufferToAPValueConverter { QualType EltTy = VTy->getElementType(); unsigned NElts = VTy->getNumElements(); unsigned EltSize = - VTy->isExtVectorBoolType() ? 1 : Info.Ctx.getTypeSize(EltTy); + VTy->isPackedVectorBoolType(Info.Ctx) ? 1 : Info.Ctx.getTypeSize(EltTy); SmallVector<APValue, 4> Elts; Elts.reserve(NElts); - if (VTy->isExtVectorBoolType()) { + if (VTy->isPackedVectorBoolType(Info.Ctx)) { // Special handling for OpenCL bool vectors: // Since these vectors are stored as packed bits, but we can't read // individual bits from the BitCastBuffer, we'll buffer all of the @@ -7870,7 +7870,8 @@ static bool checkBitCastConstexprEligibilityType(SourceLocation Loc, if (const auto *VTy = Ty->getAs<VectorType>()) { QualType EltTy = VTy->getElementType(); unsigned NElts = VTy->getNumElements(); - unsigned EltSize = VTy->isExtVectorBoolType() ? 1 : Ctx.getTypeSize(EltTy); + unsigned EltSize = + VTy->isPackedVectorBoolType(Ctx) ? 1 : Ctx.getTypeSize(EltTy); if ((NElts * EltSize) % Ctx.getCharWidth() != 0) { // The vector's size in bits is not a multiple of the target's byte size, diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 2fd7f5800594a..72161c06a88d4 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -410,6 +410,12 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, VectorTypeBits.NumElements = nElements; } +bool Type::isPackedVectorBoolType(const ASTContext &ctx) const { + if (ctx.getLangOpts().HLSL) + return false; + return isExtVectorBoolType(); +} + BitIntType::BitIntType(bool IsUnsigned, unsigned NumBits) : Type(BitInt, QualType{}, TypeDependence::None), IsUnsigned(IsUnsigned), NumBits(NumBits) {} diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 0e6daa42ee7bf..7020cef875170 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3308,7 +3308,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, llvm::DIFile *Unit) { - if (Ty->isExtVectorBoolType()) { + if (Ty->isPackedVectorBoolType(CGM.getContext())) { // Boolean ext_vector_type(N) are special because their real element type // (bits of bit size) is not their Clang element type (_Bool of size byte). // For now, we pretend the boolean vector were actually a vector of bytes diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 191912ca7d800..5943ff9294e1a 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1985,7 +1985,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { // Boolean vectors use `iN` as storage type. - if (ClangVecTy->isExtVectorBoolType()) { + if (ClangVecTy->isPackedVectorBoolType(getContext())) { llvm::Type *ValTy = ConvertType(Ty); unsigned ValNumElems = cast<llvm::FixedVectorType>(ValTy)->getNumElements(); @@ -2064,6 +2064,10 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { if (Ty->isExtVectorBoolType()) { llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType()); + if (StoreTy->isVectorTy() && StoreTy->getScalarSizeInBits() > + Value->getType()->getScalarSizeInBits()) + return Builder.CreateZExt(Value, StoreTy); + // Expand to the memory bit width. unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits(); // <N x i1> --> <P x i1>. @@ -2079,8 +2083,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { /// by convertTypeForLoadStore) to its primary IR type (as returned /// by ConvertType). llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { - if (Ty->isExtVectorBoolType()) { + if (Ty->isPackedVectorBoolType(getContext())) { const auto *RawIntTy = Value->getType(); + // Bitcast iP --> <P x i1>. auto *PaddedVecTy = llvm::FixedVectorType::get( Builder.getInt1Ty(), RawIntTy->getPrimitiveSizeInBits()); @@ -2091,10 +2096,10 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { return emitBoolVecConversion(V, ValNumElems, "extractvec"); } - if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) { - llvm::Type *ResTy = ConvertType(Ty); + llvm::Type *ResTy = ConvertType(Ty); + if (hasBooleanRepresentation(Ty) || Ty->isBitIntType() || + Ty->isExtVectorBoolType()) return Builder.CreateTrunc(Value, ResTy, "loadedv"); - } return Value; } @@ -2152,7 +2157,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, if (auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) { auto *NewVecTy = CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts()); - if (!ClangVecTy->isExtVectorBoolType() && VecTy != NewVecTy) { + if (!ClangVecTy->isPackedVectorBoolType(getContext()) && + VecTy != NewVecTy) { SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1); std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0); Value = Builder.CreateShuffleVector(Value, Mask, "extractVec"); @@ -2343,7 +2349,15 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { if (!ExprVT) { unsigned InIdx = getAccessedFieldNo(0, Elts); llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); - return RValue::get(Builder.CreateExtractElement(Vec, Elt)); + + llvm::Value *Element = Builder.CreateExtractElement(Vec, Elt); + + llvm::Type *LVTy = ConvertType(LV.getType()); + if (Element->getType()->getPrimitiveSizeInBits() > + LVTy->getPrimitiveSizeInBits()) + Element = Builder.CreateTrunc(Element, LVTy); + + return RValue::get(Element); } // Always use shuffle vector to try to retain the original program structure @@ -2354,6 +2368,10 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { Mask.push_back(getAccessedFieldNo(i, Elts)); Vec = Builder.CreateShuffleVector(Vec, Mask); + + if (LV.getType()->isExtVectorBoolType()) + Vec = Builder.CreateTrunc(Vec, ConvertType(LV.getType()), "truncv"); + return RValue::get(Vec); } @@ -2407,6 +2425,13 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, // Read/modify/write the vector, inserting the new element. llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(), Dst.isVolatileQualified()); + llvm::Type *VecTy = Vec->getType(); + llvm::Value *SrcVal = Src.getScalarVal(); + + if (SrcVal->getType()->getPrimitiveSizeInBits() < + VecTy->getScalarSizeInBits()) + SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType()); + auto *IRStoreTy = dyn_cast<llvm::IntegerType>(Vec->getType()); if (IRStoreTy) { auto *IRVecTy = llvm::FixedVectorType::get( @@ -2414,19 +2439,21 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, Vec = Builder.CreateBitCast(Vec, IRVecTy); // iN --> <N x i1>. } - llvm::Value *SrcVal = Src.getScalarVal(); + // Allow inserting `<1 x T>` into an `<N x T>`. It can happen with scalar // types which are mapped to vector LLVM IR types (e.g. for implementing // an ABI). if (auto *EltTy = dyn_cast<llvm::FixedVectorType>(SrcVal->getType()); EltTy && EltTy->getNumElements() == 1) SrcVal = Builder.CreateBitCast(SrcVal, EltTy->getElementType()); + Vec = Builder.CreateInsertElement(Vec, SrcVal, Dst.getVectorIdx(), "vecins"); if (IRStoreTy) { // <N x i1> --> <iN>. Vec = Builder.CreateBitCast(Vec, IRStoreTy); } + Builder.CreateStore(Vec, Dst.getVectorAddress(), Dst.isVolatileQualified()); return; @@ -2623,14 +2650,12 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // This access turns into a read/modify/write of the vector. Load the input // value now. llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified()); + llvm::Type *VecTy = Vec->getType(); const llvm::Constant *Elts = Dst.getExtVectorElts(); - llvm::Value *SrcVal = Src.getScalarVal(); - if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) { unsigned NumSrcElts = VTy->getNumElements(); - unsigned NumDstElts = - cast<llvm::FixedVectorType>(Vec->getType())->getNumElements(); + unsigned NumDstElts = cast<llvm::FixedVectorType>(VecTy)->getNumElements(); if (NumDstElts == NumSrcElts) { // Use shuffle vector is the src and destination are the same number of // elements and restore the vector mask since it is on the side it will be @@ -2639,6 +2664,11 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, for (unsigned i = 0; i != NumSrcElts; ++i) Mask[getAccessedFieldNo(i, Elts)] = i; + llvm::Value *SrcVal = Src.getScalarVal(); + if (VecTy->getScalarSizeInBits() > + SrcVal->getType()->getScalarSizeInBits()) + SrcVal = Builder.CreateZExt(SrcVal, VecTy); + Vec = Builder.CreateShuffleVector(SrcVal, Mask); } else if (NumDstElts > NumSrcElts) { // Extended the source vector to the same length and then shuffle it @@ -2649,7 +2679,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, for (unsigned i = 0; i != NumSrcElts; ++i) ExtMask.push_back(i); ExtMask.resize(NumDstElts, -1); - llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask); + llvm::Value *ExtSrcVal = + Builder.CreateShuffleVector(Src.getScalarVal(), ExtMask); // build identity SmallVector<int, 4> Mask; for (unsigned i = 0; i != NumDstElts; ++i) @@ -2674,6 +2705,11 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // be updating one element. unsigned InIdx = getAccessedFieldNo(0, Elts); llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); + + llvm::Value *SrcVal = Src.getScalarVal(); + if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits()) + SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType()); + Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt); } @@ -4701,9 +4737,13 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { // Store the vector to memory (because LValue wants an address). Address VecMem = CreateMemTemp(E->getBase()->getType()); + // need to zero extend an hlsl boolean vector to store it back to memory + QualType Ty = E->getBase()->getType(); + llvm::Type *LTy = convertTypeForLoadStore(Ty, Vec->getType()); + if (LTy->getScalarSizeInBits() > Vec->getType()->getScalarSizeInBits()) + Vec = Builder.CreateZExt(Vec, LTy); Builder.CreateStore(Vec, VecMem); - Base = MakeAddrLValue(VecMem, E->getBase()->getType(), - AlignmentSource::Decl); + Base = MakeAddrLValue(VecMem, Ty, AlignmentSource::Decl); } QualType type = diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 08e42a9e1dcf3..e90881a9743bf 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1981,7 +1981,10 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, } // Zero-extend bool. - if (C->getType()->isIntegerTy(1) && !destType->isBitIntType()) { + // In HLSL bool vectors are stored in memory as a vector of i32 + if ((C->getType()->isIntegerTy(1) && !destType->isBitIntType()) || + (destType->isExtVectorBoolType() && + !destType->isPackedVectorBoolType(CGM.getContext()))) { llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType); llvm::Constant *Res = llvm::ConstantFoldCastOperand( llvm::Instruction::ZExt, C, boolTy, CGM.getDataLayout()); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index bd625052cb5ed..dfbd444a850a5 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -115,6 +115,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { // Check for the boolean vector case. if (T->isExtVectorBoolType()) { auto *FixedVT = cast<llvm::FixedVectorType>(R); + + if (Context.getLangOpts().HLSL) { + llvm::Type *IRElemTy = ConvertTypeForMem(Context.BoolTy); + return llvm::FixedVectorType::get(IRElemTy, FixedVT->getNumElements()); + } + // Pad to at least one byte. uint64_t BytePadded = std::max<uint64_t>(FixedVT->getNumElements(), 8); return llvm::IntegerType::get(FixedVT->getContext(), BytePadded); @@ -657,7 +663,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::Vector: { const auto *VT = cast<VectorType>(Ty); // An ext_vector_type of Bool is really a vector of bits. - llvm::Type *IRElemTy = VT->isExtVectorBoolType() + llvm::Type *IRElemTy = VT->isPackedVectorBoolType(Context) ? llvm::Type::getInt1Ty(getLLVMContext()) : VT->getElementType()->isMFloat8Type() ? llvm::Type::getInt8Ty(getLLVMContext()) diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index d130e8b86bc56..1d9efbeb5ccb5 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1697,7 +1697,7 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R, QualType(), false); } - if (BaseType->isExtVectorBoolType()) { + if (BaseType->isPackedVectorBoolType(S.Context)) { // We disallow element access for ext_vector_type bool. There is no way to // materialize a reference to a vector element as a pointer (each element is // one bit in the vector). diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index 1665a0260ab05..6770efefe94fe 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -91,13 +91,12 @@ void l4_to_i2() { // CHECK-LABEL: i2_to_b2 // CHECK: [[l2:%.*]] = alloca <2 x i32> -// CHECK: [[b2:%.*]] = alloca i8 +// CHECK: [[b2:%.*]] = alloca <2 x i32> // CHECK: store <2 x i32> splat (i32 8), ptr [[i2]] // CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]] // CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer -// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8 -// CHECK: store i8 [[i8]], ptr [[b2]] +// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecb2]] to <2 x i32> +// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]] void i2_to_b2() { vector<int, 2> i2 = 8; vector<bool, 2> b2 = i2; @@ -105,14 +104,13 @@ void i2_to_b2() { // CHECK-LABEL: d4_to_b2 // CHECK: [[d4:%.*]] = alloca <4 x double> -// CHECK: [[b2:%.*]] = alloca i8 +// CHECK: [[b2:%.*]] = alloca <2 x i32> // CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecb4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x double> [[vecd4]], zeroinitializer // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> <i32 0, i32 1> -// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecd2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8 -// CHECK: store i8 [[i8]], ptr [[b2]] +// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecd2]] to <2 x i32> +// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]] void d4_to_b2() { vector<double,4> d4 = 9.0; vector<bool, 2> b2 = d4; diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl b/clang/test/CodeGenHLSL/BoolVector.hlsl new file mode 100644 index 0000000000000..5e889d50be98d --- /dev/null +++ b/clang/test/CodeGenHLSL/BoolVector.hlsl @@ -0,0 +1,111 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// CHECK: %struct.S = type { <2 x i32>, float } +// CHECK: [[ConstS:@.*]] = private unnamed_addr constant %struct.S { <2 x i32> splat (i32 1), float 1.000000e+00 }, align 8 +// CHECK: [[ConstArr:.*]] = private unnamed_addr constant [2 x <2 x i32>] [<2 x i32> splat (i32 1), <2 x i32> zeroinitializer], align 8 + +struct S { + bool2 bv; + float f; +}; + +// CHECK-LABEL: define noundef i1 {{.*}}fn1{{.*}} +// CHECK: [[B:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[B]], align 8 +// CHECK-NEXT: [[BoolVec:%.*]] = load <2 x i32>, ptr [[B]], align 8 +// CHECK-NEXT: [[L:%.*]] = trunc <2 x i32> [[BoolVec:%.*]] to <2 x i1> +// CHECK-NEXT: [[VecExt:%.*]] = extractelement <2 x i1> [[L]], i32 0 +// CHECK-NEXT: ret i1 [[VecExt]] +bool fn1() { + bool2 B = {true,true}; + return B[0]; +} + +// CHECK-LABEL: define noundef <2 x i1> {{.*}}fn2{{.*}} +// CHECK: [[VAddr:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[StoreV:%.*]] = zext i1 {{.*}} to i32 +// CHECK-NEXT: store i32 [[StoreV]], ptr [[VAddr]], align 4 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[L]] to i1 +// CHECK-NEXT: [[Vec:%.*]] = insertelement <2 x i1> poison, i1 [[LoadV]], i32 0 +// CHECK-NEXT: [[Vec1:%.*]] = insertelement <2 x i1> [[Vec]], i1 true, i32 1 +// CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[Vec1]] to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[Z]], ptr [[A]], align 8 +// CHECK-NEXT: [[LoadBV:%.*]] = load <2 x i32>, ptr [[A]], align 8 +// CHECK-NEXT: [[LoadV2:%.*]] = trunc <2 x i32> [[LoadBV]] to <2 x i1> +// CHECK-NEXT: ret <2 x i1> [[LoadV2]] +bool2 fn2(bool V) { + bool2 A = {V,true}; + return A; +} + +// CHECK-LABEL: define noundef i1 {{.*}}fn3{{.*}} +// CHECK: [[s:%.*]] = alloca %struct.S, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[s]], ptr align 8 [[ConstS]], i32 16, i1 false) +// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[LBV:%.*]] = load <2 x i32>, ptr [[BV]], align 8 +// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[LBV]] to <2 x i1> +// CHECK-NEXT: [[VX:%.*]] = extractelement <2 x i1> [[LV]], i32 0 +// CHECK-NEXT: ret i1 [[VX]] +bool fn3() { + S s = {{true,true}, 1.0}; + return s.bv[0]; +} + +// CHECK-LABEL: define noundef i1 {{.*}}fn4{{.*}} +// CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 [[ConstArr]], i32 16, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0 +// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Idx]], align 8 +// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[L]] to <2 x i1> +// CHECK-NEXT: [[VX:%.*]] = extractelement <2 x i1> [[LV]], i32 1 +// CHECK-NEXT: ret i1 [[VX]] +bool fn4() { + bool2 Arr[2] = {{true,true}, {false,false}}; + return Arr[0][1]; +} + +// CHECK-LABEL: define void {{.*}}fn5{{.*}} +// CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8 +// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8 +// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1 +// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8 +// CHECK-NEXT: ret void +void fn5() { + bool2 Arr = {true,true}; + Arr[1] = false; +} + +// CHECK-LABEL: define void {{.*}}fn6{{.*}} +// CHECK: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[S:%.*]] = alloca %struct.S, align 8 +// CHECK-NEXT: store i32 0, ptr [[V]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[S]], ptr align 8 {{.*}}, i32 16, i1 false) +// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1 +// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0 +// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 8 +// CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32 +// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1 +// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 8 +// CHECK-NEXT: ret void +void fn6() { + bool V = false; + S s = {{true,true}, 1.0}; + s.bv[1] = V; +} + +// CHECK-LABEL: define void {{.*}}fn7{{.*}} +// CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 {{.*}}, i32 16, i1 false) +// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0 +// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8 +// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1 +// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[Idx]], align 8 +// CHECK-NEXT: ret void +void fn7() { + bool2 Arr[2] = {{true,true}, {false,false}}; + Arr[0][1] = false; +} diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl index 97711c9ee25a1..96e17046ee934 100644 --- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl @@ -11,13 +11,23 @@ int2 ToTwoInts(int V){ } // CHECK-LABEL: ToFourFloats -// [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0 -// [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer +// CHECK: [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0 +// CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer // ret <4 x float> [[vec4]] float4 ToFourFloats(float V){ return V.rrrr; } +// CHECK-LABEL: ToFourBools +// CHECK: {{%.*}} = zext i1 {{.*}} to i32 +// CHECK: [[splat:%.*]] = insertelement <1 x i32> poison, i32 {{.*}}, i64 0 +// CHECK-NEXT: [[vec4:%.*]] = shufflevector <1 x i32> [[splat]], <1 x i32> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[vec2Ret:%.*]] = trunc <4 x i32> [[vec4]] to <4 x i1> +// CHECK-NEXT: ret <4 x i1> [[vec2Ret]] +bool4 ToFourBools(bool V) { + return V.rrrr; +} + // CHECK-LABEL: FillOne // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4 // CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4 @@ -93,6 +103,17 @@ vector<float, 1> FillOneHalfFloat(){ return .5f.r; } +// CHECK-LABEL: FillTrue +// CHECK: [[Tmp:%.*]] = alloca <1 x i32>, align 4 +// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Vec1:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Vec1]], <1 x i32> poison, <2 x i32> zeroinitializer +// CHECK-NEXT: [[Vec2Ret:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1> +// CHECK-NEXT: ret <2 x i1> [[Vec2Ret]] +bool2 FillTrue() { + return true.xx; +} + // The initial codegen for this case is correct but a bit odd. The IR optimizer // cleans this up very nicely. @@ -110,6 +131,25 @@ float2 HowManyFloats(float V) { return V.rr.rr; } +// CHECK-LABEL: HowManyBools +// CHECK: [[VAddr:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Vec2Ptr:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = zext i1 {{.*}} to i32 +// CHECK-NEXT: store i32 [[Tmp]], ptr [[VAddr]], align 4 +// CHECK-NEXT: [[VVal:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[VVal]], i64 0 +// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Splat]], <1 x i32> poison, <2 x i32> zeroinitializer +// CHECK-NEXT: [[Trunc:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1> +// CHECK-NEXT: [[Ext:%.*]] = zext <2 x i1> [[Trunc]] to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[Ext]], ptr [[Vec2Ptr]], align 8 +// CHECK-NEXT: [[V2:%.*]] = load <2 x i32>, ptr [[Vec2Ptr]], align 8 +// CHECK-NEXT: [[V3:%.*]] = shufflevector <2 x i32> [[V2]], <2 x i32> poison, <2 x i32> zeroinitializer +// CHECK-NEXT: [[LV1:%.*]] = trunc <2 x i32> [[V3]] to <2 x i1> +// CHECK-NEXT: ret <2 x i1> [[LV1]] +bool2 HowManyBools(bool V) { + return V.rr.rr; +} + // This codegen is gnarly because `1.l` is a double, so this creates double // vectors that need to be truncated down to floats. The optimizer cleans this // up nicely too. @@ -166,3 +206,99 @@ int AssignInt(int V){ X.x = V.x + V.x; return X; } + +// CHECK-LABEL: AssignBool +// CHECK: [[VAddr:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[XAddr:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Zext:%.*]] = zext i1 %V to i32 +// CHECK-NEXT: store i32 [[Zext]], ptr [[VAddr]], align 4 +// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[X]], i64 0 +// CHECK-NEXT: [[Y:%.*]] = extractelement <1 x i32> [[Splat]], i32 0 +// CHECK-NEXT: [[Z:%.*]] = trunc i32 [[Y]] to i1 +// CHECK-NEXT: [[A:%.*]] = zext i1 [[Z]] to i32 +// CHECK-NEXT: store i32 [[A]], ptr [[XAddr]], align 4 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[Splat2:%.*]] = insertelement <1 x i32> poison, i32 [[B]], i64 0 +// CHECK-NEXT: [[C:%.*]] = extractelement <1 x i32> [[Splat2]], i32 0 +// CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i1 +// CHECK-NEXT: br i1 [[D]], label %lor.end, label %lor.rhs + +// CHECK: lor.rhs: +// CHECK-NEXT: [[E:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[Splat3:%.*]] = insertelement <1 x i32> poison, i32 [[E]], i64 0 +// CHECK-NEXT: [[F:%.*]] = extractelement <1 x i32> [[Splat3]], i32 0 +// CHECK-NEXT: [[G:%.*]] = trunc i32 [[F]] to i1 +// CHECK-NEXT: br label %lor.end + +// CHECK: lor.end: +// CHECK-NEXT: [[H:%.*]] = phi i1 [ true, %entry ], [ [[G]], %lor.rhs ] +// CHECK-NEXT: store i1 [[H]], ptr [[XAddr]], align 4 +// CHECK-NEXT: [[I:%.*]] = load i32, ptr [[XAddr]], align 4 +// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[I]] to i1 +// CHECK-NEXT: ret i1 [[LoadV]] +bool AssignBool(bool V) { + bool X = V.x; + X.x = V.x || V.x; + return X; +} + +// CHECK-LABEL: AssignBool2 +// CHECK: [[VAdddr:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4 +// CHECK-NEXT: [[SV:%.*]] = zext i1 %V to i32 +// CHECK-NEXT: store i32 [[SV]], ptr [[VAddr]], align 4 +// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Y:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Z:%.*]] = shufflevector <1 x i32> [[Y]], <1 x i32> poison, <2 x i32> zeroinitializer +// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1> +// CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[LV]] to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[A]], ptr [[X]], align 8 +// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4 +// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1 +// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8 +// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32 +// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1 +// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8 +// CHECK-NEXT: ret void +void AssignBool2(bool V) { + bool2 X = true.xx; + X.y = V; +} + +// CHECK-LABEL: AssignBool3 +// CHECK: [[VAddr:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Y:%.*]] = zext <2 x i1> %V to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[Y]], ptr [[VAddr]], align 8 +// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[X]], align 8 +// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8 +// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1> +// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8 +// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32> +// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1> +// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8 +// CHECK-NEXT: ret void +void AssignBool3(bool2 V) { + bool2 X = {true,true}; + X.xy = V; +} + +// CHECK-LABEL: AccessBools +// CHECK: [[X:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4 +// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Y:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4 +// CHECK-NEXT: [[Z:%.*]] = shufflevector <1 x i32> [[Y]], <1 x i32> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[LV:%.*]] = trunc <4 x i32> [[Z]] to <4 x i1> +// CHECK-NEXT: [[A:%.*]] = zext <4 x i1> [[LV]] to <4 x i32> +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[X]], align 16 +// CHECK-NEXT: [[B:%.*]] = load <4 x i32>, ptr [[X]], align 16 +// CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3> +// CHECK-NEXT: [[LV1:%.*]] = trunc <2 x i32> [[C]] to <2 x i1> +// CHECK-NEXT: ret <2 x i1> [[LV1]] +bool2 AccessBools() { + bool4 X = true.xxxx; + return X.zw; +} diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/BooleanVectorConstantExpr.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/BooleanVectorConstantExpr.hlsl new file mode 100644 index 0000000000000..1d368befc839a --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinVector/BooleanVectorConstantExpr.hlsl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -std=hlsl202x -verify %s + +// expected-no-diagnostics + +export void fn() { + _Static_assert((true.xxxx).y == true, "Woo!"); + + _Static_assert((true.xx).x && false == false, "Woo!"); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits