Author: Sarah Spall Date: 2025-02-07T09:12:55-08:00 New Revision: 3f8e2802069aabe4384ecd4575d50fd4457dae51
URL: https://github.com/llvm/llvm-project/commit/3f8e2802069aabe4384ecd4575d50fd4457dae51 DIFF: https://github.com/llvm/llvm-project/commit/3f8e2802069aabe4384ecd4575d50fd4457dae51.diff LOG: [HLSL] Implement HLSL Elementwise casting (excluding splat cases); Re-land #118842 (#126258) Implement HLSLElementwiseCast excluding support for splat cases Do not support casting types that contain bitfields. Partly closes https://github.com/llvm/llvm-project/issues/100609 and partly closes https://github.com/llvm/llvm-project/issues/100619 Re-land #118842 after fixing warning as an error, found by a buildbot. Added: clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl Modified: clang/include/clang/AST/OperationKinds.def clang/include/clang/Sema/SemaHLSL.h clang/lib/AST/Expr.cpp clang/lib/AST/ExprConstant.cpp clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CGExprAgg.cpp clang/lib/CodeGen/CGExprComplex.cpp clang/lib/CodeGen/CGExprConstant.cpp clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Edit/RewriteObjCFoundationAPI.cpp clang/lib/Sema/SemaCast.cpp clang/lib/Sema/SemaHLSL.cpp clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl Removed: ################################################################################ diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index 8788b8ff0ef0a45..b3dc7c3d8dc77e1 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -367,6 +367,9 @@ CAST_OPERATION(HLSLVectorTruncation) // Non-decaying array RValue cast (HLSL only). CAST_OPERATION(HLSLArrayRValue) +// Aggregate by Value cast (HLSL only). +CAST_OPERATION(HLSLElementwiseCast) + //===- Binary Operations -------------------------------------------------===// // Operators listed in order of precedence. // Note that additions to this should also update the StmtVisitor class, diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 20376e980ab351a..6e8ca2e4710dec8 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -141,6 +141,9 @@ class SemaHLSL : public SemaBase { // Diagnose whether the input ID is uint/unit2/uint3 type. 
bool diagnoseInputIDType(QualType T, const ParsedAttr &AL); + bool CanPerformScalarCast(QualType SrcTy, QualType DestTy); + bool ContainsBitField(QualType BaseTy); + bool CanPerformElementwiseCast(Expr *Src, QualType DestType); ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg); QualType getInoutParameterType(QualType Ty); diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 4fc62919fde94bf..c22aa66ba2cfb3d 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1956,6 +1956,7 @@ bool CastExpr::CastConsistency() const { case CK_FixedPointToBoolean: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLElementwiseCast: CheckNoBasePath: assert(path_empty() && "Cast kind should not have a base path!"); break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 37019b5235f5610..192b679b4c99596 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15047,6 +15047,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_NoOp: case CK_LValueToRValueBitCast: case CK_HLSLArrayRValue: + case CK_HLSLElementwiseCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: @@ -15905,6 +15906,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLElementwiseCast: llvm_unreachable("invalid cast kind for complex value"); case CK_LValueToRValue: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index bf8df2789f58dba..1e233c42c8782df 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5338,6 +5338,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: + case CK_HLSLElementwiseCast: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -6376,3 +6377,75 @@ RValue CodeGenFunction::EmitPseudoObjectRValue(const PseudoObjectExpr *E, LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } + +void CodeGenFunction::FlattenAccessAndType( + Address Addr, QualType AddrType, + SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, + SmallVectorImpl<QualType> &FlatTypes) { + // WorkList is list of type we are processing + the Index List to access + // the field of that type in Addr for use in a GEP + llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>, + 16> + WorkList; + llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); + // Addr should be a pointer so we need to 'dereference' it + WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}}); + + while (!WorkList.empty()) { + auto [T, IdxList] = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL"); + if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) { + uint64_t Size = CAT->getZExtSize(); + for (int64_t I = Size - 1; I > -1; I--) { + llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList; + IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); + WorkList.emplace_back(CAT->getElementType(), IdxListCopy); + } + } else if (const auto *RT = dyn_cast<RecordType>(T)) { + const RecordDecl *Record = RT->getDecl(); + assert(!Record->isUnion() && "Union types not supported in flat cast."); + + const CXXRecordDecl 
*CXXD = dyn_cast<CXXRecordDecl>(Record);
+
+      llvm::SmallVector<QualType, 16> FieldTypes;
+      if (CXXD && CXXD->isStandardLayout())
+        Record = CXXD->getStandardLayoutBaseWithFields();
+
+      // Deal with potential base classes.
+      if (CXXD && !CXXD->isStandardLayout()) {
+        for (auto &Base : CXXD->bases())
+          FieldTypes.push_back(Base.getType());
+      }
+
+      for (auto *FD : Record->fields())
+        FieldTypes.push_back(FD->getType());
+
+      for (int64_t I = FieldTypes.size() - 1; I > -1; I--) {
+        llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+        IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
+        WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy});
+      }
+    } else if (const auto *VT = dyn_cast<VectorType>(T)) {
+      llvm::Type *LLVMT = ConvertTypeForMem(T);
+      CharUnits Align = getContext().getTypeAlignInChars(T);
+      Address GEP =
+          Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep");
+      for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) {
+        llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I);
+        // A GEP on vector fields is not recommended, so combine the GEP with
+        // extract/insert.
+        AccessList.emplace_back(GEP, Idx);
+        FlatTypes.push_back(VT->getElementType());
+      }
+    } else {
+      // A scalar/builtin type.
+      llvm::Type *LLVMT = ConvertTypeForMem(T);
+      CharUnits Align = getContext().getTypeAlignInChars(T);
+      Address GEP =
+          Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep");
+      AccessList.emplace_back(GEP, nullptr);
+      FlatTypes.push_back(T);
+    }
+  }
+}
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 2ad6587089f1014..c3f1cbed6b39f95 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -491,6 +491,79 @@ static bool isTrivialFiller(Expr *E) {
   return false;
 }
 
+// Emit a flat cast where the RHS is a scalar, including a vector.
+static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
+                                   QualType DestTy, llvm::Value *SrcVal,
+                                   QualType SrcTy, SourceLocation Loc) {
+  // Flatten our destination.
+  SmallVector<QualType, 16> DestTypes; // Flattened type
+  SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
+  // ^^ Flattened accesses to DestVal we want to store into
+  CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
+
+  assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting.");
+  const VectorType *VT = SrcTy->getAs<VectorType>();
+  SrcTy = VT->getElementType();
+  assert(StoreGEPList.size() <= VT->getNumElements() &&
+         "Cannot perform HLSL flat cast when vector source \
+         object has fewer elements than flattened destination \
+         object.");
+  for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) {
+    llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load");
+    llvm::Value *Cast =
+        CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc);
+
+    // Store back.
+    llvm::Value *Idx = StoreGEPList[I].second;
+    if (Idx) {
+      llvm::Value *V =
+          CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
+      Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
+    }
+    CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+  }
+  return;
+}
+
+// Emit a flat cast where the RHS is an aggregate.
+static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal,
+                                    QualType DestTy, Address SrcVal,
+                                    QualType SrcTy, SourceLocation Loc) {
+  // Flatten our destination.
+  SmallVector<QualType, 16> DestTypes; // Flattened type
+  SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
+  // ^^ Flattened accesses to DestVal we want to store into
+  CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
+  // Flatten our source.
+  SmallVector<QualType, 16> SrcTypes; // Flattened type
+  SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
+  // ^^ Flattened accesses to SrcVal we want to load from
+  CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes);
+
+  assert(StoreGEPList.size() <= LoadGEPList.size() &&
+         "Cannot perform HLSL flat cast when flattened source object \
+         has fewer elements than flattened destination object.");
+  // Apply casts to what we load from LoadGEPList
+  // and store the result in Dest.
+  for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) {
+    llvm::Value *Idx = LoadGEPList[I].second;
+    llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
+    Load =
+        Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
+    llvm::Value *Cast =
+        CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc);
+
+    // Store back.
+    Idx = StoreGEPList[I].second;
+    if (Idx) {
+      llvm::Value *V =
+          CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
+      Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
+    }
+    CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+  }
+}
+
 /// Emit initialization of an array from an initializer list. ExprToVisit must
 /// be either an InitListExpr or a CXXParenInitListExpr.
 void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
@@ -890,7 +963,25 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
   case CK_HLSLArrayRValue:
     Visit(E->getSubExpr());
     break;
-
+  case CK_HLSLElementwiseCast: {
+    Expr *Src = E->getSubExpr();
+    QualType SrcTy = Src->getType();
+    RValue RV = CGF.EmitAnyExpr(Src);
+    QualType DestTy = E->getType();
+    Address DestVal = Dest.getAddress();
+    SourceLocation Loc = E->getExprLoc();
+
+    if (RV.isScalar()) {
+      llvm::Value *SrcVal = RV.getScalarVal();
+      EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+    } else {
+      assert(RV.isAggregate() &&
+             "Can't perform HLSL Aggregate cast on a complex type.");
+      Address SrcVal = RV.getAggregateAddress();
+      EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+    }
+    break;
+  }
   case CK_NoOp:
   case CK_UserDefinedConversion:
   case CK_ConstructorConversion:
@@ -1461,6 +1552,7 @@ static bool castPreservesZero(const CastExpr *CE) {
   case CK_NonAtomicToAtomic:
   case CK_AtomicToNonAtomic:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLElementwiseCast:
    return true;
 
   case CK_BaseToDerivedMemberPointer:
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index ac31dff11b585ec..c2679ea92dc9728 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -610,6 +610,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
   case CK_HLSLArrayRValue:
+  case CK_HLSLElementwiseCast:
     llvm_unreachable("invalid cast kind for complex value");
 
   case CK_FloatingRealToComplex:
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 655fc3dc954c819..ef11798869d3b13 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1335,6 +1335,7 @@ class ConstExprEmitter
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
   case CK_HLSLArrayRValue:
+  case CK_HLSLElementwiseCast:
     return nullptr;
   }
   llvm_unreachable("Invalid CastKind");
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index df850421c72c6c7..80daed7e5395193 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2269,6 +2269,42 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) {
   return true;
 }
 
+// RHS is an aggregate type.
+static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal,
+                                      QualType RHSTy, QualType LHSTy,
+                                      SourceLocation Loc) {
+  SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
+  SmallVector<QualType, 16> SrcTypes; // Flattened type
+  CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes);
+  // The LHS is either a vector or a builtin type.
+  // If it's a vector, create a temp alloca to store into and return that.
+  if (auto *VecTy = LHSTy->getAs<VectorType>()) {
+    assert(SrcTypes.size() >= VecTy->getNumElements() &&
+           "Flattened type on RHS must have at least as many elements as the vector on LHS.");
+    llvm::Value *V =
+        CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
+    // Write to V.
+    for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
+      llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
+      llvm::Value *Idx = LoadGEPList[I].second;
+      Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
+                 : Load;
+      llvm::Value *Cast = CGF.EmitScalarConversion(
+          Load, SrcTypes[I], VecTy->getElementType(), Loc);
+      V = CGF.Builder.CreateInsertElement(V, Cast, I);
+    }
+    return V;
+  }
+  // If it's a builtin, just do an extractelement or load.
+  assert(LHSTy->isBuiltinType() &&
+         "Destination type must be a vector or builtin type.");
+  llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
+  llvm::Value *Idx = LoadGEPList[0].second;
+  Load =
+      Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
+  return CGF.EmitScalarConversion(Load, SrcTypes[0], LHSTy, Loc);
+}
+
 // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts
 // have to handle a broader range of conversions than explicit casts, as they
 // handle things like function to ptr-to-function decay etc.
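// Illustrative sketch (a hypothetical example, not from the patch): minimal
// HLSL that reaches the helper above through the CK_HLSLElementwiseCast case
// of VisitCastExpr below, assuming the shader-model setup used by the new
// tests; the struct and function names are invented for illustration:
//
//   struct S { int X; float Y; };
//   export int2 toVector() {
//     S s = {1, 2.0};
//     return (int2)s;  // vector destination: load each field, convert, insert
//   }
//   export int toScalar() {
//     int A[2] = {1, 2};
//     return (int)A;   // builtin destination: only A[0] is loaded and converted
//   }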
@@ -2759,7 +2795,16 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy); return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc"); } + case CK_HLSLElementwiseCast: { + RValue RV = CGF.EmitAnyExpr(E); + SourceLocation Loc = CE->getExprLoc(); + QualType SrcTy = E->getType(); + assert(RV.isAggregate() && "Not a valid HLSL Flat Cast."); + // RHS is an aggregate + Address SrcVal = RV.getAggregateAddress(); + return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc); + } } // end of switch llvm_unreachable("unknown scalar cast"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ced3484fbd2b6ce..e7a5100a9fa2946 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4439,6 +4439,11 @@ class CodeGenFunction : public CodeGenTypeCache { AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); + void FlattenAccessAndType( + Address Addr, QualType AddrTy, + SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList, + SmallVectorImpl<QualType> &FlatTypes); + llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface, diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp index 81797c8c4dc75a2..32f5ebb55155ed1 100644 --- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1085,6 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, llvm_unreachable("OpenCL-specific cast in Objective-C?"); case CK_HLSLVectorTruncation: + case CK_HLSLElementwiseCast: llvm_unreachable("HLSL-specific cast in Objective-C?"); break; diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 54bc52fa2ac4052..23be71ad8e2aebc 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Initialization.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaRISCV.h" #include "llvm/ADT/SmallVector.h" @@ -2772,6 +2773,22 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, return; } + CheckedConversionKind CCK = FunctionalStyle + ? CheckedConversionKind::FunctionalCast + : CheckedConversionKind::CStyleCast; + // This case should not trigger on regular vector splat + // vector cast, vector truncation, or special hlsl splat cases + QualType SrcTy = SrcExpr.get()->getType(); + if (Self.getLangOpts().HLSL && + Self.HLSL().CanPerformElementwiseCast(SrcExpr.get(), DestType)) { + if (SrcTy->isConstantArrayType()) + SrcExpr = Self.ImpCastExprToType( + SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), + CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); + Kind = CK_HLSLElementwiseCast; + return; + } + if (ValueKind == VK_PRValue && !DestType->isRecordType() && !isPlaceholder(BuiltinType::Overload)) { SrcExpr = Self.DefaultFunctionArrayLvalueConversion(SrcExpr.get()); @@ -2824,9 +2841,6 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, if (isValidCast(tcr)) Kind = CK_NoOp; - CheckedConversionKind CCK = FunctionalStyle - ? 
CheckedConversionKind::FunctionalCast - : CheckedConversionKind::CStyleCast; if (tcr == TC_NotApplicable) { tcr = TryAddressSpaceCast(Self, SrcExpr, DestType, /*CStyle*/ true, msg, Kind); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2cb389eefaac28d..ec6b5b45de42bfa 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2706,6 +2706,150 @@ bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New, return HadError; } +// Generally follows PerformScalarCast, with cases reordered for +// clarity of what types are supported +bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { + + if (SemaRef.getASTContext().hasSameUnqualifiedType(SrcTy, DestTy)) + return true; + + switch (SrcTy->getScalarTypeKind()) { + case Type::STK_Bool: // casting from bool is like casting from an integer + case Type::STK_Integral: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_Bool: + case Type::STK_Integral: + case Type::STK_Floating: + return true; + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + llvm_unreachable("HLSL doesn't support pointers."); + case Type::STK_IntegralComplex: + case Type::STK_FloatingComplex: + llvm_unreachable("HLSL doesn't support complex types."); + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't support fixed point types."); + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_Floating: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_Floating: + case Type::STK_Bool: + case Type::STK_Integral: + return true; + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + llvm_unreachable("HLSL doesn't support complex types."); + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't support fixed point types."); + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + llvm_unreachable("HLSL doesn't support pointers."); + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_MemberPointer: + case Type::STK_CPointer: + case Type::STK_BlockPointer: + case Type::STK_ObjCObjectPointer: + llvm_unreachable("HLSL doesn't support pointers."); + + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't support fixed point types."); + + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + llvm_unreachable("HLSL doesn't support complex types."); + } + + llvm_unreachable("Unhandled scalar cast"); +} + +// Detect if a type contains a bitfield. Will be removed when +// bitfield support is added to HLSLElementwiseCast +bool SemaHLSL::ContainsBitField(QualType BaseTy) { + llvm::SmallVector<QualType, 16> WorkList; + WorkList.push_back(BaseTy); + while (!WorkList.empty()) { + QualType T = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + // only check aggregate types + if (const auto *AT = dyn_cast<ConstantArrayType>(T)) { + WorkList.push_back(AT->getElementType()); + continue; + } + if (const auto *RT = dyn_cast<RecordType>(T)) { + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) + continue; + + const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(RD); + + if (CXXD && CXXD->isStandardLayout()) + RD = CXXD->getStandardLayoutBaseWithFields(); + + for (const auto *FD : RD->fields()) { + if (FD->isBitField()) + return true; + WorkList.push_back(FD->getType()); + } + continue; + } + } + return false; +} + +// Can we perform an HLSL Elementwise cast? 
+// TODO: update this code when matrices are added; see issue #88060
+bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
+
+  // Don't handle casts where LHS and RHS are any combination of scalar/vector;
+  // there must be an aggregate somewhere.
+  QualType SrcTy = Src->getType();
+  if (SrcTy->isScalarType()) // Always a splat, and this cast doesn't handle that.
+    return false;
+
+  if (SrcTy->isVectorType() &&
+      (DestTy->isScalarType() || DestTy->isVectorType()))
+    return false;
+
+  if (ContainsBitField(DestTy) || ContainsBitField(SrcTy))
+    return false;
+
+  llvm::SmallVector<QualType> DestTypes;
+  BuildFlattenedTypeList(DestTy, DestTypes);
+  llvm::SmallVector<QualType> SrcTypes;
+  BuildFlattenedTypeList(SrcTy, SrcTypes);
+
+  // The size of SrcTypes must be greater than or equal to the size of
+  // DestTypes.
+  if (SrcTypes.size() < DestTypes.size())
+    return false;
+
+  unsigned SrcSize = SrcTypes.size();
+  unsigned DstSize = DestTypes.size();
+  unsigned I;
+  for (I = 0; I < DstSize && I < SrcSize; I++) {
+    if (SrcTypes[I]->isUnionType() || DestTypes[I]->isUnionType())
+      return false;
+    if (!CanPerformScalarCast(SrcTypes[I], DestTypes[I])) {
+      return false;
+    }
+  }
+
+  // Check the rest of the source types for unions.
+  for (; I < SrcSize; I++) {
+    if (SrcTypes[I]->isUnionType())
+      return false;
+  }
+  return true;
+}
+
 ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg) {
   assert(Param->hasAttr<HLSLParamModifierAttr>() &&
          "We should not get here without a parameter modifier expression");
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 7a900780384a91d..3a983421358c7f4 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -522,6 +522,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex,
       case CK_ToUnion:
       case CK_MatrixCast:
       case CK_VectorSplat:
+      case CK_HLSLElementwiseCast:
       case CK_HLSLVectorTruncation: {
         QualType resultType = CastE->getType();
         if (CastE->isGLValue())
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
new file mode 100644
index 000000000000000..18f82bff3b3086e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+
+// array truncation to a scalar
+// CHECK-LABEL: define void {{.*}}call0
+// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
+// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4
+export void call0() {
+  int A[2] = {0,1};
+  float B = (float)A;
+}
+
+// array truncation
+// CHECK-LABEL: define void {{.*}}call1
+// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [1 x i32], align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
+//
CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +export void call1() { + int A[2] = {0,1}; + int B[1] = {4}; + B = (int[1])A; +} + +// just a cast +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x float], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[A]], i8 0, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 4, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float +// CHECK-NEXT: store float [[C]], ptr [[G1]], align 4 +export void call2() { + int A[1] = {0}; + float B[1] = {1.0}; + B = (float[1])A; +} + +// vector to array +// CHECK-LABEL: define void {{.*}}call3 +// CHECK: [[A:%.*]] = alloca <1 x float>, align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: store <1 x float> splat (float 0x3FF3333340000000), ptr [[A]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0 +// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32 +// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 +export void call3() { + float1 A = {1.2}; + int B[1] = {1}; + B = (int[1])A; +} + +// flatten array of vector to array with cast +// CHECK-LABEL: define void {{.*}}call5 +// CHECK: [[A:%.*]] = alloca [1 x <2 x float>], align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [1 x <2 x float>], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A]], ptr align 8 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Tmp]], ptr align 8 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1 +// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8 +// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0 +// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32 +// CHECK-NEXT: store i32 
[[C]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8 +// CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1 +// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32 +// CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4 +export void call5() { + float2 A[1] = {{1.2,3.4}}; + int B[2] = {1,2}; + B = (int[2])A; +} + +// flatten 2d array +// CHECK-LABEL: define void {{.*}}call6 +// CHECK: [[A:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: store i32 [[L4]], ptr [[G2]], align 4 +export void call6() { + int A[2][1] = {{1},{3}}; + int B[2] = {1,2}; + B = (int[2])A; +} + +struct S { + int X; + float Y; +}; + +// flatten and truncate from a struct +// CHECK-LABEL: define void {{.*}}call7 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +export void call7() { + S s = {1, 2.9}; + int A[1] = {1}; + A = (int[1])s; +} + diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl new file mode 100644 index 000000000000000..26fde37c901dd0f --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl @@ -0,0 +1,140 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +struct S { + int X; + float Y; +}; + +// struct truncation to a scalar +// CHECK-LABEL: define void {{.*}}call0 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 
[[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4 +export void call0() { + S s = {1,2}; + int A = (int)s; +} + +// struct from vector +// CHECK-LABEL: define void {{.*}}call1 +// CHECK: [[A:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A]], align 8 +// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[A]], align 8 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0 +// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 +// CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call1() { + int2 A = {1,2}; + S s = (S)A; +} + + +// struct from array +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call2() { + int A[2] = {1,2}; + S s = (S)A; +} + +struct Q { + int Z; +}; + +struct R { + Q q; + float F; +}; + +// struct from nested struct? +// CHECK-LABEL: define void {{.*}}call6 +// CHECK: [[r:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[r]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[r]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load float, ptr [[G4]], align 4 +// CHECK-NEXT: store float [[L4]], ptr [[G2]], align 4 +export void call6() { + R r = {{1}, 2.0}; + S s = (S)r; +} + +// nested struct from array? 
+// CHECK-LABEL: define void {{.*}}call7 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[r:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call7() { + int A[2] = {1,2}; + R r = (R)A; +} + +struct T { + int A; + int B; + int C; +}; + +// struct truncation +// CHECK-LABEL: define void {{.*}}call8 +// CHECK: [[t:%.*]] = alloca %struct.T, align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.T, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[t]], ptr align 4 {{.*}}, i32 12, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[t]], i32 12, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1 +// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2 +// CHECK-NEXT: %load = load i32, ptr %gep2, align 4 +// CHECK-NEXT: store i32 %load, ptr %gep, align 4 +// CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4 +// CHECK-NEXT: %conv = sitofp i32 %load5 to float +// CHECK-NEXT: store float %conv, ptr %gep1, align 4 +export void call8() { + T t = {1,2,3}; + S s = (S)t; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl new file mode 100644 index 000000000000000..f579dfb377de59d --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// vector flat cast from array +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, 
ptr [[Tmp2]], align 8 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: [[D:%.*]] = insertelement <2 x i32> [[C]], i32 [[L]], i64 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[D]], i32 [[L2]], i64 1 +// CHECK-NEXT: store <2 x i32> [[E]], ptr [[B]], align 8 +export void call2() { + int A[2][1] = {{1},{2}}; + int2 B = (int2)A; +} + +struct S { + int X; + float Y; +}; + +// vector flat cast from struct +// CHECK-LABEL: define void {{.*}}call3 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[B:%.*]] = load <2 x i32>, ptr [[Tmp2]], align 8 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0 +// CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4 +// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32 +// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1 +// CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8 +export void call3() { + S s = {1, 2.0}; + int2 A = (int2)s; +} + +// truncate array to scalar +// CHECK-LABEL: define void {{.*}}call4 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4 +export void call4() { + int A[2] = {1,2}; + int B = (int)A; +} + +// truncate struct to scalar +// CHECK-LABEL: define void {{.*}}call5 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4 +export void call5() { + S s = {1, 2.0}; + int A = (int)s; +} diff --git a/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl b/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl index 7f6bdc7e67836b7..b004acdc7c502ce 100644 --- a/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl @@ -17,6 
+17,4 @@ void entry() {
   // These _should_ work in HLSL but aren't yet supported.
   S s;
   float2 GettingStrange = float2(s, s); // expected-error{{no viable conversion from 'S' to 'float'}} expected-error{{no viable conversion from 'S' to 'float'}}
-  S2 s2;
-  float2 EvenStranger = float2(s2); // expected-error{{cannot convert 'S2' to 'float2' (vector of 2 'float' values) without a conversion operator}}
 }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
new file mode 100644
index 000000000000000..c900c83a063a06b
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -verify
+
+export void cantCast() {
+  int A[3] = {1,2,3};
+  int B[4] = {1,2,3,4};
+  B = (int[4])A;
+  // expected-error@-1 {{C-style cast from 'int *' to 'int[4]' is not allowed}}
+}
+
+struct S {
+// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const S' for 1st argument}}
+// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'S' for 1st argument}}
+// expected-note@-3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}}
+  int A : 8;
+  int B;
+};
+
+// casting types which contain bitfields is not yet supported.
+export void cantCast2() {
+  S s = {1,2};
+  int2 C = (int2)s;
+  // expected-error@-1 {{cannot convert 'S' to 'int2' (aka 'vector<int, 2>') without a conversion operator}}
+}
+
+export void cantCast3() {
+  int2 C = {1,2};
+  S s = (S)C;
+  // expected-error@-1 {{no matching conversion for C-style cast from 'int2' (aka 'vector<int, 2>') to 'S'}}
+}
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl
new file mode 100644
index 000000000000000..563d3f02a1485a2
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s -ast-dump | FileCheck %s
+
+// truncation
+// CHECK-LABEL: call1
+// CHECK: CStyleCastExpr {{.*}} 'int[1]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[2]' <HLSLArrayRValue> part_of_explicit_cast
+// CHECK-NEXT: DeclRefExpr {{.*}} 'int[2]' lvalue Var {{.*}} 'A' 'int[2]'
+export void call1() {
+  int A[2] = {0,1};
+  int B[1] = {4};
+  B = (int[1])A;
+}
+
+// flat cast of equal size
+// CHECK-LABEL: call2
+// CHECK: CStyleCastExpr {{.*}} 'float[1]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[1]' <HLSLArrayRValue> part_of_explicit_cast
+// CHECK-NEXT: DeclRefExpr {{.*}} 'int[1]' lvalue Var {{.*}} 'A' 'int[1]'
+export void call2() {
+  int A[1] = {0};
+  float B[1] = {1.0};
+  B = (float[1])A;
+}
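For reference, a short HLSL sketch of the forms the new cast does and does not
cover (an illustrative example with hypothetical names, assuming the same
-finclude-default-header / dxil-pc-shadermodel6.3-library setup as the tests
above):

  struct Pair { int X; float Y; };

  export void sketch() {
    int A[2] = {1, 2};
    Pair P = (Pair)A;    // aggregate-to-aggregate: P.X = A[0], P.Y = (float)A[1]

    int3 V = {1, 2, 3};
    int B[2] = {0, 0};
    B = (int[2])V;       // vector-to-array: trailing source elements are dropped

    // int2 W = (int2)V; // vector-to-vector stays an ordinary vector
    //                   // truncation, not an HLSLElementwiseCast
  }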