llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang <details> <summary>Changes</summary> This PR is based on #<!-- -->67018. This PR fixes compilation issue for RVV tuple types as InputOperands for inline asm. --- Currently the compiler generates https://godbolt.org/z/djebPfqxf for tuple type as inline asm inputs and cannot be code generated successfully https://godbolt.org/z/na7T19Krc. This PR fixes Clang by generating https://godbolt.org/z/MsovoxbY9 instead, which can be successfully handled by the back-end. A follow-up PR will handle interactions of RVV tuple type InputOperands and OutputOperands correctly. --- Full diff: https://github.com/llvm/llvm-project/pull/67109.diff 2 Files Affected: - (modified) clang/lib/CodeGen/CGStmt.cpp (+101-6) - (added) clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c (+54) ``````````diff diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6674aa2409a5947..4a2bdde56c5704e 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/PrettyStackTrace.h" @@ -29,10 +30,13 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include <optional> @@ -2392,6 +2396,26 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); } else if (TruncTy->isVectorTy()) { Tmp = Builder.CreateBitCast(Tmp, TruncTy); + } else if (TruncTy->isStructTy() && ResultRegQualTys[i]->isRVVType()) { + 
auto *STy = cast<llvm::StructType>(TruncTy); + auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0)); + + assert(STy->containsHomogeneousScalableVectorTypes() && + "Must be dealing with RVV tuple type"); + + unsigned MinElts = VTy->getElementCount().getKnownMinValue(); + llvm::Value *StructValue = llvm::PoisonValue::get(STy); + + for (unsigned Idx = 0, TupleSize = STy->getNumElements(); + Idx != TupleSize; ++Idx) { + llvm::Value *IdxValue = + llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts); + llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue); + + StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx); + } + + Tmp = StructValue; } } @@ -2399,7 +2423,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultTypeRequiresCast elements correspond to the first // ResultTypeRequiresCast.size() elements of RegResults. if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { - unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); + unsigned Size; + if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) { + Size = cast<llvm::ScalableVectorType>( + cast<llvm::StructType>(TruncTy)->getElementType(0)) + ->getScalarSizeInBits(); + } else + Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { Builder.CreateStore(Tmp, A); @@ -2524,11 +2554,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultRegIsFlagReg.push_back(IsFlagReg); llvm::Type *Ty = ConvertTypeForMem(QTy); + ResultTruncRegTypes.push_back(Ty); + + // Expressing the type as a structure in inline asm calls will complicate + // the current code case, so instead, the return type is set to be a + // single scalable vector, then reconstructed with `vector.extract` and + // `insertvalue`. The type is derived here, and the reconstruction is done + // under EmitAsmStores. 
+ if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) { + // Flatten the structure into a single ScalableVectorType + auto *STy = cast<llvm::StructType>(Ty); + assert(STy->containsHomogeneousScalableVectorTypes() && + isa<llvm::ScalableVectorType>(STy->getElementType(0)) && + "Dealing with RVV tuple (aggregate with homogeneous scalable " + "vectors)"); + + auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0)); + + Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(), + STy->getNumElements() * + VecTy->getMinNumElements()); + } + const bool RequiresCast = Info.allowsRegister() && (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || Ty->isAggregateType()); - ResultTruncRegTypes.push_back(Ty); ResultTypeRequiresCast.push_back(RequiresCast); if (RequiresCast) { @@ -2551,6 +2602,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { QualType InputTy = S.getInputExpr(InputNo)->getType(); QualType OutputType = OutExpr->getType(); + if ((InputTy->isRVVType() && + isa<llvm::StructType>(ConvertType(InputTy))) || + (OutputType->isRVVType() && + isa<llvm::StructType>(ConvertType(OutputType)))) { + llvm_unreachable("FIXME: Deal with RVV type matching."); + } + uint64_t InputSize = getContext().getTypeSize(InputTy); if (getContext().getTypeSize(OutputType) < InputSize) { // Form the asm to return the value as a larger integer or fp type. @@ -2671,6 +2729,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { QualType OutputType = S.getOutputExpr(Output)->getType(); QualType InputTy = InputExpr->getType(); + if ((InputTy->isRVVType() && + isa<llvm::StructType>(ConvertType(InputTy))) || + (OutputType->isRVVType() && + isa<llvm::StructType>(ConvertType(OutputType)))) { + llvm_unreachable("FIXME: Deal with RVV type matching."); + } + if (getContext().getTypeSize(OutputType) > getContext().getTypeSize(InputTy)) { // Use ptrtoint as appropriate so that we can do our extension. 
@@ -2701,10 +2766,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue()); - ArgTypes.push_back(Arg->getType()); - ArgElemTypes.push_back(ArgElemType); - Args.push_back(Arg); - Constraints += InputConstraint; + // Expand RVV tuple type input operands. + if (InputExpr->getType()->isRVVType() && Arg->getType()->isStructTy()) { + std::string ExpandedInputConstraint; + + auto *STy = cast<llvm::StructType>(Arg->getType()); + + assert(STy->containsHomogeneousScalableVectorTypes() && + isa<llvm::ScalableVectorType>(STy->getElementType(0)) && + "Only aggregate type of homogeneous scalable vectors is handled " + "here"); + + auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0)); + + for (unsigned Idx = 0, TupleSize = STy->getNumElements(); + Idx != TupleSize; ++Idx) { + if (ExpandedInputConstraint.size()) + ExpandedInputConstraint += ","; + + ExpandedInputConstraint += InputConstraint; + ArgTypes.push_back(VTy); + ArgElemTypes.push_back(ArgElemType); + + llvm::Value *SubVec = Builder.CreateExtractValue(Arg, {Idx}); + + Args.push_back(SubVec); + } + + Constraints += ExpandedInputConstraint; + } else { + ArgTypes.push_back(Arg->getType()); + ArgElemTypes.push_back(ArgElemType); + Args.push_back(Arg); + Constraints += InputConstraint; + } } // Append the "input" part of inout constraints. 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c new file mode 100644 index 000000000000000..24f403c6625d0aa --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c @@ -0,0 +1,54 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +#include <riscv_vector.h> + +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s + +// CHECK-LABEL: define dso_local void @foo( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4 +// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1 +// CHECK-NEXT: ret void +// +void foo() { + vint32m1x2_t v0; + asm ("#NOP" : "=vr" (v0)); +} + +// CHECK-LABEL: define dso_local void @bar( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5 +// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0) +// 
CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1 +// CHECK-NEXT: ret void +// +void bar() { + vint32m1x2_t v0, v2; + asm ("#NOP" : "=vr" (v0), "=vr" (v2)); +} + +// CHECK-LABEL: define dso_local void @baz( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 0 +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 1 +// CHECK-NEXT: call void asm sideeffect "#NOP", "^vr,^vr"(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i32> [[TMP1]]) #[[ATTR3:[0-9]+]], !srcloc !6 +// CHECK-NEXT: ret void +// +void baz() { + vint32m1x2_t v2; + asm ("#NOP" :: "vr" (v2)); +} `````````` </details> https://github.com/llvm/llvm-project/pull/67109 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits