eopXD created this revision.
eopXD added reviewers: craig.topper, kito-cheng, rogfer01, wangpc.
eopXD requested review of this revision.
This is an oversight in D146872 <https://reviews.llvm.org/D146872>, where
function calls with tuple types were not covered. This commit fixes that.
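
For illustration, the now-covered pattern is a call that forwards an RVV
tuple value, taken from the test added below (qux is the tuple-returning
callee already defined in rvv-tuple-type.c):

  // Passing and returning a tuple of two scalable vectors; at the call,
  // the tuple is flattened into individual <vscale x 2 x i32> arguments.
  __rvv_int32m1x2_t quux(__rvv_int32m1x2_t v_tuple) {
    return qux(v_tuple);
  }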
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D157953
Files:
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
llvm/test/Transforms/SROA/scalable-vector-struct.ll
Index: llvm/test/Transforms/SROA/scalable-vector-struct.ll
===================================================================
--- llvm/test/Transforms/SROA/scalable-vector-struct.ll
+++ llvm/test/Transforms/SROA/scalable-vector-struct.ll
@@ -20,3 +20,34 @@
%val = load %struct.test, %struct.test* %addr, align 4
ret %struct.test %val
}
+
+
+define { <vscale x 2 x i32>, <vscale x 2 x i32> } @return_tuple(<vscale x 2 x i32> %v_tuple.coerce0, <vscale x 2 x i32> %v_tuple.coerce1) {
+; CHECK-LABEL: @return_tuple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]], 1
+; CHECK-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
+; CHECK-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
+; CHECK-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+; CHECK-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+;
+entry:
+ %v_tuple = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+ %v_tuple.addr = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+ %coerce = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+ %0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %v_tuple.coerce0, 0
+ %1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %0, <vscale x 2 x i32> %v_tuple.coerce1, 1
+ store { <vscale x 2 x i32>, <vscale x 2 x i32> } %1, ptr %v_tuple, align 4
+ %v_tuple1 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple, align 4
+ store { <vscale x 2 x i32>, <vscale x 2 x i32> } %v_tuple1, ptr %v_tuple.addr, align 4
+ %2 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple.addr, align 4
+ store { <vscale x 2 x i32>, <vscale x 2 x i32> } %2, ptr %coerce, align 4
+ %coerce.tuple = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %coerce, align 4
+ %coerce.extract0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 0
+ %coerce.extract1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 1
+ %call = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> %coerce.extract0, <vscale x 2 x i32> %coerce.extract1)
+ ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %call
+}
+
+declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32>, <vscale x 2 x i32>)
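
For reference, a minimal sketch of the RUN line that typically drives an
SROA test of this shape (the file's actual invocation may pass additional
pass parameters):

  ; RUN: opt < %s -passes=sroa -S | FileCheck %s

The CHECK lines above then verify that SROA removes all three allocas and
feeds the insertvalue chain directly into the extractvalue/call sequence.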
Index: clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
===================================================================
--- clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
+++ clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
@@ -90,3 +90,36 @@
__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
return v_tuple;
}
+
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT: entry:
+// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT: [[COERCE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT: [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT: [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], ptr [[COERCE]], align 4
+// O0-NEXT: [[COERCE_TUPLE:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[COERCE]], align 4
+// O0-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 0
+// O0-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 1
+// O0-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+// O0-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT: entry:
+// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
+// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
+// AFTER_MEM2REG-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+// AFTER_MEM2REG-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+//
+__rvv_int32m1x2_t quux(__rvv_int32m1x2_t v_tuple) {
+ return qux(v_tuple);
+}
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -5260,30 +5260,50 @@
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
llvm::Type *SrcTy = Src.getElementType();
- uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
- uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
-
- // If the source type is smaller than the destination type of the
- // coerce-to logic, copy the source value into a temp alloca the size
- // of the destination type to allow loading all of it. The bits past
- // the source value are left undef.
- if (SrcSize < DstSize) {
- Address TempAlloca
- = CreateTempAlloca(STy, Src.getAlignment(),
- Src.getName() + ".coerce");
- Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
- Src = TempAlloca;
+ llvm::TypeSize SrcTypeSize =
+ CGM.getDataLayout().getTypeAllocSize(SrcTy);
+ llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
+ if (SrcTypeSize.isScalable()) {
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ "ABI only supports structures with homogeneous scalable vector "
+ "types");
+ assert(SrcTypeSize == DstTypeSize &&
+ "Only allow non-fractional movement of structures with "
+ "homogeneous scalable vector types");
+ assert(NumIRArgs == STy->getNumElements());
+
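+ // The aggregate is a homogeneous struct of scalable vectors, so its
+ // members cannot be addressed with fixed-offset struct GEPs (their
+ // offsets scale with vscale). Load the aggregate once and forward
+ // each member as its own IR argument.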
+ llvm::Value *StoredStructValue =
+ Builder.CreateLoad(Src, Src.getName() + ".tuple");
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ llvm::Value *LI = Builder.CreateExtractValue(
+ StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
+ IRCallArgs[FirstIRArg + i] = LI;
+ }
} else {
- Src = Src.withElementType(STy);
- }
+ uint64_t SrcSize = SrcTypeSize.getFixedValue();
+ uint64_t DstSize = DstTypeSize.getFixedValue();
+
+ // If the source type is smaller than the destination type of the
+ // coerce-to logic, copy the source value into a temp alloca the size
+ // of the destination type to allow loading all of it. The bits past
+ // the source value are left undef.
+ if (SrcSize < DstSize) {
+ Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
+ Src.getName() + ".coerce");
+ Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
+ Src = TempAlloca;
+ } else {
+ Src = Src.withElementType(STy);
+ }
- assert(NumIRArgs == STy->getNumElements());
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = Builder.CreateStructGEP(Src, i);
- llvm::Value *LI = Builder.CreateLoad(EltPtr);
- if (ArgHasMaybeUndefAttr)
- LI = Builder.CreateFreeze(LI);
- IRCallArgs[FirstIRArg + i] = LI;
+ assert(NumIRArgs == STy->getNumElements());
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Address EltPtr = Builder.CreateStructGEP(Src, i);
+ llvm::Value *LI = Builder.CreateLoad(EltPtr);
+ if (ArgHasMaybeUndefAttr)
+ LI = Builder.CreateFreeze(LI);
+ IRCallArgs[FirstIRArg + i] = LI;
+ }
}
} else {
// In the simple case, just pass the coerced loaded value.
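
A note on the new scalable branch: field offsets within a structure of
scalable vectors depend on vscale, so the fixed-size path's per-field
CreateStructGEP/CreateLoad sequence cannot be used. The aggregate is
instead loaded once and each member peeled off with extractvalue, leaving
the sequence below (quoted from the new SROA test) for the middle end to
clean up:

  %coerce.tuple = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %coerce, align 4
  %coerce.extract0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 0
  %coerce.extract1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 1
  %call = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> %coerce.extract0, <vscale x 2 x i32> %coerce.extract1)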