yaxunl updated this revision to Diff 135474.
yaxunl added a comment.
Revised by John's comments.
https://reviews.llvm.org/D34367
Files:
lib/CodeGen/CGAtomic.cpp
lib/CodeGen/CGCall.cpp
lib/CodeGen/CGCall.h
lib/CodeGen/CGClass.cpp
lib/CodeGen/CGDecl.cpp
lib/CodeGen/CGExprCXX.cpp
lib/CodeGen/CGGPUBuiltin.cpp
lib/CodeGen/CGObjCGNU.cpp
lib/CodeGen/CGObjCMac.cpp
lib/CodeGen/ItaniumCXXABI.cpp
lib/CodeGen/MicrosoftCXXABI.cpp
test/CodeGenCXX/amdgcn-func-arg.cpp
test/CodeGenOpenCL/addr-space-struct-arg.cl
test/CodeGenOpenCL/byval.cl
Index: test/CodeGenOpenCL/byval.cl
===================================================================
--- test/CodeGenOpenCL/byval.cl
+++ test/CodeGenOpenCL/byval.cl
@@ -1,5 +1,4 @@
// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---opencl %s | FileCheck %s
struct A {
int x[100];
Index: test/CodeGenOpenCL/addr-space-struct-arg.cl
===================================================================
--- test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -enable-var-scope -check-prefixes=COM,AMD %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn-amdhsa-amd | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
typedef struct {
int cells[9];
@@ -35,9 +36,12 @@
int2 y[20];
};
+#if __OPENCL_C_VERSION__ >= 200
+struct LargeStructOneMember g_s;
+#endif
// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
-// AMD-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
+// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
Mat4X4 out;
return out;
@@ -49,15 +53,15 @@
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
-// AMD: load [9 x i32], [9 x i32] addrspace(1)*
-// AMD: call %struct.Mat4X4 @foo([9 x i32]
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
+// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
out[0] = foo(in[1]);
}
// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMD-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
Mat64X64 out;
return out;
@@ -68,66 +72,97 @@
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
-// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
out[0] = foo_large(in[1]);
}
-// AMD-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
+// AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
void FuncOneMember(struct StructOneMember u) {
u.x = (int2)(0, 0);
}
-// AMD-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-NOT: addrspacecast
+// AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
u.x[0] = (int2)(0, 0);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
-// AMD-SAME: (<2 x i32> %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
-// AMD: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
-// AMD: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
-// AMD: call void @FuncOneMember(<2 x i32>
+// AMDGCN20-LABEL: define void @test_indirect_arg_globl()
+// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
+// AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+#if __OPENCL_C_VERSION__ >= 200
+void test_indirect_arg_globl(void) {
+ FuncOneLargeMember(g_s);
+}
+#endif
+
+// AMDGCN-LABEL: define amdgpu_kernel void @test_indirect_arg_local()
+// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+kernel void test_indirect_arg_local(void) {
+ local struct LargeStructOneMember l_s;
+ FuncOneLargeMember(l_s);
+}
+
+// AMDGCN-LABEL: define void @test_indirect_arg_private()
+// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN-NOT: @llvm.memcpy
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+void test_indirect_arg_private(void) {
+ struct LargeStructOneMember p_s;
+ FuncOneLargeMember(p_s);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMDGCN: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
+// AMDGCN: call void @FuncOneMember(<2 x i32>
kernel void KernelOneMember(struct StructOneMember u) {
FuncOneMember(u);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
-// AMD: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
-// AMD: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMD: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
+// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
FuncOneLargeMember(u);
}
-// AMD-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
+// AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
void FuncTwoMember(struct StructTwoMember u) {
u.y = (int2)(0, 0);
}
-// AMD-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
u.y[0] = (int2)(0, 0);
}
-
-// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
-// AMD-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
-// AMD: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
kernel void KernelTwoMember(struct StructTwoMember u) {
FuncTwoMember(u);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
-// AMD-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
-// AMD: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMD: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
+// AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
+// AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
+// AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
FuncLargeTwoMember(u);
}
Index: test/CodeGenCXX/amdgcn-func-arg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s
+
+class A {
+public:
+ int x;
+ A():x(0) {}
+ ~A() {}
+};
+
+class B {
+int x[100];
+};
+
+A g_a;
+B g_b;
+
+void func_with_ref_arg(A &a);
+void func_with_ref_arg(B &b);
+
+// CHECK-LABEL: define void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a)
+// CHECK: %p = alloca %class.A*, align 8, addrspace(5)
+// CHECK: %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A**
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK: store %class.A* %[[r0]], %class.A** %[[r1]], align 8
+void func_with_indirect_arg(A a) {
+ A *p = &a;
+}
+
+// CHECK-LABEL: define void @_Z22test_indirect_arg_autov()
+// CHECK: %a = alloca %class.A, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK: %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK: call void @_ZN1AC1Ev(%class.A* %[[r0]])
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)*
+// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r1]])
+// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) %[[r0]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
+void test_indirect_arg_auto() {
+ A a;
+ func_with_indirect_arg(a);
+ func_with_ref_arg(a);
+}
+
+// CHECK: define void @_Z24test_indirect_arg_globalv()
+// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)*
+// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
+// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*))
+void test_indirect_arg_global() {
+ func_with_indirect_arg(g_a);
+ func_with_ref_arg(g_a);
+}
+
+// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b)
+// CHECK: %p = alloca %class.B*, align 8, addrspace(5)
+// CHECK: %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B**
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK: store %class.B* %[[r0]], %class.B** %[[r1]], align 8
+void func_with_byval_arg(B b) {
+ B *p = &b;
+}
+
+// CHECK-LABEL: define void @_Z19test_byval_arg_autov()
+// CHECK: %b = alloca %class.B, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK: %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)*
+// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]])
+// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) %[[r0]])
+void test_byval_arg_auto() {
+ B b;
+ func_with_byval_arg(b);
+ func_with_ref_arg(b);
+}
+
+// CHECK-LABEL: define void @_Z21test_byval_arg_globalv()
+// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)*
+// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]])
+// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*))
+void test_byval_arg_global() {
+ func_with_byval_arg(g_b);
+ func_with_ref_arg(g_b);
+}
Index: lib/CodeGen/MicrosoftCXXABI.cpp
===================================================================
--- lib/CodeGen/MicrosoftCXXABI.cpp
+++ lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1538,8 +1538,7 @@
}
RValue RV = RValue::get(MostDerivedArg);
if (FPT->isVariadic()) {
- Args.insert(Args.begin() + 1,
- CallArg(RV, getContext().IntTy, /*needscopy=*/false));
+ Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy));
return AddedStructorArgs::prefix(1);
}
Args.add(RV, getContext().IntTy);
Index: lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- lib/CodeGen/ItaniumCXXABI.cpp
+++ lib/CodeGen/ItaniumCXXABI.cpp
@@ -1479,8 +1479,7 @@
llvm::Value *VTT =
CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
- Args.insert(Args.begin() + 1,
- CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
+ Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy));
return AddedStructorArgs::prefix(1); // Added one arg.
}
Index: lib/CodeGen/CGObjCMac.cpp
===================================================================
--- lib/CodeGen/CGObjCMac.cpp
+++ lib/CodeGen/CGObjCMac.cpp
@@ -1708,7 +1708,7 @@
e = Method->param_end(); i != e; ++i, ++I) {
const ParmVarDecl *ParamDecl = (*i);
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
- RValue RV = I->RV;
+ RValue RV = I->getRValue();
assert(RV.isScalar() &&
"NullReturnState::complete - arg not on object");
CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime);
@@ -7075,7 +7075,7 @@
CGF.getPointerAlign());
// Update the message ref argument.
- args[1].RV = RValue::get(mref.getPointer());
+ args[1].setRValue(RValue::get(mref.getPointer()));
// Load the function to call from the message ref table.
Address calleeAddr =
Index: lib/CodeGen/CGObjCGNU.cpp
===================================================================
--- lib/CodeGen/CGObjCGNU.cpp
+++ lib/CodeGen/CGObjCGNU.cpp
@@ -1456,7 +1456,7 @@
}
// Reset the receiver in case the lookup modified it
- ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy, false);
+ ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy);
imp = EnforceType(Builder, imp, MSI.MessengerType);
Index: lib/CodeGen/CGGPUBuiltin.cpp
===================================================================
--- lib/CodeGen/CGGPUBuiltin.cpp
+++ lib/CodeGen/CGGPUBuiltin.cpp
@@ -84,7 +84,7 @@
// We don't know how to emit non-scalar varargs.
if (std::any_of(Args.begin() + 1, Args.end(),
- [](const CallArg &A) { return !A.RV.isScalar(); })) {
+ [](const CallArg &A) { return !A.getRValue().isScalar(); })) {
CGM.ErrorUnsupported(E, "non-scalar arg to printf");
return RValue::get(llvm::ConstantInt::get(IntTy, 0));
}
@@ -97,7 +97,7 @@
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
- ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
+ ArgTypes.push_back(Args[I].getRValue().getScalarVal()->getType());
// Using llvm::StructType is correct only because printf doesn't accept
// aggregates. If we had to handle aggregates here, we'd have to manually
@@ -109,14 +109,14 @@
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
- llvm::Value *Arg = Args[I].RV.getScalarVal();
+ llvm::Value *Arg = Args[I].getRValue().getScalarVal();
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
}
BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
}
// Invoke vprintf and return.
llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
- return RValue::get(
- Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr}));
+ return RValue::get(Builder.CreateCall(
+ VprintfFunc, {Args[0].getRValue().getScalarVal(), BufferPtr}));
}
Index: lib/CodeGen/CGExprCXX.cpp
===================================================================
--- lib/CodeGen/CGExprCXX.cpp
+++ lib/CodeGen/CGExprCXX.cpp
@@ -265,7 +265,7 @@
// when it isn't necessary; just produce the proper effect here.
LValue RHS = isa<CXXOperatorCallExpr>(CE)
? MakeNaturalAlignAddrLValue(
- (*RtlArgs)[0].RV.getScalarVal(),
+ (*RtlArgs)[0].getRValue().getScalarVal(),
(*(CE->arg_begin() + 1))->getType())
: EmitLValue(*CE->arg_begin());
EmitAggregateAssign(This, RHS, CE->getType());
@@ -1490,7 +1490,7 @@
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, Arg.RV, Arg.Ty);
+ Cleanup->setPlacementArg(I, Arg.getRValue(), Arg.Ty);
}
return;
@@ -1521,8 +1521,8 @@
AllocAlign);
for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) {
auto &Arg = NewArgs[I + NumNonPlacementArgs];
- Cleanup->setPlacementArg(I, DominatingValue<RValue>::save(CGF, Arg.RV),
- Arg.Ty);
+ Cleanup->setPlacementArg(
+ I, DominatingValue<RValue>::save(CGF, Arg.getRValue()), Arg.Ty);
}
CGF.initFullExprCleanup();
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1866,6 +1866,22 @@
llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
if (DeclPtr.getType() != IRTy)
DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+ // Indirect argument is in alloca address space, which may be different
+ // from the default address space.
+ auto AllocaAS = CGM.getASTAllocaAddressSpace();
+ auto *V = DeclPtr.getPointer();
+ auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
+ auto DestLangAS =
+ getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
+ if (SrcLangAS != DestLangAS) {
+ assert(getContext().getTargetAddressSpace(SrcLangAS) ==
+ CGM.getDataLayout().getAllocaAddrSpace());
+ auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+ DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+ *this, V, SrcLangAS, DestLangAS, T, true),
+ DeclPtr.getAlignment());
+ }
// Push a destructor cleanup for this parameter if the ABI requires it.
// Don't push a cleanup in a thunk for a method that will also emit a
Index: lib/CodeGen/CGClass.cpp
===================================================================
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -2077,7 +2077,8 @@
assert(Args.size() == 2 && "unexpected argcount for trivial ctor");
QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
- Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy));
+ Address Src(Args[1].getRValue().getScalarVal(),
+ getNaturalTypeAlignment(SrcTy));
LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
QualType DestTy = getContext().getTypeDeclType(ClassDecl);
LValue DestLVal = MakeAddrLValue(This, DestTy);
@@ -2131,8 +2132,7 @@
const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
CallArgList Args;
- CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()),
- /*NeedsCopy=*/false);
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()));
// Forward the parameters.
if (InheritedFromVBase &&
@@ -2196,7 +2196,7 @@
assert(Args.size() >= Params.size() && "too few arguments for call");
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) {
- const RValue &RV = Args[I].RV;
+ const RValue &RV = Args[I].getRValue();
assert(!RV.isComplex() && "complex indirect params not supported");
ParamValue Val = RV.isScalar()
? ParamValue::forDirect(RV.getScalarVal())
Index: lib/CodeGen/CGCall.h
===================================================================
--- lib/CodeGen/CGCall.h
+++ lib/CodeGen/CGCall.h
@@ -213,12 +213,28 @@
};
struct CallArg {
- RValue RV;
+ private:
+ union {
+ RValue RV;
+ LValue LV; /// The l-value from which the argument is derived.
+ };
+ bool HasLV;
+
+ public:
QualType Ty;
- bool NeedsCopy;
- CallArg(RValue rv, QualType ty, bool needscopy)
- : RV(rv), Ty(ty), NeedsCopy(needscopy)
- { }
+ CallArg(RValue rv, QualType ty) : RV(rv), HasLV(false), Ty(ty) {}
+ CallArg(LValue _LV, QualType ty) : LV(_LV), HasLV(true), Ty(ty) {}
+ bool hasLValue() const { return HasLV; }
+ QualType getType() const { return Ty; }
+ RValue getRValue() const;
+ LValue getLValue() const {
+ assert(HasLV);
+ return LV;
+ }
+ void setRValue(RValue _RV) {
+ assert(!HasLV);
+ RV = _RV;
+ }
};
/// CallArgList - Type for representing both the value and type of
@@ -248,8 +264,10 @@
llvm::Instruction *IsActiveIP;
};
- void add(RValue rvalue, QualType type, bool needscopy = false) {
- push_back(CallArg(rvalue, type, needscopy));
+ void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); }
+
+ void addUncopiedAggregate(LValue LV, QualType type) {
+ push_back(CallArg(LV, type));
}
/// Add all the arguments from another CallArgList to this one. After doing
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3391,7 +3391,7 @@
assert(InitialArgSize + 1 == Args.size() &&
"The code below depends on only adding one arg per EmitCallArg");
(void)InitialArgSize;
- RValue RVArg = Args.back().RV;
+ RValue RVArg = Args.back().getRValue();
EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
ParamsToSkip + Idx);
// @llvm.objectsize should never have side-effects and shouldn't need
@@ -3439,6 +3439,12 @@
} // end anonymous namespace
+RValue CallArg::getRValue() const {
+ if (!HasLV)
+ return RV;
+ return LV.asAggregateRValue();
+}
+
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
DisableDebugLocationUpdates Dis(*this, E);
@@ -3499,15 +3505,7 @@
cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
assert(L.isSimple());
- if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) {
- args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
- } else {
- // We can't represent a misaligned lvalue in the CallArgList, so copy
- // to an aligned temporary now.
- LValue Dest = MakeAddrLValue(CreateMemTemp(type), type);
- EmitAggregateCopy(Dest, L, type, L.isVolatile());
- args.add(RValue::getAggregate(Dest.getAddress()), type);
- }
+ args.addUncopiedAggregate(L, type);
return;
}
@@ -3767,7 +3765,18 @@
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
I != E; ++I, ++info_it, ++ArgNo) {
const ABIArgInfo &ArgInfo = info_it->info;
- RValue RV = I->RV;
+ RValue RV;
+ if (I->hasLValue() && I->getLValue().getAlignment() <
+ getContext().getTypeAlignInChars(I->Ty)) {
+ // We can't represent a misaligned lvalue in the CallArgList, so copy
+ // to an aligned temporary now.
+ LValue Dest = MakeAddrLValue(CreateMemTemp(I->Ty), I->Ty);
+ EmitAggregateCopy(Dest, I->getLValue(), I->Ty,
+ I->getLValue().isVolatile());
+ RV = RValue::getAggregate(Dest.getAddress());
+ } else {
+ RV = I->getRValue();
+ }
// Insert a padding argument to ensure proper alignment.
if (IRFunctionArgs.hasPaddingArg(ArgNo))
@@ -3823,22 +3832,29 @@
// source. (This case doesn't occur on any common architecture.)
// 2. If the argument is byval, RV is not sufficiently aligned, and
// we cannot force it to be sufficiently aligned.
- // 3. If the argument is byval, but RV is located in an address space
- // different than that of the argument (0).
+ // 3. If the argument is byval, but RV is not located in default
+ // or alloca address space.
Address Addr = RV.getAggregateAddress();
CharUnits Align = ArgInfo.getIndirectAlign();
const llvm::DataLayout *TD = &CGM.getDataLayout();
- const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
- const unsigned ArgAddrSpace =
- (FirstIRArg < IRFuncTy->getNumParams()
- ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
- : 0);
- if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
- (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
- llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
- Align.getQuantity(), *TD)
- < Align.getQuantity()) ||
- (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
+
+ assert((FirstIRArg >= IRFuncTy->getNumParams() ||
+ IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
+ TD->getAllocaAddrSpace()) &&
+ "indirect argument must be in alloca address space");
+ if (I->hasLValue() &&
+ ((!ArgInfo.getIndirectByVal() &&
+ (I->getLValue().getAlignment() >=
+ getContext().getTypeAlignInChars(I->Ty))) ||
+ (ArgInfo.getIndirectByVal() &&
+ ((I->getLValue().getAddressSpace() != LangAS::Default &&
+ I->getLValue().getAddressSpace() != LangAS::opencl_private &&
+ I->getLValue().getAddressSpace() !=
+ CGM.getASTAllocaAddressSpace()) ||
+ (Addr.getAlignment() < Align &&
+ llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
+ Align.getQuantity(), *TD) <
+ Align.getQuantity()))))) {
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
"byval-temp", false);
@@ -3848,7 +3864,12 @@
EmitAggregateCopy(Dest, Src, I->Ty, RV.isVolatileQualified());
} else {
// Skip the extra memcpy call.
- IRCallArgs[FirstIRArg] = Addr.getPointer();
+ auto *V = Addr.getPointer();
+ auto *T = V->getType()->getPointerElementType()->getPointerTo(
+ CGM.getDataLayout().getAllocaAddrSpace());
+ IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
+ *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
+ true);
}
}
break;
@@ -4358,7 +4379,7 @@
OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
llvm::Value *ParamVal =
- CallArgs[AA->getParamIndex() - 1].RV.getScalarVal();
+ CallArgs[AA->getParamIndex() - 1].getRValue().getScalarVal();
EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
}
}
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -1160,7 +1160,7 @@
if (UseOptimizedLibcall && Res.getScalarVal()) {
llvm::Value *ResVal = Res.getScalarVal();
if (PostOp) {
- llvm::Value *LoadVal1 = Args[1].RV.getScalarVal();
+ llvm::Value *LoadVal1 = Args[1].getRValue().getScalarVal();
ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
}
if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits