yaxunl created this revision. A byval function argument is in the alloca address space in LLVM IR. However, during Clang codegen for C++, the address space of an indirect function argument should match its address space in the source code, even for a byval argument. This is because the destructor of the byval argument may be called in the caller function, and the address of the byval argument may be taken; in either case, the byval function argument is expected to be in its original address space, not the alloca address space.
Also, a non-byval indirect function argument should be in the default address space instead of the alloca address space, since the argument is not necessarily an alloca. Therefore, the byval function argument should be mapped to the temporary variable cast to the default address space. The caller will cast it to the alloca address space when passing it to the callee. In the callee, the argument is also cast to the default address space and then used. https://reviews.llvm.org/D34367 Files: lib/CodeGen/CGCall.cpp lib/CodeGen/CGDecl.cpp test/CodeGenCXX/amdgcn-func-arg.cpp
Index: test/CodeGenCXX/amdgcn-func-arg.cpp =================================================================== --- /dev/null +++ test/CodeGenCXX/amdgcn-func-arg.cpp @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s + +class A { +int x; +public: + A():x(0) {} + ~A() {} +}; + +class B { +int x; +}; + +A g_a; +B g_b; + +void func_with_ref_arg(A &a); +void func_with_ref_arg(B &b); + +// CHECK-LABEL: define void @_Z22func_with_indirect_arg1A(%class.A* %a) +// CHECK: %p = alloca %class.A*, align 8, addrspace(5) +// CHECK: %[[r0:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A** +// CHECK: store %class.A* %a, %class.A** %[[r0]], align 8 +void func_with_indirect_arg(A a) { + A *p = &a; +} + +// CHECK-LABEL: define void @_Z22test_indirect_arg_autov() +// CHECK: %a = alloca %class.A, align 4, addrspace(5) +// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) +// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A* +// CHECK: call void @_ZN1AC1Ev(%class.A* %[[r0]]) +// CHECK: %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A* %[[r1]]) +// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r1]]) +// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) %[[r0]]) +// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]]) +void test_indirect_arg_auto() { + A a; + func_with_indirect_arg(a); + func_with_ref_arg(a); +} + +// CHECK: define void @_Z24test_indirect_arg_globalv() +// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) +// CHECK: %[[r0]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A* %[[r0]]) +// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]]) +// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) addrspacecast 
(%class.A addrspace(1)* @g_a to %class.A*)) +void test_indirect_arg_global() { + func_with_indirect_arg(g_a); + func_with_ref_arg(g_a); +} + +// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b) +// CHECK: %p = alloca %class.B*, align 8, addrspace(5) +// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* +// CHECK: %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B** +// CHECK: store %class.B* %[[r0]], %class.B** %[[r1]], align 8 +void func_with_byval_arg(B b) { + B *p = &b; +} + +// CHECK-LABEL: define void @_Z19test_byval_arg_autov() +// CHECK: %b = alloca %class.B, align 4, addrspace(5) +// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) +// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* +// CHECK: %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +// CHECK: %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)* +// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]]) +// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) %[[r0]]) +void test_byval_arg_auto() { + B b; + func_with_byval_arg(b); + func_with_ref_arg(b); +} + +// CHECK-LABEL: define void @_Z21test_byval_arg_globalv() +// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) +// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +// CHECK: %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)* +// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]]) +// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*)) +void test_byval_arg_global() { + func_with_byval_arg(g_b); + func_with_ref_arg(g_b); +} Index: lib/CodeGen/CGDecl.cpp 
=================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -1819,6 +1819,19 @@ llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS); if (DeclPtr.getType() != IRTy) DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName()); + // Byval argument is in alloca address space, which may be different + // from the default address space. + auto AllocaAS = CGM.getASTAllocaAddressSpace(); + auto *V = DeclPtr.getPointer(); + auto SrcAS = V->getType()->getPointerAddressSpace(); + auto DestAS = getContext().getTargetAddressSpace(LangAS::Default); + if (SrcAS != DestAS) { + assert(SrcAS == CGM.getDataLayout().getAllocaAddrSpace()); + auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS); + DeclPtr = Address(getTargetHooks().performAddrSpaceCast( + *this, V, AllocaAS, LangAS::Default, T, true), + DeclPtr.getAlignment()); + } // Push a destructor cleanup for this parameter if the ABI requires it. // Don't push a cleanup in a thunk for a method that will also emit a Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -1600,7 +1600,9 @@ // indirect arguments are always on the stack, which is alloca addr space. llvm::Type *LTy = ConvertTypeForMem(it->type); ArgTypes[FirstIRArg] = LTy->getPointerTo( - CGM.getDataLayout().getAllocaAddrSpace()); + ArgInfo.getIndirectByVal() + ? 
CGM.getDataLayout().getAllocaAddrSpace() + : getContext().getTargetAddressSpace(LangAS::Default)); break; } @@ -3810,12 +3812,20 @@ } case ABIArgInfo::Indirect: { + auto CastToAllocaAddrSpace = [&](llvm::Value *V) { + if (!ArgInfo.getIndirectByVal()) + return V; + auto *T = V->getType()->getPointerElementType()->getPointerTo( + CGM.getDataLayout().getAllocaAddrSpace()); + return getTargetHooks().performAddrSpaceCast( + *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); + }; assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "indirect-arg-temp", false); - IRCallArgs[FirstIRArg] = Addr.getPointer(); + "indirect-arg-temp"); + IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer()); LValue argLV = MakeAddrLValue(Addr, I->Ty); EmitInitStoreOfNonAggregate(*this, RV, argLV); @@ -3827,29 +3837,32 @@ // 2. If the argument is byval, RV is not sufficiently aligned, and // we cannot force it to be sufficiently aligned. // 3. If the argument is byval, but RV is located in an address space - // different than that of the argument (0). + // different than that of the argument (alloca address space). Address Addr = RV.getAggregateAddress(); CharUnits Align = ArgInfo.getIndirectAlign(); const llvm::DataLayout *TD = &CGM.getDataLayout(); - const unsigned RVAddrSpace = Addr.getType()->getAddressSpace(); + const unsigned RVAddrSpace = Addr.getPointer() + ->stripPointerCasts() + ->getType() + ->getPointerAddressSpace(); const unsigned ArgAddrSpace = (FirstIRArg < IRFuncTy->getNumParams() ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() - : 0); + : (ArgInfo.getIndirectByVal() ? 
TD->getAllocaAddrSpace() : 0)); if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) || (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align && llvm::getOrEnforceKnownAlignment(Addr.getPointer(), Align.getQuantity(), *TD) < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), - "byval-temp", false); - IRCallArgs[FirstIRArg] = AI.getPointer(); + "byval-temp"); + IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(AI.getPointer()); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { // Skip the extra memcpy call. - IRCallArgs[FirstIRArg] = Addr.getPointer(); + IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer()); } } break;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits