Author: lattner
Date: Fri Jan 11 00:09:30 2008
New Revision: 45853

URL: http://llvm.org/viewvc/llvm-project?rev=45853&view=rev
Log:
When inlining a function with a byval argument, make an explicit copy of
it in case the callee modifies the struct.
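To make the effect concrete, here is a small sketch in LLVM IR (illustrative only, not taken from the patch; the value names are invented and the "i64 16" size assumes a %struct.ss = { i32, i64 } whose store size is 16 bytes on the target). Where the inliner previously mapped the byval formal straight onto the caller's pointer %S, it now plants a fresh alloca at the top of the caller and copies into it right before the inlined code:

    %b.copy = alloca %struct.ss, align 8      ; explicit copy of the byval aggregate
    %dst = bitcast %struct.ss* %b.copy to i8*
    %src = bitcast %struct.ss* %S to i8*
    call void @llvm.memcpy.i64( i8* %dst, i8* %src, i64 16, i32 1 )

The inlined body is then pointed at %b.copy instead of %S, so stores the callee makes through its byval pointer can no longer clobber the caller's struct.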
Added:
    llvm/trunk/test/Transforms/Inline/byval.ll
Modified:
    llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp

Modified: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=45853&r1=45852&r2=45853&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp Fri Jan 11 00:09:30 2008
@@ -18,7 +18,9 @@
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/ParameterAttributes.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CallSite.h"
 using namespace llvm;
@@ -201,7 +203,6 @@
   BasicBlock *OrigBB = TheCall->getParent();
   Function *Caller = OrigBB->getParent();
 
-
   // GC poses two hazards to inlining, which only occur when the callee has GC:
   //  1. If the caller has no GC, then the callee's GC must be propagated to the
   //     caller.
@@ -213,7 +214,6 @@
     return false;
   }
 
-
   // Get an iterator to the last basic block in the function, which will have
   // the new function inlined after it.
   //
@@ -228,15 +228,66 @@
   { // Scope to destroy ValueMap after cloning.
     DenseMap<const Value*, Value*> ValueMap;
 
-    // Calculate the vector of arguments to pass into the function cloner, which
-    // matches up the formal to the actual argument values.
     assert(std::distance(CalledFunc->arg_begin(), CalledFunc->arg_end()) ==
            std::distance(CS.arg_begin(), CS.arg_end()) &&
            "No varargs calls can be inlined!");
+
+    // Calculate the vector of arguments to pass into the function cloner, which
+    // matches up the formal to the actual argument values.
     CallSite::arg_iterator AI = CS.arg_begin();
+    unsigned ArgNo = 0;
     for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
-           E = CalledFunc->arg_end(); I != E; ++I, ++AI)
-      ValueMap[I] = *AI;
+         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+      Value *ActualArg = *AI;
+
+      // When byval arguments actually inlined, we need to make the copy implied
+      // by them actually explicit.
+      // TODO: If we know that the callee never modifies the struct, we can
+      // remove this copy.
+      if (CalledFunc->paramHasAttr(ArgNo+1, ParamAttr::ByVal)) {
+        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
+        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+
+        // Create the alloca.  If we have TargetData, use nice alignment.
+        unsigned Align = 1;
+        if (TD) Align = TD->getPrefTypeAlignment(AggTy);
+        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
+                                          Caller->begin()->begin());
+        // Emit a memcpy.
+        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+                                                       Intrinsic::memcpy_i64);
+        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
+
+        Value *Size;
+        if (TD == 0)
+          Size = ConstantExpr::getSizeOf(AggTy);
+        else
+          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+
+        // Always generate a memcpy of alignment 1 here because we don't know
+        // the alignment of the src pointer.  Other optimizations can infer
+        // better alignment.
+        Value *CallArgs[] = {
+          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+        };
+        CallInst *TheMemCpy =
+          new CallInst(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
+
+        // If we have a call graph, update it.
+        if (CG) {
+          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+          CallGraphNode *CallerNode = (*CG)[Caller];
+          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+        }
+
+        // Uses of the argument in the function should use our new alloca
+        // instead.
+        ActualArg = NewAlloca;
+      }
+
+      ValueMap[I] = ActualArg;
+    }
 
     // We want the inliner to prune the code as it copies.  We would LOVE to
     // have no dead or constant instructions leftover after inlining occurs

Added: llvm/trunk/test/Transforms/Inline/byval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/byval.ll?rev=45853&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/Inline/byval.ll (added)
+++ llvm/trunk/test/Transforms/Inline/byval.ll Fri Jan 11 00:09:30 2008
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | opt -inline | llvm-dis | grep {llvm.memcpy}
+
+; Inlining a byval struct should cause an explicit copy into an alloca.
+
+  %struct.ss = type { i32, i64 }
+@.str = internal constant [10 x i8] c"%d, %lld\0A\00"    ; <[10 x i8]*> [#uses=1]
+
+define internal void @f(%struct.ss* byval %b) nounwind {
+entry:
+  %tmp = getelementptr %struct.ss* %b, i32 0, i32 0    ; <i32*> [#uses=2]
+  %tmp1 = load i32* %tmp, align 4    ; <i32> [#uses=1]
+  %tmp2 = add i32 %tmp1, 1    ; <i32> [#uses=1]
+  store i32 %tmp2, i32* %tmp, align 4
+  ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+  %S = alloca %struct.ss    ; <%struct.ss*> [#uses=4]
+  %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0    ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp1, align 8
+  %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1    ; <i64*> [#uses=1]
+  store i64 2, i64* %tmp4, align 4
+  call void @f( %struct.ss* byval %S ) nounwind
+  ret i32 0
+}
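For reference, a rough sketch (hand-written, not actual opt output; value names, alignments, and the 16-byte size are assumptions) of what @main from the test above should roughly look like after opt -inline, which is what the RUN line's grep for llvm.memcpy is keying on:

    declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind

    define i32 @main() nounwind {
    entry:
      %b.copy = alloca %struct.ss, align 8    ; copy introduced for the byval argument
      %S = alloca %struct.ss
      %tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0
      store i32 1, i32* %tmp1, align 8
      %tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1
      store i64 2, i64* %tmp4, align 4
      %dst = bitcast %struct.ss* %b.copy to i8*
      %src = bitcast %struct.ss* %S to i8*
      call void @llvm.memcpy.i64( i8* %dst, i8* %src, i64 16, i32 1 )
      %tmp = getelementptr %struct.ss* %b.copy, i32 0, i32 0    ; inlined body of @f
      %tmp2 = load i32* %tmp, align 4
      %tmp3 = add i32 %tmp2, 1
      store i32 %tmp3, i32* %tmp, align 4
      ret i32 0
    }

The important points are that the stores in the inlined body now go through %b.copy rather than %S, and that the memcpy size comes from TargetData when it is available (assumed here to be 16 bytes for { i32, i64 }); without TargetData the patch falls back to ConstantExpr::getSizeOf and an alignment of 1.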