https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/99283
>From dcd98a941d3edb3e5b96d7feeca50f99ea8309d2 Mon Sep 17 00:00:00 2001 From: Yuxuan Chen <y...@meta.com> Date: Mon, 15 Jul 2024 15:01:39 -0700 Subject: [PATCH] Implement noalloc in CoroSplit --- llvm/lib/Transforms/Coroutines/CoroInternal.h | 4 + llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 121 ++++++++++++++---- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 27 ++++ llvm/test/Transforms/Coroutines/ArgAddr.ll | 2 +- .../Transforms/Coroutines/coro-alloca-07.ll | 2 +- .../coro-alloca-loop-carried-address.ll | 2 +- .../Coroutines/coro-lifetime-end.ll | 6 +- .../Coroutines/coro-spill-after-phi.ll | 2 +- .../Transforms/Coroutines/coro-split-00.ll | 7 + 9 files changed, 140 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 5716fd0ea4ab9..d91cccd99a703 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M, const std::initializer_list<StringRef>); void replaceCoroFree(CoroIdInst *CoroId, bool Elide); +void suppressCoroAllocs(CoroIdInst *CoroId); +void suppressCoroAllocs(LLVMContext &Context, + ArrayRef<CoroAllocInst *> CoroAllocs); + /// Attempts to rewrite the location operand of debug intrinsics in terms of /// the coroutine frame pointer, folding pointer offsets into the DIExpression /// of the intrinsic. 
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 9e4da5f8ca961..f78c03e9687c9 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CFG.h" @@ -1179,6 +1180,14 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } +static TypeSize getFrameSizeForShape(coro::Shape &Shape) { + // In the same function all coro.sizes should have the same result type. + auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + return DL.getTypeAllocSize(Shape.FrameTy); +} + static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); @@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { // In the same function all coro.sizes should have the same result type. 
auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + auto *SizeConstant = + ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape)); for (CoroSizeInst *CS : Shape.CoroSizes) { CS->replaceAllUsesWith(SizeConstant); @@ -1455,6 +1462,62 @@ struct SwitchCoroutineSplitter { setCoroInfo(F, Shape, Clones); } + static Function *createNoAllocVariant(Function &F, coro::Shape &Shape, + SmallVectorImpl<Function *> &Clones) { + auto *OrigFnTy = F.getFunctionType(); + auto OldParams = OrigFnTy->params(); + + SmallVector<Type *> NewParams; + NewParams.reserve(OldParams.size() + 1); + NewParams.append(OldParams.begin(), OldParams.end()); + NewParams.push_back(PointerType::getUnqual(Shape.FrameTy)); + + auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams, + OrigFnTy->isVarArg()); + Function *NoAllocF = + Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc"); + ValueToValueMapTy VMap; + unsigned int Idx = 0; + for (const auto &I : F.args()) { + VMap[&I] = NoAllocF->getArg(Idx++); + } + SmallVector<ReturnInst *, 4> Returns; + CloneFunctionInto(NoAllocF, &F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + + if (Shape.CoroBegin) { + auto *NewCoroBegin = + cast<CoroBeginInst>(VMap[Shape.CoroBegin]); + auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId()); + coro::replaceCoroFree(NewCoroId, /*Elide=*/true); + coro::suppressCoroAllocs(NewCoroId); + NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx)); + NewCoroBegin->eraseFromParent(); + } + + Module *M = F.getParent(); + M->getFunctionList().insert(M->end(), NoAllocF); + + removeUnreachableBlocks(*NoAllocF); + auto NewAttrs = NoAllocF->getAttributes(); + // We just appended the frame pointer as the last argument of the new + // function. 
+ auto FrameIdx = NoAllocF->arg_size() - 1; + // When we elide allocation, we read these attributes to determine the + // frame size and alignment. + addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx, + Shape.FrameSize, Shape.FrameAlign, + /*NoAlias=*/false); + + NoAllocF->setAttributes(NewAttrs); + + Clones.push_back(NoAllocF); + // Reset the original function's coro info, make the new noalloc variant + // connected to the original ramp function. + setCoroInfo(F, Shape, Clones); + return NoAllocF; + } + private: // Create a resume clone by cloning the body of the original function, setting // new entry block and replacing coro.suspend an appropriate value to force @@ -1913,6 +1976,21 @@ class PrettyStackTraceFunction : public PrettyStackTraceEntry { }; } // namespace +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { + if (Shape.ABI != coro::ABI::Switch) { + for (auto *End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + } + } else { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } + } +} + static coro::Shape splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, TargetTransformInfo &TTI, bool OptimizeFrame, @@ -1932,10 +2010,10 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape, TTI, MaterializableCallback); replaceFrameSizeAndAlignment(Shape); - + bool isNoSuspendCoroutine = Shape.CoroSuspends.empty(); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they are not needed. 
- if (Shape.CoroSuspends.empty()) { + if (isNoSuspendCoroutine) { handleNoSuspendCoroutine(Shape); } else { switch (Shape.ABI) { @@ -1967,22 +2045,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, for (DbgVariableRecord *DVR : DbgVariableRecords) coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame, false /*UseEntryValue*/); - return Shape; -} -/// Remove calls to llvm.coro.end in the original function. -static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { - if (Shape.ABI != coro::ABI::Switch) { - for (auto *End : Shape.CoroEnds) { - replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); - } - } else { - for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); - } + removeCoroEndsFromRampFunction(Shape); + + if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) { + SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones); } + return Shape; } static void updateCallGraphAfterCoroutineSplit( @@ -2108,18 +2177,18 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, // Split all the coroutines. 
for (LazyCallGraph::Node *N : Coroutines) { Function &F = N->getFunction(); + LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() << "\n"); F.setSplittedCoroutine(); SmallVector<Function *, 4> Clones; - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - const coro::Shape Shape = + coro::Shape Shape = splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame, MaterializableCallback); - removeCoroEndsFromRampFunction(Shape); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) << "Split '" << ore::NV("function", F.getName()) @@ -2135,9 +2204,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, } } - for (auto *PrepareFn : PrepareFns) { - replaceAllPrepares(PrepareFn, CG, C); - } + for (auto *PrepareFn : PrepareFns) { + replaceAllPrepares(PrepareFn, CG, C); + } return PreservedAnalyses::none(); } diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 1a92bc1636257..be257339e0ac4 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -145,6 +145,33 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) { } } +void coro::suppressCoroAllocs(CoroIdInst *CoroId) { + SmallVector<CoroAllocInst *, 4> CoroAllocs; + for (User *U : CoroId->users()) + if (auto *CA = dyn_cast<CoroAllocInst>(U)) + CoroAllocs.push_back(CA); + + if (CoroAllocs.empty()) + return; + + coro::suppressCoroAllocs(CoroId->getContext(), CoroAllocs); +} + +// Replacing llvm.coro.alloc with false will suppress dynamic +// allocation as it is expected for the frontend to generate the code that +// looks like: +// id = coro.id(...) +// mem = coro.alloc(id) ? 
malloc(coro.size()) : 0; +// coro.begin(id, mem) +void coro::suppressCoroAllocs(LLVMContext &Context, + ArrayRef<CoroAllocInst *> CoroAllocs) { + auto *False = ConstantInt::getFalse(Context); + for (auto *CA : CoroAllocs) { + CA->replaceAllUsesWith(False); + CA->eraseFromParent(); + } +} + static void clear(coro::Shape &Shape) { Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll index 1fbc8e1d49767..6c18cc19a9c0c 100644 --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -5,7 +5,7 @@ define nonnull ptr @f(i32 %n) presplitcoroutine { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 ; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @malloc(i32 24) diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll index c81bf333f2059..914fd87ccdffc 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll @@ -62,7 +62,7 @@ declare void @free(ptr) ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i32 48) ; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @f.resume, ptr [[HDL]], align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll b/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll index 
412327a49dcf2..b132f79f13db1 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll @@ -7,7 +7,7 @@ define void @foo() presplitcoroutine { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @foo.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 40) ; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @foo.resume, ptr [[VFRAME]], align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll index 330c61360e20a..d0b856865c215 100644 --- a/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll +++ b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll @@ -13,7 +13,7 @@ declare void @consume.i8.array(ptr) define void @HasNoLifetimeEnd() presplitcoroutine { ; CHECK-LABEL: define void @HasNoLifetimeEnd() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) ; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8 @@ -50,7 +50,7 @@ exit: define void @LifetimeEndAfterCoroEnd() presplitcoroutine { ; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr 
@malloc(i64 16) ; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8 @@ -88,7 +88,7 @@ exit: define void @BranchWithoutLifetimeEnd() presplitcoroutine { ; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) ; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll index cbe57a8d61132..41b53d89c5dfe 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll @@ -8,7 +8,7 @@ define ptr @f(i1 %n) presplitcoroutine { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers) +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}}) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i32 32) ; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]]) ; CHECK-NEXT: store ptr @f.resume, ptr [[HDL]], align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-split-00.ll b/llvm/test/Transforms/Coroutines/coro-split-00.ll index b35bd720b86f9..d89938388eb8e 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-00.ll @@ -63,6 +63,13 @@ suspend: ; CHECK-NOT: call void @free( ; CHECK: ret void +; CHECK-LABEL: @f.noalloc({{.*}}) +; CHECK-NOT: call 
ptr @malloc +; CHECK: call void @print(i32 0) +; CHECK-NOT: call void @print(i32 1) +; CHECK-NOT: call void @free( +; CHECK: ret ptr %{{.*}} + declare ptr @llvm.coro.free(token, ptr) declare i32 @llvm.coro.size.i32() declare i8 @llvm.coro.suspend(token, i1) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits