https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/71262
>From 184936c339ea73ccfc4349e023ff165aa9f8392e Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Fri, 3 Nov 2023 18:19:15 -0700 Subject: [PATCH 1/4] [coro][pgo] Do not insert counters in the `suspend` block If we do, we can't lower the suspend call to a tail call. If this happened in a loop, it can lead to stack overflow (this was encountered in a benchmark, as an extreme case) We can instrument the other 2 edges instead, as long as they also don't point to the same basic block. --- .../llvm/Transforms/Instrumentation/CFGMST.h | 65 +++++++++++++++---- .../Coroutines/coro-split-musttail.ll | 7 +- .../Coroutines/coro-split-musttail1.ll | 12 ++-- .../Coroutines/coro-split-musttail10.ll | 1 + .../Coroutines/coro-split-musttail11.ll | 1 + .../Coroutines/coro-split-musttail12.ll | 1 + .../Coroutines/coro-split-musttail13.ll | 1 + .../Coroutines/coro-split-musttail2.ll | 1 + .../Coroutines/coro-split-musttail3.ll | 12 ++-- .../Coroutines/coro-split-musttail4.ll | 1 + .../Coroutines/coro-split-musttail5.ll | 1 + .../Coroutines/coro-split-musttail6.ll | 1 + .../Coroutines/coro-split-musttail7.ll | 1 + 13 files changed, 83 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 6ed8a6c6eaf0197..1c5b7ba6d0ed364 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -19,6 +19,8 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST { static const uint32_t CriticalEdgeMultiplier = 1000; + auto GetCoroSuspendSwitch = + [&](const Instruction *TI) -> const 
SwitchInst * { + if (!F.isPresplitCoroutine()) + return nullptr; + if (auto *SWInst = dyn_cast<SwitchInst>(TI)) + if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition())) + if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend) + return SWInst; + return nullptr; + }; + for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); + const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI); uint64_t BBWeight = (BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2); uint64_t Weight = 2; if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { BasicBlock *TargetBB = TI->getSuccessor(i); - bool Critical = isCriticalEdge(TI, i); - uint64_t scaleFactor = BBWeight; - if (Critical) { - if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) - scaleFactor *= CriticalEdgeMultiplier; - else - scaleFactor = UINT64_MAX; + const bool Critical = isCriticalEdge(TI, i); + const bool IsCoroSuspendTarget = + CoroSuspendSwitch && + CoroSuspendSwitch->getDefaultDest() == TargetBB; + // We must not add instrumentation to the BB representing the + // "suspend" path, else CoroSplit won't be able to lower + // llvm.coro.suspend to a tail call. We do want profiling info for + // the other branches (resume/destroy). So we do 2 things: + // 1. we prefer instrumenting those other edges by setting the weight + // of the "suspend" edge to max, and + // 2. we mark the edge as "Removed" to guarantee it is not considered + // for instrumentation. 
That could technically happen: + // (from test/Transforms/Coroutines/coro-split-musttail.ll) + // + // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + // switch i8 %suspend, label %exit [ + // i8 0, label %await.ready + // i8 1, label %exit + // ] + if (IsCoroSuspendTarget) { + Weight = UINT64_MAX; + } else { + bool Critical = isCriticalEdge(TI, i); + uint64_t scaleFactor = BBWeight; + if (Critical) { + if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) + scaleFactor *= CriticalEdgeMultiplier; + else + scaleFactor = UINT64_MAX; + } + if (BPI != nullptr) + Weight = + BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); + if (Weight == 0) + Weight++; } - if (BPI != nullptr) - Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); - if (Weight == 0) - Weight++; auto *E = &addEdge(&BB, TargetBB, Weight); E->IsCritical = Critical; + // See comment above - we must guarantee the coro suspend BB isn't + // instrumented. + if (IsCoroSuspendTarget) + E->Removed = true; LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to " << TargetBB->getName() << " w=" << Weight << "\n"); - // Keep track of entry/exit edges: if (&BB == Entry) { if (Weight > MaxEntryOutWeight) { diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll index 0406135687904bf..825e44471db27ae 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll @@ -1,6 +1,7 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. 
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s define void @f() #0 { entry: @@ -40,7 +41,9 @@ exit: ; Verify that in the resume part resume call is marked with musttail. ; CHECK-LABEL: @f.resume( ; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null) +; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null) +; PGO: call void @llvm.instrprof +; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null) ; CHECK-NEXT: ret void declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll index cd1635b93d2cc24..d0d11fc4495e480 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll @@ -1,6 +1,7 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. 
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s define void @f() #0 { entry: @@ -63,14 +64,17 @@ unreach: ; CHECK-LABEL: @f.resume( ; CHECK: %[[hdl:.+]] = call ptr @g() ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) ; CHECK-NEXT: ret void ; CHECK: %[[hdl2:.+]] = call ptr @h() ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) ; CHECK-NEXT: ret void ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null) +; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null) +; PGO: musttail call fastcc void %[[addr4]](ptr null) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll index 9d73c8bbc57b81a..cdd58b2a084fcd8 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll @@ -1,6 +1,7 @@ ; Tests that we would convert coro.resume to a musttail call if the target is ; Wasm64 with tail-call support. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s target triple = "wasm64-unknown-unknown" diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll index 9bc5b4f0c65d91e..da5d868280e9671 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll @@ -1,6 +1,7 @@ ; Tests that we would convert coro.resume to a musttail call if the target is ; Wasm32 with tail-call support. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll index e7f4bcb9b0ff29a..5baec378876bb1e 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll @@ -1,5 +1,6 @@ ; Tests that coro-split won't convert the cmp instruction prematurely. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s declare void @fakeresume1(ptr) declare void @print() diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll index 2384f9382685bd0..0290e42339e2ad4 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll @@ -1,5 +1,6 @@ ; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s declare void @fakeresume1(ptr) declare void @may_throw(ptr) diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll index 38fc12815c033e7..2f27f79480ab1b4 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll @@ -1,6 +1,7 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s define void @fakeresume1(ptr) { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll index b777f000e33a6d3..4778e3dcaf9957b 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll @@ -1,6 +1,7 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. 
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s define void @f() #0 { entry: @@ -59,14 +60,17 @@ unreach: ; CHECK-LABEL: @f.resume( ; CHECK: %[[hdl:.+]] = call ptr @g() ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) ; CHECK-NEXT: ret void ; CHECK: %[[hdl2:.+]] = call ptr @h() ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) ; CHECK-NEXT: ret void ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) -; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null) +; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null) +; PGO: musttail call fastcc void %[[addr4]](ptr null) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll index 1e0fcdb87a72d30..00ee422ce5863df 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll @@ -1,6 +1,7 @@ ; Tests that coro-split will convert a call before coro.suspend to a musttail call ; while the user of the coro.suspend is a icmpinst. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s define void @fakeresume1(ptr) { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll index d19606491335e50..9afc79abbe88cd8 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll @@ -1,6 +1,7 @@ ; Tests that sinked lifetime markers wouldn't provent optimization ; to convert a resuming call to a musttail call. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s declare void @fakeresume1(ptr align 8) diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll index eea711861c488c5..9c2b1ece1624bc9 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll @@ -4,6 +4,7 @@ ; an extra bitcast instruction in the path, which makes it harder to ; optimize. ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s declare void @fakeresume1(ptr align 8) diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll index c32fe9b0ee304c2..860032bd3cf8e52 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll @@ -4,6 +4,7 @@ ; is that this contains dead instruction generated during the transformation, ; which makes the optimization harder. 
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s declare void @fakeresume1(ptr align 8) >From d77e267674da1dae85ff64fe252e0384af84da61 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Mon, 13 Nov 2023 10:47:28 -0800 Subject: [PATCH 2/4] Factored suspend case out. --- .../llvm/Transforms/Instrumentation/CFGMST.h | 97 +++++++++---------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 1c5b7ba6d0ed364..33d4019eed9b139 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -94,6 +94,39 @@ template <class Edge, class BBInfo> class CFGMST { return It->second.get(); } + void handleCoroSuspendEdge(Edge *E) { + // We must not add instrumentation to the BB representing the + // "suspend" path, else CoroSplit won't be able to lower + // llvm.coro.suspend to a tail call. We do want profiling info for + // the other branches (resume/destroy). So we do 2 things: + // 1. we prefer instrumenting those other edges by setting the weight + // of the "suspend" edge to max, and + // 2. we mark the edge as "Removed" to guarantee it is not considered + // for instrumentation. 
That could technically happen: + // (from test/Transforms/Coroutines/coro-split-musttail.ll) + // + // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + // switch i8 %suspend, label %exit [ + // i8 0, label %await.ready + // i8 1, label %exit + // ] + const BasicBlock *EdgeTarget = E->DestBB; + if (!EdgeTarget) + return; + assert(E->SrcBB); + const Function *F = EdgeTarget->getParent(); + if (!F->isPresplitCoroutine()) + return; + + const Instruction *TI = E->SrcBB->getTerminator(); + if (auto *SWInst = dyn_cast<SwitchInst>(TI)) + if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition())) + if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend && + SWInst->getDefaultDest() == EdgeTarget) { + E->Weight = UINT64_MAX; + E->Removed = true; + } + } // Traverse the CFG using a stack. Find all the edges and assign the weight. // Edges with large weight will be put into MST first so they are less likely // to be instrumented. @@ -123,70 +156,32 @@ template <class Edge, class BBInfo> class CFGMST { static const uint32_t CriticalEdgeMultiplier = 1000; - auto GetCoroSuspendSwitch = - [&](const Instruction *TI) -> const SwitchInst * { - if (!F.isPresplitCoroutine()) - return nullptr; - if (auto *SWInst = dyn_cast<SwitchInst>(TI)) - if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition())) - if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend) - return SWInst; - return nullptr; - }; - for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); - const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI); uint64_t BBWeight = (BFI != nullptr ? 
BFI->getBlockFreq(&BB).getFrequency() : 2); uint64_t Weight = 2; if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { BasicBlock *TargetBB = TI->getSuccessor(i); - const bool Critical = isCriticalEdge(TI, i); - const bool IsCoroSuspendTarget = - CoroSuspendSwitch && - CoroSuspendSwitch->getDefaultDest() == TargetBB; - // We must not add instrumentation to the BB representing the - // "suspend" path, else CoroSplit won't be able to lower - // llvm.coro.suspend to a tail call. We do want profiling info for - // the other branches (resume/destroy). So we do 2 things: - // 1. we prefer instrumenting those other edges by setting the weight - // of the "suspend" edge to max, and - // 2. we mark the edge as "Removed" to guarantee it is not considered - // for instrumentation. That could technically happen: - // (from test/Transforms/Coroutines/coro-split-musttail.ll) - // - // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) - // switch i8 %suspend, label %exit [ - // i8 0, label %await.ready - // i8 1, label %exit - // ] - if (IsCoroSuspendTarget) { - Weight = UINT64_MAX; - } else { - bool Critical = isCriticalEdge(TI, i); - uint64_t scaleFactor = BBWeight; - if (Critical) { - if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) - scaleFactor *= CriticalEdgeMultiplier; - else - scaleFactor = UINT64_MAX; - } - if (BPI != nullptr) - Weight = - BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); - if (Weight == 0) - Weight++; + bool Critical = isCriticalEdge(TI, i); + uint64_t scaleFactor = BBWeight; + if (Critical) { + if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) + scaleFactor *= CriticalEdgeMultiplier; + else + scaleFactor = UINT64_MAX; } + if (BPI != nullptr) + Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); + if (Weight == 0) + Weight++; auto *E = &addEdge(&BB, TargetBB, Weight); E->IsCritical = Critical; - // See comment above - we must guarantee the coro suspend BB isn't - // 
instrumented. - if (IsCoroSuspendTarget) - E->Removed = true; + handleCoroSuspendEdge(E); LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to " << TargetBB->getName() << " w=" << Weight << "\n"); + // Keep track of entry/exit edges: if (&BB == Entry) { if (Weight > MaxEntryOutWeight) { >From 6b85e455905e063662643f5a0c02edd55aef9022 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Mon, 13 Nov 2023 19:13:48 -0800 Subject: [PATCH 3/4] Copied over `coro-split-musttail` tests to InstrProfiling Removed the non-PGO clause in the copy. --- .../Coro/coro-split-musttail.ll | 63 ++++++++++ .../Coro/coro-split-musttail1.ll | 97 +++++++++++++++ .../Coro/coro-split-musttail10.ll | 55 +++++++++ .../Coro/coro-split-musttail11.ll | 55 +++++++++ .../Coro/coro-split-musttail12.ll | 85 +++++++++++++ .../Coro/coro-split-musttail13.ll | 76 ++++++++++++ .../Coro/coro-split-musttail2.ll | 68 ++++++++++ .../Coro/coro-split-musttail3.ll | 91 ++++++++++++++ .../Coro/coro-split-musttail4.ll | 66 ++++++++++ .../Coro/coro-split-musttail5.ll | 63 ++++++++++ .../Coro/coro-split-musttail6.ll | 113 +++++++++++++++++ .../Coro/coro-split-musttail7.ll | 116 ++++++++++++++++++ 12 files changed, 948 insertions(+) create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll create mode 100644 
llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll create mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll new file mode 100644 index 000000000000000..a7321833d74843b --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll @@ -0,0 +1,63 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr2(ptr null) + + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %exit + i8 1, label %exit + ] +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; Verify that in the initial function resume is not marked with musttail. 
+; CHECK-LABEL: @f( +; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null) + +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @f.resume( +; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK: call void @llvm.instrprof +; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll new file mode 100644 index 000000000000000..6098dee9a58035a --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll @@ -0,0 +1,97 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + %br0 = call i8 @switch_result() + switch i8 %br0, label %unreach [ + i8 0, label %await.resume3 + i8 1, label %await.resume1 + i8 2, label %await.resume2 + ] +await.resume1: + %hdl = call ptr @g() + %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0) + call fastcc void %addr2(ptr %hdl) + br label %final.suspend +await.resume2: + %hdl2 = call ptr @h() + %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0) + call fastcc void %addr3(ptr %hdl2) + br label %final.suspend +await.resume3: + %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr4(ptr null) + br label %final.suspend +final.suspend: + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %pre.exit + i8 1, label %exit + ] +pre.exit: + br label %exit +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +unreach: + unreachable +} + +; Verify that in the initial function resume is not marked with musttail. +; CHECK-LABEL: @f( +; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null) + +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK: %[[hdl:.+]] = call ptr @g() +; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) +; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; CHECK-NEXT: ret void +; CHECK: %[[hdl2:.+]] = call ptr @h() +; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) +; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; CHECK-NEXT: ret void +; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK: musttail call fastcc void %[[addr4]](ptr null) +; CHECK-NEXT: ret void + + + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare i8 @switch_result() +declare ptr @g() +declare ptr @h() + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll new file mode 100644 index 000000000000000..f43b10ebf42e5a3 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll @@ -0,0 +1,55 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +target triple = "wasm64-unknown-unknown" + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr2(ptr null) + + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %exit + i8 1, label %exit + ] +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; CHECK: musttail call + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) + +attributes #0 = { presplitcoroutine "target-features"="+tail-call" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll new file mode 100644 index 
000000000000000..fc5bb9a1b20b3de --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll @@ -0,0 +1,55 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr2(ptr null) + + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %exit + i8 1, label %exit + ] +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; CHECK: musttail call + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) + +attributes #0 = { presplitcoroutine "target-features"="+tail-call" } +attributes #1 = { argmemonly 
nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll new file mode 100644 index 000000000000000..634d0106a2e6aea --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll @@ -0,0 +1,85 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(ptr) +declare void @print() + +define void @f(i1 %cond) #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + + %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %init_suspend, label %coro.end [ + i8 0, label %await.ready + i8 1, label %coro.end + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + br i1 %cond, label %then, label %else + +then: + call fastcc void @fakeresume1(ptr align 8 null) + br label %merge + +else: + br label %merge + +merge: + %v0 = phi i1 [0, %then], [1, %else] + br label %compare + +compare: + %cond.cmp = icmp eq i1 %v0, 0 + br i1 %cond.cmp, label %ready, label %prepare + +prepare: + call void @print() + br label %ready + +ready: + %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true) + %switch = icmp ult i8 %suspend, 2 + br i1 %switch, label %cleanup, label %coro.end + +cleanup: + %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame) + %.not = icmp eq ptr %free.handle, null + br i1 %.not, label %coro.end, label %coro.free + +coro.free: + call void @delete(ptr nonnull %free.handle) #2 + br label %coro.end + 
+coro.end: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; CHECK-LABEL: @f.resume( +; CHECK-NOT: } +; CHECK: call void @print() + + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @delete(ptr nonnull) #2 + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll new file mode 100644 index 000000000000000..2f9a14c90107195 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll @@ -0,0 +1,76 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(ptr) +declare void @may_throw(ptr) +declare void @print() + +define void @f(i1 %cond) #0 personality i32 3 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + + %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %init_suspend, label %coro.end [ + i8 0, label %await.ready + i8 1, label %coro.end + ] +await.ready: + call fastcc void @fakeresume1(ptr align 8 null) + invoke void @may_throw(ptr null) + to label %ready unwind label %lpad + +ready: + %save2 = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true) + %switch = icmp ult i8 %suspend, 2 + br i1 %switch, label %cleanup, label %coro.end + +cleanup: + %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame) + %.not = icmp eq ptr %free.handle, null + br i1 %.not, label %coro.end, label %coro.free + +lpad: + %lpval = landingpad { ptr, i32 } + cleanup + + %need.resume = call i1 @llvm.coro.end(ptr null, i1 true, token none) + resume { ptr, i32 } %lpval + +coro.free: + call void @delete(ptr nonnull %free.handle) #2 + br label %coro.end + +coro.end: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; CHECK-LABEL: @f.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1( +; CHECK: } + + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 
@llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @delete(ptr nonnull) #2 + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll new file mode 100644 index 000000000000000..61b61a200e704d5 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll @@ -0,0 +1,68 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @fakeresume1(ptr) { +entry: + ret void; +} + +define void @fakeresume2(ptr align 8) { +entry: + ret void; +} + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %suspend, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume2(ptr align 8 null) + + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %exit + i8 1, label %exit + ] +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; Verify that in the initial function resume is not marked with musttail. 
+; CHECK-LABEL: @g( +; CHECK-NOT: musttail call fastcc void @fakeresume1(ptr null) + +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @g.resume( +; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll new file mode 100644 index 000000000000000..82176b8085e6c7b --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll @@ -0,0 +1,91 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr1(ptr null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + %cmp = icmp eq i8 %suspend, 0 + br i1 %cmp, label %await.suspend, label %exit +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + %br0 = call i8 @switch_result() + switch i8 %br0, label %unreach [ + i8 0, label %await.resume3 + i8 1, label %await.resume1 + i8 2, label %await.resume2 + ] +await.resume1: + %hdl = call ptr @g() + %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0) + call fastcc void %addr2(ptr %hdl) + br label %final.suspend +await.resume2: + %hdl2 = call ptr @h() + %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0) + call fastcc void %addr3(ptr %hdl2) + br label %final.suspend +await.resume3: + %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) + call fastcc void %addr4(ptr null) + br label %final.suspend +final.suspend: + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + %cmp2 = icmp eq i8 %suspend2, 0 + br i1 %cmp2, label %pre.exit, label %exit +pre.exit: + br label %exit +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +unreach: + unreachable +} + +; Verify that in the initial function resume is not marked with musttail. +; CHECK-LABEL: @f( +; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null) + +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK: %[[hdl:.+]] = call ptr @g() +; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0) +; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]]) +; CHECK-NEXT: ret void +; CHECK: %[[hdl2:.+]] = call ptr @h() +; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0) +; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]]) +; CHECK-NEXT: ret void +; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0) +; CHECK: musttail call fastcc void %[[addr4]](ptr null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare i8 @switch_result() +declare ptr @g() +declare ptr @h() + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll new file mode 100644 index 000000000000000..be70fc4b51f1db4 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll @@ -0,0 +1,66 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @fakeresume1(ptr) { +entry: + ret void; +} + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + + %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %init_suspend, label %coro.end [ + i8 0, label %await.ready + i8 1, label %coro.end + ] +await.ready: + %save2 = call token @llvm.coro.save(ptr null) + + call fastcc void @fakeresume1(ptr align 8 null) + %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true) + %switch = icmp ult i8 %suspend, 2 + br i1 %switch, label %cleanup, label %coro.end + +cleanup: + %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame) + %.not = icmp eq ptr %free.handle, null + br i1 %.not, label %coro.end, label %coro.free + +coro.free: + call void @delete(ptr nonnull %free.handle) #2 + br label %coro.end + +coro.end: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; CHECK-LABEL: @f.resume( +; CHECK: musttail call fastcc void @fakeresume1( +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @delete(ptr nonnull) #2 + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } 
+attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll new file mode 100644 index 000000000000000..3e5bddd8e13112d --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll @@ -0,0 +1,63 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(ptr align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %alloc.var = alloca i8 + call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var) + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(ptr %alloc.var) + call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var) + br label %exit +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @g.resume( +; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @consume(ptr) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll new file mode 100644 index 000000000000000..5d068872fcace07 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll @@ -0,0 +1,113 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. 
+ +; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(ptr align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %alloc.var = alloca i64 + call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var) + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(ptr %alloc.var) + call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var) + br label %exit +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @g.resume( +; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK-NEXT: ret void + +; It has a cleanup bb. 
+define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %alloc.var = alloca i64 + call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var) + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %cleanup + ] +await.ready: + call void @consume(ptr %alloc.var) + call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var) + br label %exit + +cleanup: + %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame) + %.not = icmp eq ptr %free.handle, null + br i1 %.not, label %exit, label %coro.free + +coro.free: + call void @delete(ptr nonnull %free.handle) #2 + br label %exit + +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @delete(ptr nonnull) #2 +declare void @consume(ptr) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll new file mode 100644 index 000000000000000..6ea81c6ff0b0961 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll @@ -0,0 +1,116 @@ +; Tests that instrumentation doesn't interfere with lowering (coro-split). +; It should convert coro.resume followed by a suspend to a musttail call. + +; The difference between this and coro-split-musttail5.ll and coro-split-musttail6.ll +; is that this contains dead instruction generated during the transformation, +; which makes the optimization harder.
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(ptr align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %alloc.var = alloca i64 + call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var) + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(ptr %alloc.var) + call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var) + br label %exit +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @g.resume( +; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK-NEXT: ret void + +; It has a cleanup bb. 
+define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %alloc = call ptr @malloc(i64 16) #3 + %alloc.var = alloca i64 + call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var) + %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + + %save = call token @llvm.coro.save(ptr null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(ptr null) + call fastcc void @fakeresume1(ptr align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %cleanup + ] +await.ready: + call void @consume(ptr %alloc.var) + call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var) + br label %exit + +cleanup: + %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame) + %.not = icmp eq ptr %free.handle, null + br i1 %.not, label %exit, label %coro.free + +coro.free: + call void @delete(ptr nonnull %free.handle) #2 + br label %exit + +exit: + call i1 @llvm.coro.end(ptr null, i1 false, token none) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK: musttail call fastcc void @fakeresume1(ptr align 8 null) +; CHECK-NEXT: ret void + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 +declare token @llvm.coro.save(ptr) #2 +declare ptr @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1 +declare i1 @llvm.coro.end(ptr, i1, token) #2 +declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1 +declare ptr @malloc(i64) +declare void @delete(ptr nonnull) #2 +declare void @consume(ptr) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +attributes #0 = { presplitcoroutine } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } >From 713bbb79f22e869de8d3d36571fc141429617f08 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Wed, 15 Nov 2023 10:13:32 -0800 Subject: [PATCH 4/4] No need to weigh the problematic edge if it's removed. 
--- llvm/include/llvm/Transforms/Instrumentation/CFGMST.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 33d4019eed9b139..1bd4976e07abbb4 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -122,11 +122,10 @@ template <class Edge, class BBInfo> class CFGMST { if (auto *SWInst = dyn_cast<SwitchInst>(TI)) if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition())) if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend && - SWInst->getDefaultDest() == EdgeTarget) { - E->Weight = UINT64_MAX; + SWInst->getDefaultDest() == EdgeTarget) E->Removed = true; - } } + // Traverse the CFG using a stack. Find all the edges and assign the weight. // Edges with large weight will be put into MST first so they are less likely // to be instrumented. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits