[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Skip incoming values that are the same as the phi in `isGuaranteedNotToBeUndefOrPoison` (#130111) (PR #130474)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: None (llvmbot)


Changes

Backport 462eb7e28ef4507b16a4b45efb356bc6a3523615

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/130474.diff


2 Files Affected:

- (modified) llvm/lib/Analysis/ValueTracking.cpp (+2) 
- (added) llvm/test/Analysis/ValueTracking/phi-self.ll (+89) 


```diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 8a674914641a8..43013294e2b4c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7776,6 +7776,8 @@ static bool isGuaranteedNotToBeUndefOrPoison(
   unsigned Num = PN->getNumIncomingValues();
   bool IsWellDefined = true;
   for (unsigned i = 0; i < Num; ++i) {
+if (PN == PN->getIncomingValue(i))
+  continue;
 auto *TI = PN->getIncomingBlock(i)->getTerminator();
 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
   DT, Depth + 1, Kind)) {
diff --git a/llvm/test/Analysis/ValueTracking/phi-self.ll 
b/llvm/test/Analysis/ValueTracking/phi-self.ll
new file mode 100644
index 0..17afd872cab03
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/phi-self.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes=instsimplify < %s | FileCheck %s
+
+; Test `%r` can be replaced by `%nonpoison`.
+
+define i32 @other_noundef(i32 noundef %arg) {
+; CHECK-LABEL: define i32 @other_noundef(
+; CHECK-SAME: i32 noundef [[ARG:%.*]]) {
+; CHECK-NEXT:  [[START:.*]]:
+; CHECK-NEXT:br label %[[LOOP:.*]]
+; CHECK:   [[LOOP]]:
+; CHECK-NEXT:[[NONPOISON:%.*]] = phi i32 [ 0, %[[START]] ], [ 
[[NONPOISON]], %[[BB0:.*]] ], [ [[ARG]], %[[BB1:.*]] ]
+; CHECK-NEXT:[[I:%.*]] = call i32 @opaque()
+; CHECK-NEXT:switch i32 [[I]], label %[[EXIT:.*]] [
+; CHECK-NEXT:  i32 0, label %[[BB0]]
+; CHECK-NEXT:  i32 1, label %[[BB1]]
+; CHECK-NEXT:]
+; CHECK:   [[EXIT]]:
+; CHECK-NEXT:ret i32 [[NONPOISON]]
+; CHECK:   [[BB0]]:
+; CHECK-NEXT:br label %[[LOOP]]
+; CHECK:   [[BB1]]:
+; CHECK-NEXT:br label %[[LOOP]]
+;
+start:
+  br label %loop
+
+loop:
+  %nonpoison = phi i32 [ 0, %start ], [ %nonpoison, %bb0 ], [ %arg, %bb1 ]
+  %i = call i32 @opaque()
+  switch i32 %i, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+exit:
+  %r = freeze i32 %nonpoison
+  ret i32 %r
+
+bb0:
+  br label %loop
+
+bb1:
+  br label %loop
+}
+
+define i32 @other_poison(i32 %arg) {
+; CHECK-LABEL: define i32 @other_poison(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[START:.*]]:
+; CHECK-NEXT:br label %[[LOOP:.*]]
+; CHECK:   [[LOOP]]:
+; CHECK-NEXT:[[MAYPOISON:%.*]] = phi i32 [ 0, %[[START]] ], [ 
[[MAYPOISON]], %[[BB0:.*]] ], [ [[ARG]], %[[BB1:.*]] ]
+; CHECK-NEXT:[[I:%.*]] = call i32 @opaque()
+; CHECK-NEXT:switch i32 [[I]], label %[[EXIT:.*]] [
+; CHECK-NEXT:  i32 0, label %[[BB0]]
+; CHECK-NEXT:  i32 1, label %[[BB1]]
+; CHECK-NEXT:]
+; CHECK:   [[EXIT]]:
+; CHECK-NEXT:[[R:%.*]] = freeze i32 [[MAYPOISON]]
+; CHECK-NEXT:ret i32 [[R]]
+; CHECK:   [[BB0]]:
+; CHECK-NEXT:br label %[[LOOP]]
+; CHECK:   [[BB1]]:
+; CHECK-NEXT:br label %[[LOOP]]
+;
+start:
+  br label %loop
+
+loop:
+  %maypoison = phi i32 [ 0, %start ], [ %maypoison, %bb0 ], [ %arg, %bb1 ]
+  %i = call i32 @opaque()
+  switch i32 %i, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+exit:
+  %r = freeze i32 %maypoison
+  ret i32 %r
+
+bb0:
+  br label %loop
+
+bb1:
+  br label %loop
+}
+
+declare i32 @opaque()

```




https://github.com/llvm/llvm-project/pull/130474
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Skip incoming values that are the same as the phi in `isGuaranteedNotToBeUndefOrPoison` (#130111) (PR #130474)

2025-03-09 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130474

Backport 462eb7e28ef4507b16a4b45efb356bc6a3523615

Requested by: @nikic

>From 92c994999ad85f0d115d7abe30b1ab3f6d79552d Mon Sep 17 00:00:00 2001
From: DianQK 
Date: Fri, 7 Mar 2025 05:46:32 +0800
Subject: [PATCH] [ValueTracking] Skip incoming values that are the same as the
 phi in `isGuaranteedNotToBeUndefOrPoison` (#130111)

Fixes (keep it open) #130110.

If an incoming value is the PHI itself, we can skip it. If we can
guarantee that the other incoming values are neither undef nor poison,
then we can also guarantee that the PHI is neither. If we cannot
guarantee that, there is no point in analyzing the self-referential value.

(cherry picked from commit 462eb7e28ef4507b16a4b45efb356bc6a3523615)
---
 llvm/lib/Analysis/ValueTracking.cpp  |  2 +
 llvm/test/Analysis/ValueTracking/phi-self.ll | 89 
 2 files changed, 91 insertions(+)
 create mode 100644 llvm/test/Analysis/ValueTracking/phi-self.ll

diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 8a674914641a8..43013294e2b4c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7776,6 +7776,8 @@ static bool isGuaranteedNotToBeUndefOrPoison(
   unsigned Num = PN->getNumIncomingValues();
   bool IsWellDefined = true;
   for (unsigned i = 0; i < Num; ++i) {
+if (PN == PN->getIncomingValue(i))
+  continue;
 auto *TI = PN->getIncomingBlock(i)->getTerminator();
 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
   DT, Depth + 1, Kind)) {
diff --git a/llvm/test/Analysis/ValueTracking/phi-self.ll 
b/llvm/test/Analysis/ValueTracking/phi-self.ll
new file mode 100644
index 0..17afd872cab03
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/phi-self.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes=instsimplify < %s | FileCheck %s
+
+; Test `%r` can be replaced by `%nonpoison`.
+
+define i32 @other_noundef(i32 noundef %arg) {
+; CHECK-LABEL: define i32 @other_noundef(
+; CHECK-SAME: i32 noundef [[ARG:%.*]]) {
+; CHECK-NEXT:  [[START:.*]]:
+; CHECK-NEXT:br label %[[LOOP:.*]]
+; CHECK:   [[LOOP]]:
+; CHECK-NEXT:[[NONPOISON:%.*]] = phi i32 [ 0, %[[START]] ], [ 
[[NONPOISON]], %[[BB0:.*]] ], [ [[ARG]], %[[BB1:.*]] ]
+; CHECK-NEXT:[[I:%.*]] = call i32 @opaque()
+; CHECK-NEXT:switch i32 [[I]], label %[[EXIT:.*]] [
+; CHECK-NEXT:  i32 0, label %[[BB0]]
+; CHECK-NEXT:  i32 1, label %[[BB1]]
+; CHECK-NEXT:]
+; CHECK:   [[EXIT]]:
+; CHECK-NEXT:ret i32 [[NONPOISON]]
+; CHECK:   [[BB0]]:
+; CHECK-NEXT:br label %[[LOOP]]
+; CHECK:   [[BB1]]:
+; CHECK-NEXT:br label %[[LOOP]]
+;
+start:
+  br label %loop
+
+loop:
+  %nonpoison = phi i32 [ 0, %start ], [ %nonpoison, %bb0 ], [ %arg, %bb1 ]
+  %i = call i32 @opaque()
+  switch i32 %i, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+exit:
+  %r = freeze i32 %nonpoison
+  ret i32 %r
+
+bb0:
+  br label %loop
+
+bb1:
+  br label %loop
+}
+
+define i32 @other_poison(i32 %arg) {
+; CHECK-LABEL: define i32 @other_poison(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[START:.*]]:
+; CHECK-NEXT:br label %[[LOOP:.*]]
+; CHECK:   [[LOOP]]:
+; CHECK-NEXT:[[MAYPOISON:%.*]] = phi i32 [ 0, %[[START]] ], [ 
[[MAYPOISON]], %[[BB0:.*]] ], [ [[ARG]], %[[BB1:.*]] ]
+; CHECK-NEXT:[[I:%.*]] = call i32 @opaque()
+; CHECK-NEXT:switch i32 [[I]], label %[[EXIT:.*]] [
+; CHECK-NEXT:  i32 0, label %[[BB0]]
+; CHECK-NEXT:  i32 1, label %[[BB1]]
+; CHECK-NEXT:]
+; CHECK:   [[EXIT]]:
+; CHECK-NEXT:[[R:%.*]] = freeze i32 [[MAYPOISON]]
+; CHECK-NEXT:ret i32 [[R]]
+; CHECK:   [[BB0]]:
+; CHECK-NEXT:br label %[[LOOP]]
+; CHECK:   [[BB1]]:
+; CHECK-NEXT:br label %[[LOOP]]
+;
+start:
+  br label %loop
+
+loop:
+  %maypoison = phi i32 [ 0, %start ], [ %maypoison, %bb0 ], [ %arg, %bb1 ]
+  %i = call i32 @opaque()
+  switch i32 %i, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+exit:
+  %r = freeze i32 %maypoison
+  ret i32 %r
+
+bb0:
+  br label %loop
+
+bb1:
+  br label %loop
+}
+
+declare i32 @opaque()

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Skip incoming values that are the same as the phi in `isGuaranteedNotToBeUndefOrPoison` (#130111) (PR #130474)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:

@nikic What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/130474
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Skip incoming values that are the same as the phi in `isGuaranteedNotToBeUndefOrPoison` (#130111) (PR #130474)

2025-03-09 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130474
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Use CloneFunctionInto for coroutine cloning instead of CloneFunction (PR #129149)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129149
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace IdentityMD set with a predicate in ValueMapper (PR #129147)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129153
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129152
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129150
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129151
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Add a helper to collect debug info from instructions (PR #129145)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129145
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Make DifferentModule case more obvious in CollectDebugInfoForCloning (PR #129146)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Move DebugInfoFinder decl closer to its place of usage (PR #129154)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129154
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Move DebugInfoFinder decl closer to its place of usage (PR #129154)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129154
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Simplify the flow in FindDebugInfoToIdentityMap (PR #129144)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129144
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129152
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129153
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto with a MetadataPredicate (PR #129148)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129148
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Make DifferentModule case more obvious in CollectDebugInfoForCloning (PR #129146)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Use CloneFunctionInto for coroutine cloning instead of CloneFunction (PR #129149)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129149
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129150
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Add a helper to collect debug info from instructions (PR #129145)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129145
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace IdentityMD set with a predicate in ValueMapper (PR #129147)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129147
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Simplify the flow in FindDebugInfoToIdentityMap (PR #129144)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129144
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129151
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto with a MetadataPredicate (PR #129148)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/129148
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support to detect conversion in `make_optional` for `bugprone-optional-value-conversion` (PR #130417)

2025-03-09 Thread via llvm-branch-commits


@@ -115,6 +115,10 @@ Changes in existing checks
   no longer be needed and will be removed. Also fixing false positive from 
   const reference accessors to objects containing optional member.
 
+- Improved :doc:`bugprone-optional-value-conversion

EugeneZelenko wrote:

This should be the first entry in this list.

https://github.com/llvm/llvm-project/pull/130417
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace IdentityMD set with a predicate in ValueMapper (PR #129147)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129147

>From f847bc64ce6a61dc5b9ff04b54accc9b2dac278d Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:53:57 -0800
Subject: [PATCH] [NFC][Cloning] Replace IdentityMD set with a predicate in
 ValueMapper

Summary:
We used the set only to check if it contains certain metadata nodes.
Replacing the set with a predicate makes the intention clearer and the
API more general.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129147, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/5
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  4 +--
 .../llvm/Transforms/Utils/ValueMapper.h   | 27 ++-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  |  7 +++--
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 10 ---
 llvm/lib/Transforms/Utils/ValueMapper.cpp | 15 +--
 5 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index d36f91416db88..2252dda0b9aad 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -194,7 +194,7 @@ void CloneFunctionMetadataInto(Function &NewFunc, const 
Function &OldFunc,
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 /// Clone OldFunc's body into NewFunc.
 void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
@@ -204,7 +204,7 @@ void CloneFunctionBodyInto(Function &NewFunc, const 
Function &OldFunc,
ClonedCodeInfo *CodeInfo = nullptr,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h 
b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 852d7095d1133..560df1d3f7f29 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -37,6 +37,7 @@ class Value;
 using ValueToValueMapTy = ValueMap;
 using DbgRecordIterator = simple_ilist::iterator;
 using MetadataSetTy = SmallPtrSet;
+using MetadataPredicate = std::function;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
@@ -138,8 +139,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) 
{
 /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
 /// pass into the schedule*() functions.
 ///
-/// If an \a IdentityMD set is optionally provided, \a Metadata inside this set
-/// will be mapped onto itself in \a VM on first use.
+/// If an \a IdentityMD predicate is optionally provided, \a Metadata for which
+/// the predicate returns true will be mapped onto itself in \a VM on first 
use.
 ///
 /// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
 /// ValueToValueMapTy.  We should template \a ValueMapper (and its
@@ -158,7 +159,7 @@ class ValueMapper {
   ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
   ValueMapTypeRemapper *TypeMapper = nullptr,
   ValueMaterializer *Materializer = nullptr,
-  const MetadataSetTy *IdentityMD = nullptr);
+  const MetadataPredicate *IdentityMD = nullptr);
   ValueMapper(ValueMapper &&) = delete;
   ValueMapper(const ValueMapper &) = delete;
   ValueMapper &operator=(ValueMapper &&) = delete;
@@ -225,7 +226,7 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
RemapFlags Flags = RF_None,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr) {
+   const MetadataPredicate *IdentityMD = nullptr) {
   return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
   .mapValue(*V);
 }
@@ -239,8 +240,8 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
 /// \c MD.
 ///  3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
 /// re-wrap its return (returning nullptr on nullptr).
-///  4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
-/// and return 

[llvm-branch-commits] [llvm] [NFC][Coro] Use CloneFunctionInto for coroutine cloning instead of CloneFunction (PR #129149)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129149

>From e09a38c4b0eae5f28b70aecf5197607ecdd87220 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:42:14 -0800
Subject: [PATCH] [NFC][Coro] Use CloneFunctionInto for coroutine cloning
 instead of CloneFunction

Summary:
CloneFunctionInto is now fast on its own, so we don't need to use
CloneFunctionAttributes/Metadata/Body separately.

CommonDebugInfo in CoroClone is now unused and is cleaned up separately
in the next diff in the stack.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129149, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/7
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b2c4e64319725..fabbf5f020a74 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -921,14 +921,8 @@ void coro::BaseCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  MetadataPredicate IdentityMD = [&](const Metadata *MD) {
-return CommonDebugInfo.contains(MD);
-  };
-  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
-  CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr,
-&IdentityMD);
-  CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr,
-nullptr, nullptr, &IdentityMD);
+  CloneFunctionInto(NewF, &OrigF, VMap,
+CloneFunctionChangeType::LocalChangesOnly, Returns);
 
   auto &Context = NewF->getContext();
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129152

>From 86a30c82f7620340a0386311dec81f91f48475ae Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:02:37 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning

Summary:
This function is no longer used, so remove it from the header and the
implementation.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129152, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/10
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 14 -
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 21 
 2 files changed, 35 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index ae00c16e7eada..ec1a1d5faa7e9 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -230,20 +230,6 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
 
-/// Collect debug information such as types, compile units, and other
-/// subprograms that are reachable from \p F and can be considered global for
-/// the purposes of cloning (and hence not needing to be cloned).
-///
-/// What debug information should be processed depends on \p Changes: when
-/// cloning into the same module we process \p F's subprogram and instructions;
-/// when into a cloned module, neither of those.
-///
-/// Returns DISubprogram of the cloned function when cloning into the same
-/// module or nullptr otherwise.
-DISubprogram *CollectDebugInfoForCloning(const Function &F,
- CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 11033aeec7dda..f32d9454eb076 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -168,27 +168,6 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
  OldAttrs.getRetAttrs(), NewArgAttrs));
 }
 
-DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
-   CloneFunctionChangeType Changes,
-   DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
-  // DifferentModule is taken care of separately in ClonedFunctionInto as part
-  // of llvm.dbg.cu update.
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return nullptr;
-
-  DISubprogram *SPClonedWithinModule = nullptr;
-  if (Changes < CloneFunctionChangeType::DifferentModule) {
-SPClonedWithinModule = F.getSubprogram();
-  }
-  if (SPClonedWithinModule)
-DIFinder.processSubprogram(SPClonedWithinModule);
-
-  collectDebugInfoFromInstructions(F, DIFinder);
-
-  return SPClonedWithinModule;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129151

>From 7c2cbfc200504b8b827a2b3f4a9b8cac99e7165c Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:00:47 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap

Summary:
This function is no longer needed.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129151, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/9
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 19 ---
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 34 
 2 files changed, 53 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2252dda0b9aad..ae00c16e7eada 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -244,25 +244,6 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
  CloneFunctionChangeType Changes,
  DebugInfoFinder &DIFinder);
 
-/// Based on \p Changes and \p DIFinder return debug info that needs to be
-/// identity mapped during Metadata cloning.
-///
-/// NOTE: Such \a MetadataSetTy can be used by \a CloneFunction* to directly
-/// specify metadata that should be identity mapped (and hence not cloned). The
-/// metadata will be identity mapped in \a ValueToValueMapTy on first use. 
There
-/// are several reasons for doing it this way rather than eagerly identity
-/// mapping metadata nodes in a \a ValueMap:
-/// 1. Mapping metadata is not cheap, particularly because of tracking.
-/// 2. When cloning a Function we identity map lots of global module-level
-///metadata to avoid cloning it, while only a fraction of it is actually
-///used by the function. Mapping on first use is a lot faster for modules
-///with meaningful amount of debug info.
-/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
-///data (e.g. a set of metadata nodes in a \a DICompileUnit).
-MetadataSetTy FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8080dca09be00..11033aeec7dda 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -189,40 +189,6 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const 
Function &F,
   return SPClonedWithinModule;
 }
 
-MetadataSetTy
-llvm::FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule) {
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return {};
-
-  if (DIFinder.subprogram_count() == 0)
-assert(!SPClonedWithinModule &&
-   "Subprogram should be in DIFinder->subprogram_count()...");
-
-  MetadataSetTy MD;
-
-  // Avoid cloning types, compile units, and (other) subprograms.
-  for (DISubprogram *ISP : DIFinder.subprograms())
-if (ISP != SPClonedWithinModule)
-  MD.insert(ISP);
-
-  // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-  for (DIScope *S : DIFinder.scopes()) {
-auto *LScope = dyn_cast(S);
-if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-  MD.insert(S);
-  }
-
-for (DICompileUnit *CU : DIFinder.compile_units())
-  MD.insert(CU);
-
-for (DIType *Type : DIFinder.types())
-  MD.insert(Type);
-
-  return MD;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Move DebugInfoFinder decl closer to its place of usage (PR #129154)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129154

>From 889229d08cbc22ba90bb275b851b75e399812c9d Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:09:23 -0800
Subject: [PATCH] [NFC][Cloning] Move DebugInfoFinder decl closer to its place
 of usage

Summary:
This makes it clear that DIFinder is only really necessary for llvm.dbg.cu 
update.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129154, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/12
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 979cbad0d82c0..3af07594c848b 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,8 +266,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  DebugInfoFinder DIFinder;
-
   if (Changes < CloneFunctionChangeType::DifferentModule) {
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
@@ -320,7 +318,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 Visited.insert(Operand);
 
   // Collect and clone all the compile units referenced from the instructions 
in
-  // the function (e.g. as a scope).
+  // the function (e.g. as instructions' scope).
+  DebugInfoFinder DIFinder;
   collectDebugInfoFromInstructions(*OldFunc, DIFinder);
   for (auto *Unit : DIFinder.compile_units()) {
 MDNode *MappedUnit =

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129153

>From fd9ac7bccdca1ff8c386a712ddfb36a9d0973112 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:07:40 -0800
Subject: [PATCH] [NFC][Cloning] Clean up comments in CloneFunctionInto

Summary:
Some comments no longer make sense or no longer refer to an existing code path.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129153, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/11
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f32d9454eb076..979cbad0d82c0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,24 +266,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  // When we remap instructions within the same module, we want to avoid
-  // duplicating inlined DISubprograms, so record all subprograms we find as we
-  // duplicate instructions and then freeze them in the MD map. We also record
-  // information about dbg.value and dbg.declare to avoid duplicating the
-  // types.
   DebugInfoFinder DIFinder;
 
-  // Track the subprogram attachment that needs to be cloned to fine-tune the
-  // mapping within the same module.
   if (Changes < CloneFunctionChangeType::DifferentModule) {
-// Need to find subprograms, types, and compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
"Expected NewFunc to have the same parent, or no parent");
   } else {
-// Need to find all the compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() != OldFunc->getParent()) &&
"Expected NewFunc to have different parents, or no parent");

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Make DifferentModule case more obvious in CollectDebugInfoForCloning (PR #129146)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129146

>From 52f4ec3ca373c81b7938056c2cd9c3ea890f09fe Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:20:06 -0800
Subject: [PATCH] [NFC][Cloning] Make DifferentModule case more obvious in
 CollectDebugInfoForCloning

Summary:
This should be behaviorally equivalent. DIFinder is only used when
cloning into a DifferentModule as part of llvm.dbg.cu update in
CloneFunctionInto.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/129146, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/4
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index e03c5c27b5ac1..dd1b4fe718053 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -147,8 +147,10 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
 DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
CloneFunctionChangeType Changes,
DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info.
-  if (Changes == CloneFunctionChangeType::ClonedModule)
+  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
+  // DifferentModule is taken care of separately in ClonedFunctionInto as part
+  // of llvm.dbg.cu update.
+  if (Changes >= CloneFunctionChangeType::DifferentModule)
 return nullptr;
 
   DISubprogram *SPClonedWithinModule = nullptr;
@@ -362,6 +364,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   SmallPtrSet Visited;
   for (auto *Operand : NMD->operands())
 Visited.insert(Operand);
+
+  // Collect and clone all the compile units referenced from the instructions 
in
+  // the function (e.g. as a scope).
+  collectDebugInfoFromInstructions(*OldFunc, DIFinder);
   for (auto *Unit : DIFinder.compile_units()) {
 MDNode *MappedUnit =
 MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][format] Implements P3107R5 in <print>. (PR #130500)

2025-03-09 Thread Mark de Wever via llvm-branch-commits

https://github.com/mordante created 
https://github.com/llvm/llvm-project/pull/130500

The follow-up paper P3235R3, which was voted in as a DR, changes the names from
foo_locking to foo_buffered. These changes have been applied in this patch.

Before
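-----------------------------------------------------------
Benchmark                 Time             CPU   Iterations
-----------------------------------------------------------
printf                 71.3 ns         71.3 ns      9525175
print_string            226 ns          226 ns      3105850
print_stack             232 ns          232 ns      3026498
print_direct            530 ns          530 ns      1318447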

After
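-----------------------------------------------------------
Benchmark                 Time             CPU   Iterations
-----------------------------------------------------------
printf                 70.6 ns         70.6 ns      9789585
print_string            222 ns          222 ns      3147678
print_stack             227 ns          227 ns      3084767
print_direct            474 ns          474 ns      1472786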

Note: The performance of libc++'s std::print is still extremely slow compared to
printf. Based on P3107R5, std::print should outperform printf. The main culprit
is the call to isatty, which will be resolved by implementing
LWG4044 "Confusing requirements for std::print on POSIX platforms".

Implements
- P3107R5 - Permit an efficient implementation of ``std::print``

Implements parts of
- P3235R3 std::print more types faster with less memory

Fixes: #105435

>From bfd4e08a6d624437b734af8ee6803c1f528ee7b6 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Sat, 30 Mar 2024 17:35:56 +0100
Subject: [PATCH] [libc++][format] Implements P3107R5 in <print>.

The follow-up paper P3235R3, which was voted in as a DR, changes the names
from foo_locking to foo_buffered. These changes have been applied in this
patch.

Before
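-----------------------------------------------------------
Benchmark                 Time             CPU   Iterations
-----------------------------------------------------------
printf                 71.3 ns         71.3 ns      9525175
print_string            226 ns          226 ns      3105850
print_stack             232 ns          232 ns      3026498
print_direct            530 ns          530 ns      1318447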

After
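-----------------------------------------------------------
Benchmark                 Time             CPU   Iterations
-----------------------------------------------------------
printf                 70.6 ns         70.6 ns      9789585
print_string            222 ns          222 ns      3147678
print_stack             227 ns          227 ns      3084767
print_direct            474 ns          474 ns      1472786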

Note: The performance of libc++'s std::print is still extremely slow
compared to printf. Based on P3107R5, std::print should outperform
printf. The main culprit is the call to isatty, which will be resolved
by implementing
LWG4044 "Confusing requirements for std::print on POSIX platforms".

Implements
- P3107R5 - Permit an efficient implementation of ``std::print``

Implements parts of
- P3235R3 std::print more types faster with less memory

Fixes: #105435
---
 libcxx/docs/ReleaseNotes/21.rst   |   1 +
 libcxx/include/__format/buffer.h  |   3 +
 libcxx/include/print  | 247 +-
 .../test/libcxx/transitive_includes/cxx03.csv |   5 +
 .../test/libcxx/transitive_includes/cxx11.csv |   5 +
 .../test/libcxx/transitive_includes/cxx14.csv |   5 +
 .../test/libcxx/transitive_includes/cxx17.csv |   5 +
 .../test/libcxx/transitive_includes/cxx23.csv |   5 +-
 .../test/libcxx/transitive_includes/cxx26.csv |   4 +
 9 files changed, 267 insertions(+), 13 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index e7cfa625a132c..a1f30b26c5a1d 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -40,6 +40,7 @@ Implemented Papers
 
 - N4258: Cleaning-up noexcept in the Library (`Github 
`__)
 - P1361R2: Integration of chrono with text formatting (`Github 
`__)
+- P3107R5 - Permit an efficient implementation of ``std::print`` (`Github 
`__)
 
 Improvements and New Features
 -
diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
index c88b7f3222010..d6e4ddc840e2d 100644
--- a/libcxx/include/__format/buffer.h
+++ b/libcxx/include/__format/buffer.h
@@ -12,6 +12,7 @@
 
 #include <__algorithm/copy_n.h>
 #include <__algorithm/fill_n.h>
+#include <__algorithm/for_each.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
 #include <__algorithm/ranges_copy.h>
@@ -34,11 +35,13 @@
 #include <__memory/construct_at.h>
 #include <__memory/destroy.h>
 #include <__memory/uninitialized_algorithms.h>
+#include <__system_error/system_error.h>
 #include <__type_traits/add_pointer.h>
 #include <__type_traits/conditional.h

[llvm-branch-commits] [libcxx] [libc++][format] Implements P3107R5 in <print>. (PR #130500)

2025-03-09 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 97732a4da20cf57acecee0a14e68cf57749930b2 bfd4e08a6d624437b734af8ee6803c1f528ee7b6 --extensions ,h -- libcxx/include/__format/buffer.h libcxx/include/print
```





View the diff from clang-format here.


``diff
diff --git a/libcxx/include/print b/libcxx/include/print
index 8eeb2db4ef..f5141c641f 100644
--- a/libcxx/include/print
+++ b/libcxx/include/print
@@ -340,8 +340,8 @@ _LIBCPP_HIDE_FROM_ABI inline void 
__vprint_nonunicode_buffered(
 }
 
 template  // TODO PRINT template or availability markup fires 
too eagerly (http://llvm.org/PR61563).
-_LIBCPP_HIDE_FROM_ABI inline void __vprint_nonunicode_buffered(
-FILE* __stream, string_view __fmt, format_args __args, bool __write_nl) {
+_LIBCPP_HIDE_FROM_ABI inline void
+__vprint_nonunicode_buffered(FILE* __stream, string_view __fmt, format_args 
__args, bool __write_nl) {
   _LIBCPP_ASSERT_NON_NULL(__stream, "__stream must be a valid pointer to an 
output C stream");
   __print::__file_stream_buffer __buffer(__stream);
 
@@ -424,8 +424,8 @@ __vprint_unicode_windows(FILE* __stream, string_view __fmt, 
format_args __args,
 }
 
 template  // TODO PRINT template or availability markup fires 
too eagerly (http://llvm.org/PR61563).
-_LIBCPP_HIDE_FROM_ABI inline void
-__vprint_unicode_buffered_windows(FILE* __stream, string_view __fmt, 
format_args __args, bool __write_nl, bool __is_terminal) {
+_LIBCPP_HIDE_FROM_ABI inline void __vprint_unicode_buffered_windows(
+FILE* __stream, string_view __fmt, format_args __args, bool __write_nl, 
bool __is_terminal) {
   _LIBCPP_ASSERT_NON_NULL(__stream, "__stream must be a valid pointer to an 
output C stream");
 
   if (!__is_terminal)

``




https://github.com/llvm/llvm-project/pull/130500
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace IdentityMD set with a predicate in ValueMapper (PR #129147)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129147

>From 17728ddd7c9e178978b02d693cb8b8806c1a331f Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:53:57 -0800
Subject: [PATCH] [NFC][Cloning] Replace IdentityMD set with a predicate in
 ValueMapper

Summary:
We used the set only to check if it contains certain metadata nodes.
Replacing the set with a predicate makes the intention clearer and the
API more general.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129147, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/5
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  4 +--
 .../llvm/Transforms/Utils/ValueMapper.h   | 27 ++-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  |  7 +++--
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 10 ---
 llvm/lib/Transforms/Utils/ValueMapper.cpp | 15 +--
 5 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index d36f91416db88..2252dda0b9aad 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -194,7 +194,7 @@ void CloneFunctionMetadataInto(Function &NewFunc, const 
Function &OldFunc,
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 /// Clone OldFunc's body into NewFunc.
 void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
@@ -204,7 +204,7 @@ void CloneFunctionBodyInto(Function &NewFunc, const 
Function &OldFunc,
ClonedCodeInfo *CodeInfo = nullptr,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h 
b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 852d7095d1133..560df1d3f7f29 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -37,6 +37,7 @@ class Value;
 using ValueToValueMapTy = ValueMap;
 using DbgRecordIterator = simple_ilist::iterator;
 using MetadataSetTy = SmallPtrSet;
+using MetadataPredicate = std::function;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
@@ -138,8 +139,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) 
{
 /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
 /// pass into the schedule*() functions.
 ///
-/// If an \a IdentityMD set is optionally provided, \a Metadata inside this set
-/// will be mapped onto itself in \a VM on first use.
+/// If an \a IdentityMD predicate is optionally provided, \a Metadata for which
+/// the predicate returns true will be mapped onto itself in \a VM on first 
use.
 ///
 /// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
 /// ValueToValueMapTy.  We should template \a ValueMapper (and its
@@ -158,7 +159,7 @@ class ValueMapper {
   ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
   ValueMapTypeRemapper *TypeMapper = nullptr,
   ValueMaterializer *Materializer = nullptr,
-  const MetadataSetTy *IdentityMD = nullptr);
+  const MetadataPredicate *IdentityMD = nullptr);
   ValueMapper(ValueMapper &&) = delete;
   ValueMapper(const ValueMapper &) = delete;
   ValueMapper &operator=(ValueMapper &&) = delete;
@@ -225,7 +226,7 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
RemapFlags Flags = RF_None,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr) {
+   const MetadataPredicate *IdentityMD = nullptr) {
   return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
   .mapValue(*V);
 }
@@ -239,8 +240,8 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
 /// \c MD.
 ///  3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
 /// re-wrap its return (returning nullptr on nullptr).
-///  4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
-/// and return 

[llvm-branch-commits] [llvm] [NFC][Coro] Use CloneFunctionInto for coroutine cloning instead of CloneFunction (PR #129149)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129149

>From 1a92af832e4ed8f135a3d1270477e81531b68c26 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:42:14 -0800
Subject: [PATCH] [NFC][Coro] Use CloneFunctionInto for coroutine cloning
 instead of CloneFunction

Summary:
CloneFunctionInto now is fast on its own and we don't need to use
CloneFunctionAttributes/Metadata/Body separately.

CommonDebugInfo in CoroClone is now unused and is cleaned up separately
in the next diff in the stack.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129149, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/7
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b2c4e64319725..fabbf5f020a74 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -921,14 +921,8 @@ void coro::BaseCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  MetadataPredicate IdentityMD = [&](const Metadata *MD) {
-return CommonDebugInfo.contains(MD);
-  };
-  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
-  CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr,
-&IdentityMD);
-  CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr,
-nullptr, nullptr, &IdentityMD);
+  CloneFunctionInto(NewF, &OrigF, VMap,
+CloneFunctionChangeType::LocalChangesOnly, Returns);
 
   auto &Context = NewF->getContext();
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129151

>From 8b042ee4d004198d8f87c2cd644b93326215b18c Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:00:47 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap

Summary:
This function is no longer needed.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129151, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/9
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 19 ---
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 34 
 2 files changed, 53 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2252dda0b9aad..ae00c16e7eada 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -244,25 +244,6 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
  CloneFunctionChangeType Changes,
  DebugInfoFinder &DIFinder);
 
-/// Based on \p Changes and \p DIFinder return debug info that needs to be
-/// identity mapped during Metadata cloning.
-///
-/// NOTE: Such \a MetadataSetTy can be used by \a CloneFunction* to directly
-/// specify metadata that should be identity mapped (and hence not cloned). The
-/// metadata will be identity mapped in \a ValueToValueMapTy on first use. 
There
-/// are several reasons for doing it this way rather than eagerly identity
-/// mapping metadata nodes in a \a ValueMap:
-/// 1. Mapping metadata is not cheap, particularly because of tracking.
-/// 2. When cloning a Function we identity map lots of global module-level
-///metadata to avoid cloning it, while only a fraction of it is actually
-///used by the function. Mapping on first use is a lot faster for modules
-///with meaningful amount of debug info.
-/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
-///data (e.g. a set of metadata nodes in a \a DICompileUnit).
-MetadataSetTy FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8080dca09be00..11033aeec7dda 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -189,40 +189,6 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const 
Function &F,
   return SPClonedWithinModule;
 }
 
-MetadataSetTy
-llvm::FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule) {
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return {};
-
-  if (DIFinder.subprogram_count() == 0)
-assert(!SPClonedWithinModule &&
-   "Subprogram should be in DIFinder->subprogram_count()...");
-
-  MetadataSetTy MD;
-
-  // Avoid cloning types, compile units, and (other) subprograms.
-  for (DISubprogram *ISP : DIFinder.subprograms())
-if (ISP != SPClonedWithinModule)
-  MD.insert(ISP);
-
-  // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-  for (DIScope *S : DIFinder.scopes()) {
-auto *LScope = dyn_cast(S);
-if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-  MD.insert(S);
-  }
-
-for (DICompileUnit *CU : DIFinder.compile_units())
-  MD.insert(CU);
-
-for (DIType *Type : DIFinder.types())
-  MD.insert(Type);
-
-  return MD;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Move DebugInfoFinder decl closer to its place of usage (PR #129154)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129154

>From 8d4f26c39099348622834674bf78a2b10e889a8f Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:09:23 -0800
Subject: [PATCH] [NFC][Cloning] Move DebugInfoFinder decl closer to its place
 of usage

Summary:
This makes it clear that DIFinder is only really necessary for llvm.dbg.cu 
update.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129154, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/12
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 979cbad0d82c0..3af07594c848b 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,8 +266,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  DebugInfoFinder DIFinder;
-
   if (Changes < CloneFunctionChangeType::DifferentModule) {
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
@@ -320,7 +318,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 Visited.insert(Operand);
 
   // Collect and clone all the compile units referenced from the instructions 
in
-  // the function (e.g. as a scope).
+  // the function (e.g. as instructions' scope).
+  DebugInfoFinder DIFinder;
   collectDebugInfoFromInstructions(*OldFunc, DIFinder);
   for (auto *Unit : DIFinder.compile_units()) {
 MDNode *MappedUnit =

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129150

>From 0febf8c32e713de76a382c4da34a6861c54f4736 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:47:10 -0800
Subject: [PATCH] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit

Summary:
This cleans up the now unnecessary debug info collection in CoroSplit.

This makes CoroSplit pass almost as fast with -g2 as it is with -g1 on
the sample cpp file used with other parts of this stack:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | MetadataPred (cur) |
|-----------------|----------|----------------|-------------------|--------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 3.8ms              |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms              |
| CollectCommonDI | -        | -              | 63ms              | -                  |
| Speed up        | 1x       | 1.4x           | 4.5x              | 80x                |

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129150, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/8
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 31 ++--
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 37 +++-
 2 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h 
b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index b817e55cad9fc..d1887980fb3bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,9 +48,6 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
-  // Common module-level metadata that's shared between all coroutine clones 
and
-  // doesn't need to be cloned itself.
-  const MetadataSetTy &CommonDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -63,12 +60,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
  Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI, const MetadataSetTy &CommonDebugInfo)
+ TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
 FKind(Shape.ABI == ABI::Async ? CloneKind::Async
   : CloneKind::Continuation),
-Builder(OrigF.getContext()), TTI(TTI), 
CommonDebugInfo(CommonDebugInfo),
-NewF(NewF), ActiveSuspend(ActiveSuspend) {
+Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
+ActiveSuspend(ActiveSuspend) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 assert(NewF && "need existing function for continuation");
@@ -77,11 +74,9 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI,
- const MetadataSetTy &CommonDebugInfo)
+ CloneKind FKind, TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-Builder(OrigF.getContext()), TTI(TTI),
-CommonDebugInfo(CommonDebugInfo) {}
+Builder(OrigF.getContext()), TTI(TTI) {}
 
   virtual ~BaseCloner() {}
 
@@ -89,14 +84,12 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
-   TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo) {
+   TargetTransformInfo &TTI) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 TimeTraceScope FunctionScope("BaseCloner");
 
-BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
-  CommonDebugInfo);
+BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
 Cloner.create();
 return Cloner.getFunction();
   }
@@ -136,9 +129,8 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-   CloneKind FKind, TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo)
-  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, CommonDebugInfo) {}
+   CloneKind FKind, TargetTransformInfo &TTI)
+  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
 
   void create() override;
 
@@ -146,12 +138,11 @@ class SwitchCloner : public BaseCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,

[llvm-branch-commits] [llvm] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto with a MetadataPredicate (PR #129148)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129148

>From 7801cdf0f13a713aa7aa73f0ea9c262c8c6bc36d Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:07:03 -0800
Subject: [PATCH] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto
 with a MetadataPredicate

Summary:
The new code should be functionally identical to the old one (but
faster). The reasoning is as follows.

In the old code when cloning within the module:
1. DIFinder traverses and collects *all* debug info reachable from a
   function, its instructions, and its owning compile unit.
2. Then "compile units, types, other subprograms, and lexical blocks of
   other subprograms" are saved in a set.
3. Then when we MapMetadata, we traverse the function's debug info
   _again_ and those nodes that are in the set from p.2 are identity
   mapped.

This looks equivalent to just doing step 3 with identity mapping based
on a predicate that says to identity map "compile units, types, other
subprograms, and lexical blocks of other subprograms" (same as in step
2). This is what the new code does.

Test Plan:
ninja check-all
There's a bunch of tests around cloning and all of them pass.

stack-info: PR: https://github.com/llvm/llvm-project/pull/129148, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/6
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 32 -
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 502c4898c5940..8080dca09be00 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -50,6 +50,30 @@ void collectDebugInfoFromInstructions(const Function &F,
   DIFinder.processInstruction(*M, I);
   }
 }
+
+// Create a predicate that matches the metadata that should be identity mapped
+// during function cloning.
+MetadataPredicate createIdentityMDPredicate(const Function &F,
+CloneFunctionChangeType Changes) {
+  if (Changes >= CloneFunctionChangeType::DifferentModule)
+return [](const Metadata *MD) { return false; };
+
+  DISubprogram *SPClonedWithinModule = F.getSubprogram();
+  return [=](const Metadata *MD) {
+// Avoid cloning types, compile units, and (other) subprograms.
+if (isa<DICompileUnit>(MD) || isa<DIType>(MD))
+  return true;
+
+if (auto *SP = dyn_cast<DISubprogram>(MD); SP)
+  return SP != SPClonedWithinModule;
+
+// If a subprogram isn't going to be cloned skip its lexical blocks as 
well.
+if (auto *LScope = dyn_cast<DILocalScope>(MD); LScope)
+  return LScope->getSubprogram() != SPClonedWithinModule;
+
+return false;
+  };
+}
 } // namespace
 
 /// See comments in Cloning.h.
@@ -325,13 +349,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 }
   }
 
-  DISubprogram *SPClonedWithinModule =
-  CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
-
-  MetadataPredicate IdentityMD =
-  [MDSet =
-   FindDebugInfoToIdentityMap(Changes, DIFinder, 
SPClonedWithinModule)](
-  const Metadata *MD) { return MDSet.contains(MD); };
+  MetadataPredicate IdentityMD = createIdentityMDPredicate(*OldFunc, Changes);
 
   // Cloning is always a Module level operation, since Metadata needs to be
   // cloned.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129153

>From bee5d3439332be181a468187202fc0cbb13bc840 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:07:40 -0800
Subject: [PATCH] [NFC][Cloning] Clean up comments in CloneFunctionInto

Summary:
Some comments no longer make sense nor refer to an existing code path.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129153, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/11
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f32d9454eb076..979cbad0d82c0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,24 +266,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  // When we remap instructions within the same module, we want to avoid
-  // duplicating inlined DISubprograms, so record all subprograms we find as we
-  // duplicate instructions and then freeze them in the MD map. We also record
-  // information about dbg.value and dbg.declare to avoid duplicating the
-  // types.
   DebugInfoFinder DIFinder;
 
-  // Track the subprogram attachment that needs to be cloned to fine-tune the
-  // mapping within the same module.
   if (Changes < CloneFunctionChangeType::DifferentModule) {
-// Need to find subprograms, types, and compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
"Expected NewFunc to have the same parent, or no parent");
   } else {
-// Need to find all the compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() != OldFunc->getParent()) &&
"Expected NewFunc to have different parents, or no parent");

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129152

>From 28f40fe8969e137db17ff1ba45b589b5c52aeb76 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:02:37 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning

Summary:
This function is no longer used, let's remove it from the header and
impl.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129152, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/10
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 14 -
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 21 
 2 files changed, 35 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index ae00c16e7eada..ec1a1d5faa7e9 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -230,20 +230,6 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
 
-/// Collect debug information such as types, compile units, and other
-/// subprograms that are reachable from \p F and can be considered global for
-/// the purposes of cloning (and hence not needing to be cloned).
-///
-/// What debug information should be processed depends on \p Changes: when
-/// cloning into the same module we process \p F's subprogram and instructions;
-/// when into a cloned module, neither of those.
-///
-/// Returns DISubprogram of the cloned function when cloning into the same
-/// module or nullptr otherwise.
-DISubprogram *CollectDebugInfoForCloning(const Function &F,
- CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 11033aeec7dda..f32d9454eb076 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -168,27 +168,6 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
  OldAttrs.getRetAttrs(), NewArgAttrs));
 }
 
-DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
-   CloneFunctionChangeType Changes,
-   DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
-  // DifferentModule is taken care of separately in ClonedFunctionInto as part
-  // of llvm.dbg.cu update.
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return nullptr;
-
-  DISubprogram *SPClonedWithinModule = nullptr;
-  if (Changes < CloneFunctionChangeType::DifferentModule) {
-SPClonedWithinModule = F.getSubprogram();
-  }
-  if (SPClonedWithinModule)
-DIFinder.processSubprogram(SPClonedWithinModule);
-
-  collectDebugInfoFromInstructions(F, DIFinder);
-
-  return SPClonedWithinModule;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto with a MetadataPredicate (PR #129148)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129148

>From 0f7fb67b79fed8a51234c1f07a660a6842cb455a Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:07:03 -0800
Subject: [PATCH] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto
 with a MetadataPredicate

Summary:
The new code should be functionally identical to the old one (but
faster). The reasoning is as follows.

In the old code when cloning within the module:
1. DIFinder traverses and collects *all* debug info reachable from a
   function, its instructions, and its owning compile unit.
2. Then "compile units, types, other subprograms, and lexical blocks of
   other subprograms" are saved in a set.
3. Then when we MapMetadata, we traverse the function's debug info
   _again_ and those nodes that are in the set from p.2 are identity
   mapped.

This looks equivalent to just doing step 3 with identity mapping based
on a predicate that says to identity map "compile units, types, other
subprograms, and lexical blocks of other subprograms" (same as in step
2). This is what the new code does.

Test Plan:
ninja check-all
There's a bunch of tests around cloning and all of them pass.

stack-info: PR: https://github.com/llvm/llvm-project/pull/129148, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/6
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 32 -
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 502c4898c5940..8080dca09be00 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -50,6 +50,30 @@ void collectDebugInfoFromInstructions(const Function &F,
   DIFinder.processInstruction(*M, I);
   }
 }
+
+// Create a predicate that matches the metadata that should be identity mapped
+// during function cloning.
+MetadataPredicate createIdentityMDPredicate(const Function &F,
+CloneFunctionChangeType Changes) {
+  if (Changes >= CloneFunctionChangeType::DifferentModule)
+return [](const Metadata *MD) { return false; };
+
+  DISubprogram *SPClonedWithinModule = F.getSubprogram();
+  return [=](const Metadata *MD) {
+// Avoid cloning types, compile units, and (other) subprograms.
+if (isa<DICompileUnit>(MD) || isa<DIType>(MD))
+  return true;
+
+if (auto *SP = dyn_cast<DISubprogram>(MD); SP)
+  return SP != SPClonedWithinModule;
+
+// If a subprogram isn't going to be cloned skip its lexical blocks as 
well.
+if (auto *LScope = dyn_cast<DILocalScope>(MD); LScope)
+  return LScope->getSubprogram() != SPClonedWithinModule;
+
+return false;
+  };
+}
 } // namespace
 
 /// See comments in Cloning.h.
@@ -325,13 +349,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 }
   }
 
-  DISubprogram *SPClonedWithinModule =
-  CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
-
-  MetadataPredicate IdentityMD =
-  [MDSet =
-   FindDebugInfoToIdentityMap(Changes, DIFinder, 
SPClonedWithinModule)](
-  const Metadata *MD) { return MDSet.contains(MD); };
+  MetadataPredicate IdentityMD = createIdentityMDPredicate(*OldFunc, Changes);
 
   // Cloning is always a Module level operation, since Metadata needs to be
   // cloned.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129150

>From d16ea4bc22581510a080eea9f17bac5beb81706f Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:47:10 -0800
Subject: [PATCH] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit

Summary:
This cleans up the now unnecessary debug info collection in CoroSplit.

This makes CoroSplit pass almost as fast with -g2 as it is with -g1 on
the sample cpp file used with other parts of this stack:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | MetadataPred (cur) |
|-----------------|----------|----------------|-------------------|--------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 3.8ms              |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms              |
| CollectCommonDI | -        | -              | 63ms              | -                  |
| Speed up        | 1x       | 1.4x           | 4.5x              | 80x                |

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129150, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/8
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 31 ++--
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 37 +++-
 2 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h 
b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index b817e55cad9fc..d1887980fb3bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,9 +48,6 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
-  // Common module-level metadata that's shared between all coroutine clones 
and
-  // doesn't need to be cloned itself.
-  const MetadataSetTy &CommonDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -63,12 +60,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
  Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI, const MetadataSetTy &CommonDebugInfo)
+ TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
 FKind(Shape.ABI == ABI::Async ? CloneKind::Async
   : CloneKind::Continuation),
-Builder(OrigF.getContext()), TTI(TTI), 
CommonDebugInfo(CommonDebugInfo),
-NewF(NewF), ActiveSuspend(ActiveSuspend) {
+Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
+ActiveSuspend(ActiveSuspend) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 assert(NewF && "need existing function for continuation");
@@ -77,11 +74,9 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI,
- const MetadataSetTy &CommonDebugInfo)
+ CloneKind FKind, TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-Builder(OrigF.getContext()), TTI(TTI),
-CommonDebugInfo(CommonDebugInfo) {}
+Builder(OrigF.getContext()), TTI(TTI) {}
 
   virtual ~BaseCloner() {}
 
@@ -89,14 +84,12 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
-   TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo) {
+   TargetTransformInfo &TTI) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 TimeTraceScope FunctionScope("BaseCloner");
 
-BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
-  CommonDebugInfo);
+BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
 Cloner.create();
 return Cloner.getFunction();
   }
@@ -136,9 +129,8 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-   CloneKind FKind, TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo)
-  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, CommonDebugInfo) {}
+   CloneKind FKind, TargetTransformInfo &TTI)
+  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
 
   void create() override;
 
@@ -146,12 +138,11 @@ class SwitchCloner : public BaseCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,

[llvm-branch-commits] [llvm] [NFC][Cloning] Replace IdentityMD set with a predicate in ValueMapper (PR #129147)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129147

>From ca21a682f1ab2bad9d1e76e4811ccb0aca2b9cf2 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:53:57 -0800
Subject: [PATCH] [NFC][Cloning] Replace IdentityMD set with a predicate in
 ValueMapper

Summary:
We used the set only to check if it contains certain metadata nodes.
Replacing the set with a predicate makes the intention clearer and the
API more general.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129147, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/5
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  4 +--
 .../llvm/Transforms/Utils/ValueMapper.h   | 27 ++-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  |  7 +++--
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 10 ---
 llvm/lib/Transforms/Utils/ValueMapper.cpp | 15 +--
 5 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index d36f91416db88..2252dda0b9aad 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -194,7 +194,7 @@ void CloneFunctionMetadataInto(Function &NewFunc, const 
Function &OldFunc,
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 /// Clone OldFunc's body into NewFunc.
 void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
@@ -204,7 +204,7 @@ void CloneFunctionBodyInto(Function &NewFunc, const 
Function &OldFunc,
ClonedCodeInfo *CodeInfo = nullptr,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr);
+   const MetadataPredicate *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h 
b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 852d7095d1133..560df1d3f7f29 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -37,6 +37,7 @@ class Value;
 using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
 using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
 using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
+using MetadataPredicate = std::function<bool(const Metadata *)>;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
@@ -138,8 +139,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) 
{
 /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
 /// pass into the schedule*() functions.
 ///
-/// If an \a IdentityMD set is optionally provided, \a Metadata inside this set
-/// will be mapped onto itself in \a VM on first use.
+/// If an \a IdentityMD predicate is optionally provided, \a Metadata for which
+/// the predicate returns true will be mapped onto itself in \a VM on first 
use.
 ///
 /// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
 /// ValueToValueMapTy.  We should template \a ValueMapper (and its
@@ -158,7 +159,7 @@ class ValueMapper {
   ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
   ValueMapTypeRemapper *TypeMapper = nullptr,
   ValueMaterializer *Materializer = nullptr,
-  const MetadataSetTy *IdentityMD = nullptr);
+  const MetadataPredicate *IdentityMD = nullptr);
   ValueMapper(ValueMapper &&) = delete;
   ValueMapper(const ValueMapper &) = delete;
   ValueMapper &operator=(ValueMapper &&) = delete;
@@ -225,7 +226,7 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
RemapFlags Flags = RF_None,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr,
-   const MetadataSetTy *IdentityMD = nullptr) {
+   const MetadataPredicate *IdentityMD = nullptr) {
   return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
   .mapValue(*V);
 }
@@ -239,8 +240,8 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy 
&VM,
 /// \c MD.
 ///  3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
 /// re-wrap its return (returning nullptr on nullptr).
-///  4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
-/// and return 

[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129152

>From 395b2e45dc02fee9bf5979201cc8a46224200660 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:02:37 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning

Summary:
This function is no longer used, let's remove it from the header and
impl.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129152, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/10
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 14 -
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 21 
 2 files changed, 35 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index ae00c16e7eada..ec1a1d5faa7e9 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -230,20 +230,6 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
 
-/// Collect debug information such as types, compile units, and other
-/// subprograms that are reachable from \p F and can be considered global for
-/// the purposes of cloning (and hence not needing to be cloned).
-///
-/// What debug information should be processed depends on \p Changes: when
-/// cloning into the same module we process \p F's subprogram and instructions;
-/// when into a cloned module, neither of those.
-///
-/// Returns DISubprogram of the cloned function when cloning into the same
-/// module or nullptr otherwise.
-DISubprogram *CollectDebugInfoForCloning(const Function &F,
- CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 11033aeec7dda..f32d9454eb076 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -168,27 +168,6 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
  OldAttrs.getRetAttrs(), NewArgAttrs));
 }
 
-DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
-   CloneFunctionChangeType Changes,
-   DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
-  // DifferentModule is taken care of separately in ClonedFunctionInto as part
-  // of llvm.dbg.cu update.
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return nullptr;
-
-  DISubprogram *SPClonedWithinModule = nullptr;
-  if (Changes < CloneFunctionChangeType::DifferentModule) {
-SPClonedWithinModule = F.getSubprogram();
-  }
-  if (SPClonedWithinModule)
-DIFinder.processSubprogram(SPClonedWithinModule);
-
-  collectDebugInfoFromInstructions(F, DIFinder);
-
-  return SPClonedWithinModule;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129153

>From f711cec94afd13bf77c20add5e176dc03d3123f8 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:07:40 -0800
Subject: [PATCH] [NFC][Cloning] Clean up comments in CloneFunctionInto

Summary:
Some comments no longer make sense nor refer to an existing code path.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129153, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/11
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f32d9454eb076..979cbad0d82c0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,24 +266,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  // When we remap instructions within the same module, we want to avoid
-  // duplicating inlined DISubprograms, so record all subprograms we find as we
-  // duplicate instructions and then freeze them in the MD map. We also record
-  // information about dbg.value and dbg.declare to avoid duplicating the
-  // types.
   DebugInfoFinder DIFinder;
 
-  // Track the subprogram attachment that needs to be cloned to fine-tune the
-  // mapping within the same module.
   if (Changes < CloneFunctionChangeType::DifferentModule) {
-// Need to find subprograms, types, and compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
"Expected NewFunc to have the same parent, or no parent");
   } else {
-// Need to find all the compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() != OldFunc->getParent()) &&
"Expected NewFunc to have different parents, or no parent");

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Make DifferentModule case more obvious in CollectDebugInfoForCloning (PR #129146)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129146

>From 7344ea1c8c0464652ab7dc6be8ff9921aab72736 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:20:06 -0800
Subject: [PATCH] [NFC][Cloning] Make DifferentModule case more obvious in
 CollectDebugInfoForCloning

Summary:
This should be behaviorally equivalent. DIFinder is only used when
cloning into a DifferentModule as part of llvm.dbg.cu update in
CloneFunctionInto.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/129146, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/4
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index e03c5c27b5ac1..dd1b4fe718053 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -147,8 +147,10 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
 DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
CloneFunctionChangeType Changes,
DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info.
-  if (Changes == CloneFunctionChangeType::ClonedModule)
+  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
+  // DifferentModule is taken care of separately in ClonedFunctionInto as part
+  // of llvm.dbg.cu update.
+  if (Changes >= CloneFunctionChangeType::DifferentModule)
 return nullptr;
 
   DISubprogram *SPClonedWithinModule = nullptr;
@@ -362,6 +364,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   SmallPtrSet Visited;
   for (auto *Operand : NMD->operands())
 Visited.insert(Operand);
+
+  // Collect and clone all the compile units referenced from the instructions 
in
+  // the function (e.g. as a scope).
+  collectDebugInfoFromInstructions(*OldFunc, DIFinder);
   for (auto *Unit : DIFinder.compile_units()) {
 MDNode *MappedUnit =
 MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Add a helper to collect debug info from instructions (PR #129145)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129145

>From 181b126bed444ea0a24799f32be75fc2f62e260c Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 10:03:41 -0800
Subject: [PATCH] [NFC][Cloning] Add a helper to collect debug info from
 instructions

Summary:
Just moving around. This helper will be used for further refactoring.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/129145, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/3
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 7a309f7390c77..e03c5c27b5ac1 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -40,6 +40,18 @@ using namespace llvm;
 
 #define DEBUG_TYPE "clone-function"
 
+namespace {
+void collectDebugInfoFromInstructions(const Function &F,
+  DebugInfoFinder &DIFinder) {
+  const Module *M = F.getParent();
+  if (M) {
+// Inspect instructions to process e.g. DILexicalBlocks of inlined 
functions
+for (const auto &I : instructions(F))
+  DIFinder.processInstruction(*M, I);
+  }
+}
+} // namespace
+
 /// See comments in Cloning.h.
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy 
&VMap,
   const Twine &NameSuffix, Function *F,
@@ -146,12 +158,7 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const 
Function &F,
   if (SPClonedWithinModule)
 DIFinder.processSubprogram(SPClonedWithinModule);
 
-  const Module *M = F.getParent();
-  if (M) {
-// Inspect instructions to process e.g. DILexicalBlocks of inlined 
functions
-for (const auto &I : instructions(F))
-  DIFinder.processInstruction(*M, I);
-  }
+  collectDebugInfoFromInstructions(F, DIFinder);
 
   return SPClonedWithinModule;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread Congcong Cai via llvm-branch-commits

HerrCai0907 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on 
> Graphite: 
> https://app.graphite.dev/github/pr/llvm/llvm-project/130494?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#130494** (https://app.graphite.dev/github/pr/llvm/llvm-project/130494?utm_source=stack-comment-icon) 
  👈 View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/130494?utm_source=stack-comment-view-in-graphite
* **#130493** (https://app.graphite.dev/github/pr/llvm/llvm-project/130493?utm_source=stack-comment-icon)
* `main`




This stack of pull requests is managed by Graphite 
(https://graphite.dev?utm-source=stack-comment). Learn more about stacking: 
https://stacking.dev/?utm_source=stack-comment


https://github.com/llvm/llvm-project/pull/130494
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread Congcong Cai via llvm-branch-commits

https://github.com/HerrCai0907 ready_for_review 
https://github.com/llvm/llvm-project/pull/130494
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-tools-extra

Author: Congcong Cai (HerrCai0907)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130494.diff


9 Files Affected:

- (modified) clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp 
(+101-55) 
- (modified) clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h (+3) 
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+2-1) 
- (modified) 
clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst (+44) 
- (added) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp
 (+50) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
 (+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values-before-cxx23.cpp
 (+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp 
(+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-wrong-config.cpp
 (+4-3) 


``diff
diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp 
b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
index dbe59233df699..023c834d5700f 100644
--- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
@@ -13,6 +13,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "llvm/Support/Casting.h"
+#include 
 
 using namespace clang::ast_matchers;
 
@@ -39,34 +41,47 @@ ConstCorrectnessCheck::ConstCorrectnessCheck(StringRef Name,
 : ClangTidyCheck(Name, Context),
   AnalyzeValues(Options.get("AnalyzeValues", true)),
   AnalyzeReferences(Options.get("AnalyzeReferences", true)),
+  AnalyzePointers(Options.get("AnalyzePointers", true)),
   WarnPointersAsValues(Options.get("WarnPointersAsValues", false)),
+  WarnPointersAsPointers(Options.get("WarnPointersAsPointers", true)),
   TransformValues(Options.get("TransformValues", true)),
   TransformReferences(Options.get("TransformReferences", true)),
   TransformPointersAsValues(
   Options.get("TransformPointersAsValues", false)),
+  TransformPointersAsPointers(
+  Options.get("TransformPointersAsPointers", true)),
   AllowedTypes(
   utils::options::parseStringList(Options.get("AllowedTypes", ""))) {
-  if (AnalyzeValues == false && AnalyzeReferences == false)
+  if (AnalyzeValues == false && AnalyzeReferences == false &&
+  AnalyzePointers == false)
 this->configurationDiag(
 "The check 'misc-const-correctness' will not "
-"perform any analysis because both 'AnalyzeValues' and "
-"'AnalyzeReferences' are false.");
+"perform any analysis because both 'AnalyzeValues', "
+"'AnalyzeReferences' and 'AnalyzePointers' are false.");
 }
 
 void ConstCorrectnessCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "AnalyzeValues", AnalyzeValues);
   Options.store(Opts, "AnalyzeReferences", AnalyzeReferences);
+  Options.store(Opts, "AnalyzePointers", AnalyzePointers);
   Options.store(Opts, "WarnPointersAsValues", WarnPointersAsValues);
+  Options.store(Opts, "WarnPointersAsPointers", WarnPointersAsPointers);
 
   Options.store(Opts, "TransformValues", TransformValues);
   Options.store(Opts, "TransformReferences", TransformReferences);
   Options.store(Opts, "TransformPointersAsValues", TransformPointersAsValues);
+  Options.store(Opts, "TransformPointersAsPointers",
+TransformPointersAsPointers);
   Options.store(Opts, "AllowedTypes",
 utils::options::serializeStringList(AllowedTypes));
 }
 
 void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) {
-  const auto ConstType = hasType(isConstQualified());
+  const auto ConstType = hasType(
+  qualType(isConstQualified(),
+   // pointee check will check the const pointer and const array
+   unless(pointerType()), unless(arrayType(;
+
   const auto ConstReference = hasType(references(isConstQualified()));
   const auto RValueReference = hasType(
   referenceType(anyOf(rValueReferenceType(), 
unless(isSpelledAsLValue();
@@ -124,6 +139,11 @@ void ConstCorrectnessCheck::check(const 
MatchFinder::MatchResult &Result) {
   const auto *LocalScope = Result.Nodes.getNodeAs("scope");
   const auto *Variable = Result.Nodes.getNodeAs("local-value");
   const auto *Function = Result.Nodes.getNodeAs("function-decl");
+  const auto *VarDeclStmt = Result.Nodes.getNodeAs("decl-stmt");
+  // It can not be guaranteed that the variable is declared isolated,
+  // therefore a transformation might effect the other variables as well and
+  // be incorrect.
+  const bool CanBeFixIt = VarDeclStmt != nullptr && 
VarDeclStmt->isSingleDecl();
 
   /// If the variable was declared in a template it might be

[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread Congcong Cai via llvm-branch-commits

https://github.com/HerrCai0907 created 
https://github.com/llvm/llvm-project/pull/130494

None

>From 4c1a7915fca1933773d3e9f3cf0b8c07916f1dcf Mon Sep 17 00:00:00 2001
From: Congcong Cai 
Date: Sun, 9 Mar 2025 15:43:37 +
Subject: [PATCH] [clang-tidy] support pointee mutation check in
 misc-const-correctness

---
 .../clang-tidy/misc/ConstCorrectnessCheck.cpp | 156 --
 .../clang-tidy/misc/ConstCorrectnessCheck.h   |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst   |   3 +-
 .../checks/misc/const-correctness.rst |  44 +
 .../const-correctness-pointer-as-pointers.cpp |  50 ++
 .../const-correctness-transform-values.cpp|   1 +
 .../const-correctness-values-before-cxx23.cpp |   1 +
 .../misc/const-correctness-values.cpp |   1 +
 .../misc/const-correctness-wrong-config.cpp   |   7 +-
 9 files changed, 207 insertions(+), 59 deletions(-)
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp

diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp 
b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
index dbe59233df699..023c834d5700f 100644
--- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
@@ -13,6 +13,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "llvm/Support/Casting.h"
+#include 
 
 using namespace clang::ast_matchers;
 
@@ -39,34 +41,47 @@ ConstCorrectnessCheck::ConstCorrectnessCheck(StringRef Name,
 : ClangTidyCheck(Name, Context),
   AnalyzeValues(Options.get("AnalyzeValues", true)),
   AnalyzeReferences(Options.get("AnalyzeReferences", true)),
+  AnalyzePointers(Options.get("AnalyzePointers", true)),
   WarnPointersAsValues(Options.get("WarnPointersAsValues", false)),
+  WarnPointersAsPointers(Options.get("WarnPointersAsPointers", true)),
   TransformValues(Options.get("TransformValues", true)),
   TransformReferences(Options.get("TransformReferences", true)),
   TransformPointersAsValues(
   Options.get("TransformPointersAsValues", false)),
+  TransformPointersAsPointers(
+  Options.get("TransformPointersAsPointers", true)),
   AllowedTypes(
   utils::options::parseStringList(Options.get("AllowedTypes", ""))) {
-  if (AnalyzeValues == false && AnalyzeReferences == false)
+  if (AnalyzeValues == false && AnalyzeReferences == false &&
+  AnalyzePointers == false)
 this->configurationDiag(
 "The check 'misc-const-correctness' will not "
-"perform any analysis because both 'AnalyzeValues' and "
-"'AnalyzeReferences' are false.");
+"perform any analysis because both 'AnalyzeValues', "
+"'AnalyzeReferences' and 'AnalyzePointers' are false.");
 }
 
 void ConstCorrectnessCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "AnalyzeValues", AnalyzeValues);
   Options.store(Opts, "AnalyzeReferences", AnalyzeReferences);
+  Options.store(Opts, "AnalyzePointers", AnalyzePointers);
   Options.store(Opts, "WarnPointersAsValues", WarnPointersAsValues);
+  Options.store(Opts, "WarnPointersAsPointers", WarnPointersAsPointers);
 
   Options.store(Opts, "TransformValues", TransformValues);
   Options.store(Opts, "TransformReferences", TransformReferences);
   Options.store(Opts, "TransformPointersAsValues", TransformPointersAsValues);
+  Options.store(Opts, "TransformPointersAsPointers",
+TransformPointersAsPointers);
   Options.store(Opts, "AllowedTypes",
 utils::options::serializeStringList(AllowedTypes));
 }
 
 void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) {
-  const auto ConstType = hasType(isConstQualified());
+  const auto ConstType = hasType(
+  qualType(isConstQualified(),
+   // pointee check will check the const pointer and const array
+   unless(pointerType()), unless(arrayType(;
+
   const auto ConstReference = hasType(references(isConstQualified()));
   const auto RValueReference = hasType(
   referenceType(anyOf(rValueReferenceType(), 
unless(isSpelledAsLValue();
@@ -124,6 +139,11 @@ void ConstCorrectnessCheck::check(const 
MatchFinder::MatchResult &Result) {
   const auto *LocalScope = Result.Nodes.getNodeAs("scope");
   const auto *Variable = Result.Nodes.getNodeAs("local-value");
   const auto *Function = Result.Nodes.getNodeAs("function-decl");
+  const auto *VarDeclStmt = Result.Nodes.getNodeAs("decl-stmt");
+  // It can not be guaranteed that the variable is declared isolated,
+  // therefore a transformation might effect the other variables as well and
+  // be incorrect.
+  const bool CanBeFixIt = VarDeclStmt != nullptr && 
VarDeclStmt->isSingleDecl();
 
   /// If the variable was declared in a template it might be analyzed multiple
   /// times. Only one of those i

[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-tidy

Author: Congcong Cai (HerrCai0907)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130494.diff


9 Files Affected:

- (modified) clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp 
(+101-55) 
- (modified) clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h (+3) 
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+2-1) 
- (modified) 
clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst (+44) 
- (added) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp
 (+50) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
 (+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values-before-cxx23.cpp
 (+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp 
(+1) 
- (modified) 
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-wrong-config.cpp
 (+4-3) 


``diff
diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp 
b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
index dbe59233df699..023c834d5700f 100644
--- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
@@ -13,6 +13,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "llvm/Support/Casting.h"
+#include 
 
 using namespace clang::ast_matchers;
 
@@ -39,34 +41,47 @@ ConstCorrectnessCheck::ConstCorrectnessCheck(StringRef Name,
 : ClangTidyCheck(Name, Context),
   AnalyzeValues(Options.get("AnalyzeValues", true)),
   AnalyzeReferences(Options.get("AnalyzeReferences", true)),
+  AnalyzePointers(Options.get("AnalyzePointers", true)),
   WarnPointersAsValues(Options.get("WarnPointersAsValues", false)),
+  WarnPointersAsPointers(Options.get("WarnPointersAsPointers", true)),
   TransformValues(Options.get("TransformValues", true)),
   TransformReferences(Options.get("TransformReferences", true)),
   TransformPointersAsValues(
   Options.get("TransformPointersAsValues", false)),
+  TransformPointersAsPointers(
+  Options.get("TransformPointersAsPointers", true)),
   AllowedTypes(
   utils::options::parseStringList(Options.get("AllowedTypes", ""))) {
-  if (AnalyzeValues == false && AnalyzeReferences == false)
+  if (AnalyzeValues == false && AnalyzeReferences == false &&
+  AnalyzePointers == false)
 this->configurationDiag(
 "The check 'misc-const-correctness' will not "
-"perform any analysis because both 'AnalyzeValues' and "
-"'AnalyzeReferences' are false.");
+"perform any analysis because both 'AnalyzeValues', "
+"'AnalyzeReferences' and 'AnalyzePointers' are false.");
 }
 
 void ConstCorrectnessCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "AnalyzeValues", AnalyzeValues);
   Options.store(Opts, "AnalyzeReferences", AnalyzeReferences);
+  Options.store(Opts, "AnalyzePointers", AnalyzePointers);
   Options.store(Opts, "WarnPointersAsValues", WarnPointersAsValues);
+  Options.store(Opts, "WarnPointersAsPointers", WarnPointersAsPointers);
 
   Options.store(Opts, "TransformValues", TransformValues);
   Options.store(Opts, "TransformReferences", TransformReferences);
   Options.store(Opts, "TransformPointersAsValues", TransformPointersAsValues);
+  Options.store(Opts, "TransformPointersAsPointers",
+TransformPointersAsPointers);
   Options.store(Opts, "AllowedTypes",
 utils::options::serializeStringList(AllowedTypes));
 }
 
 void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) {
-  const auto ConstType = hasType(isConstQualified());
+  const auto ConstType = hasType(
+  qualType(isConstQualified(),
+   // pointee check will check the const pointer and const array
+   unless(pointerType()), unless(arrayType(;
+
   const auto ConstReference = hasType(references(isConstQualified()));
   const auto RValueReference = hasType(
   referenceType(anyOf(rValueReferenceType(), 
unless(isSpelledAsLValue();
@@ -124,6 +139,11 @@ void ConstCorrectnessCheck::check(const 
MatchFinder::MatchResult &Result) {
   const auto *LocalScope = Result.Nodes.getNodeAs("scope");
   const auto *Variable = Result.Nodes.getNodeAs("local-value");
   const auto *Function = Result.Nodes.getNodeAs("function-decl");
+  const auto *VarDeclStmt = Result.Nodes.getNodeAs("decl-stmt");
+  // It can not be guaranteed that the variable is declared isolated,
+  // therefore a transformation might effect the other variables as well and
+  // be incorrect.
+  const bool CanBeFixIt = VarDeclStmt != nullptr && 
VarDeclStmt->isSingleDecl();
 
   /// If the variable was declared in a template it might be analyz

[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread Baranov Victor via llvm-branch-commits


@@ -39,34 +41,47 @@ ConstCorrectnessCheck::ConstCorrectnessCheck(StringRef Name,
 : ClangTidyCheck(Name, Context),
   AnalyzeValues(Options.get("AnalyzeValues", true)),
   AnalyzeReferences(Options.get("AnalyzeReferences", true)),
+  AnalyzePointers(Options.get("AnalyzePointers", true)),
   WarnPointersAsValues(Options.get("WarnPointersAsValues", false)),
+  WarnPointersAsPointers(Options.get("WarnPointersAsPointers", true)),
   TransformValues(Options.get("TransformValues", true)),
   TransformReferences(Options.get("TransformReferences", true)),
   TransformPointersAsValues(
   Options.get("TransformPointersAsValues", false)),
+  TransformPointersAsPointers(
+  Options.get("TransformPointersAsPointers", true)),
   AllowedTypes(
   utils::options::parseStringList(Options.get("AllowedTypes", ""))) {
-  if (AnalyzeValues == false && AnalyzeReferences == false)
+  if (AnalyzeValues == false && AnalyzeReferences == false &&
+  AnalyzePointers == false)
 this->configurationDiag(
 "The check 'misc-const-correctness' will not "
-"perform any analysis because both 'AnalyzeValues' and "
-"'AnalyzeReferences' are false.");
+"perform any analysis because both 'AnalyzeValues', "

vbvictor wrote:

```suggestion
"perform any analysis because 'AnalyzeValues', "
```
Since there are 3 items now

https://github.com/llvm/llvm-project/pull/130494
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support pointee mutation check in misc-const-correctness (PR #130494)

2025-03-09 Thread Baranov Victor via llvm-branch-commits

https://github.com/vbvictor edited 
https://github.com/llvm/llvm-project/pull/130494
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][CI] Update action runner base image. (PR #130433)

2025-03-09 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 approved this pull request.


https://github.com/llvm/llvm-project/pull/130433
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Bail out on x86_fp80 when computing fpclass with knownbits (#130477) (PR #130489)

2025-03-09 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130489

Backport 029e10289a02b438f1a22f401c94ed60ab4bb704

Requested by: @dtcxzyw

>From 5229bbb52b2585be2c1a2e4bf9fcf2fa73ce3fea Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Sun, 9 Mar 2025 21:10:35 +0800
Subject: [PATCH] [ValueTracking] Bail out on x86_fp80 when computing fpclass
 with knownbits (#130477)

In https://github.com/llvm/llvm-project/pull/97762, we assume that if the
minimum possible value of X is NaN, then X is NaN. But this doesn't hold
for the x86_fp80 format. If the knownbits of X are
`?'010'`,
the minimum possible value of X is NaN/unnormal. However, it can be a
normal value.

Closes https://github.com/llvm/llvm-project/issues/130408.

(cherry picked from commit 029e10289a02b438f1a22f401c94ed60ab4bb704)
---
 llvm/lib/Analysis/ValueTracking.cpp   |  3 ++-
 llvm/test/Transforms/InstSimplify/fcmp.ll | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 8a674914641a8..4e79f41df2eb9 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6135,13 +6135,14 @@ void computeKnownFPClass(const Value *V, const APInt 
&DemandedElts,
 else if (Bits.isNegative())
   Known.signBitMustBeOne();
 
-if (Ty->isIEEE()) {
+if (Ty->isIEEELikeFPTy()) {
   // IEEE floats are NaN when all bits of the exponent plus at least one of
   // the fraction bits are 1. This means:
   //   - If we assume unknown bits are 0 and the value is NaN, it will
   // always be NaN
   //   - If we assume unknown bits are 1 and the value is not NaN, it can
   // never be NaN
+  // Note: They do not hold for x86_fp80 format.
   if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
 Known.KnownFPClasses = fcNan;
   else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
diff --git a/llvm/test/Transforms/InstSimplify/fcmp.ll 
b/llvm/test/Transforms/InstSimplify/fcmp.ll
index 64132f5fb7db7..0c2be5210a741 100644
--- a/llvm/test/Transforms/InstSimplify/fcmp.ll
+++ b/llvm/test/Transforms/InstSimplify/fcmp.ll
@@ -16,3 +16,20 @@ define i1 @poison2(float %x) {
   %v = fcmp ueq float %x, poison
   ret i1 %v
 }
+
+define i1 @pr130408(x86_fp80 %x) {
+; CHECK-LABEL: @pr130408(
+; CHECK-NEXT:[[BITS:%.*]] = bitcast x86_fp80 [[X:%.*]] to i80
+; CHECK-NEXT:[[MASKED:%.*]] = and i80 [[BITS]], -60463063240877801473
+; CHECK-NEXT:[[OR:%.*]] = or i80 [[MASKED]], 302194561415509874573312
+; CHECK-NEXT:[[FP:%.*]] = bitcast i80 [[OR]] to x86_fp80
+; CHECK-NEXT:[[RES:%.*]] = fcmp uno x86_fp80 [[FP]], 
0xK
+; CHECK-NEXT:ret i1 [[RES]]
+;
+  %bits = bitcast x86_fp80 %x to i80
+  %masked = and i80 %bits, -60463063240877801473
+  %or = or i80 %masked, 302194561415509874573312
+  %fp = bitcast i80 %or to x86_fp80
+  %res = fcmp uno x86_fp80 %fp, 0xK
+  ret i1 %res
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Bail out on x86_fp80 when computing fpclass with knownbits (#130477) (PR #130489)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:

@arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/130489
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Bail out on x86_fp80 when computing fpclass with knownbits (#130477) (PR #130489)

2025-03-09 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130489
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Bail out on x86_fp80 when computing fpclass with knownbits (#130477) (PR #130489)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport 029e10289a02b438f1a22f401c94ed60ab4bb704

Requested by: @dtcxzyw

---
Full diff: https://github.com/llvm/llvm-project/pull/130489.diff


2 Files Affected:

- (modified) llvm/lib/Analysis/ValueTracking.cpp (+2-1) 
- (modified) llvm/test/Transforms/InstSimplify/fcmp.ll (+17) 


``diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 8a674914641a8..4e79f41df2eb9 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6135,13 +6135,14 @@ void computeKnownFPClass(const Value *V, const APInt 
&DemandedElts,
 else if (Bits.isNegative())
   Known.signBitMustBeOne();
 
-if (Ty->isIEEE()) {
+if (Ty->isIEEELikeFPTy()) {
   // IEEE floats are NaN when all bits of the exponent plus at least one of
   // the fraction bits are 1. This means:
   //   - If we assume unknown bits are 0 and the value is NaN, it will
   // always be NaN
   //   - If we assume unknown bits are 1 and the value is not NaN, it can
   // never be NaN
+  // Note: They do not hold for x86_fp80 format.
   if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
 Known.KnownFPClasses = fcNan;
   else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
diff --git a/llvm/test/Transforms/InstSimplify/fcmp.ll 
b/llvm/test/Transforms/InstSimplify/fcmp.ll
index 64132f5fb7db7..0c2be5210a741 100644
--- a/llvm/test/Transforms/InstSimplify/fcmp.ll
+++ b/llvm/test/Transforms/InstSimplify/fcmp.ll
@@ -16,3 +16,20 @@ define i1 @poison2(float %x) {
   %v = fcmp ueq float %x, poison
   ret i1 %v
 }
+
+define i1 @pr130408(x86_fp80 %x) {
+; CHECK-LABEL: @pr130408(
+; CHECK-NEXT:[[BITS:%.*]] = bitcast x86_fp80 [[X:%.*]] to i80
+; CHECK-NEXT:[[MASKED:%.*]] = and i80 [[BITS]], -60463063240877801473
+; CHECK-NEXT:[[OR:%.*]] = or i80 [[MASKED]], 302194561415509874573312
+; CHECK-NEXT:[[FP:%.*]] = bitcast i80 [[OR]] to x86_fp80
+; CHECK-NEXT:[[RES:%.*]] = fcmp uno x86_fp80 [[FP]], 
0xK
+; CHECK-NEXT:ret i1 [[RES]]
+;
+  %bits = bitcast x86_fp80 %x to i80
+  %masked = and i80 %bits, -60463063240877801473
+  %or = or i80 %masked, 302194561415509874573312
+  %fp = bitcast i80 %or to x86_fp80
+  %res = fcmp uno x86_fp80 %fp, 0xK
+  ret i1 %res
+}

``




https://github.com/llvm/llvm-project/pull/130489
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [ValueTracking] Bail out on x86_fp80 when computing fpclass with knownbits (#130477) (PR #130489)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130489
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [clang] improve class type sugar preservation in pointers to members (PR #130537)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Matheus Izvekov (mizvekov)


Changes

This changes the MemberPointerType representation to use a NestedNameSpecifier 
instead of a Type to represent the class.

Since the qualifiers are always parsed as nested names, there was an impedance 
mismatch when converting these back and forth into types, and this led to 
issues in preserving sugar.

The nested names are indeed a better match for these, as the differences which 
a QualType can represent cannot be expressed syntactically, and it also 
represents the use case more exactly, being either dependent or referring to a 
CXXRecord, unqualified.

This patch also makes the MemberPointerType able to represent sugar for an 
{up/down}cast conversion of the base class, although for now the underlying 
type is canonical, as preserving the sugar up to that point requires further 
work.

As usual, this includes a few drive-by fixes in order to make use of the 
improvements, and removes some duplication; for example, CheckBaseClassAccess 
is deduplicated across SemaAccess and SemaCast.

---

Patch is 183.99 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130537.diff


81 Files Affected:

- (modified) 
clang-tools-extra/clang-tidy/bugprone/ComparePointerToMemberVirtualFunctionCheck.cpp
 (+1-4) 
- (modified) clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (+1-2) 
- (modified) clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp (+5-3) 
- (modified) clang-tools-extra/clangd/unittests/FindTargetTests.cpp (+1-1) 
- (modified) clang/docs/ReleaseNotes.rst (+3-2) 
- (modified) clang/include/clang/AST/ASTContext.h (+3-4) 
- (modified) clang/include/clang/AST/ASTNodeTraverser.h (+5-2) 
- (modified) clang/include/clang/AST/CanonicalType.h (+3-1) 
- (modified) clang/include/clang/AST/RecursiveASTVisitor.h (+5-4) 
- (modified) clang/include/clang/AST/Type.h (+15-13) 
- (modified) clang/include/clang/AST/TypeLoc.h (+18-14) 
- (modified) clang/include/clang/AST/TypeProperties.td (+6-3) 
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2-4) 
- (modified) clang/include/clang/Sema/Sema.h (+28-11) 
- (modified) clang/lib/AST/ASTContext.cpp (+160-34) 
- (modified) clang/lib/AST/ASTImporter.cpp (+9-5) 
- (modified) clang/lib/AST/ASTStructuralEquivalence.cpp (+2-2) 
- (modified) clang/lib/AST/ByteCode/Compiler.cpp (+6-4) 
- (modified) clang/lib/AST/ExprConstant.cpp (+3-2) 
- (modified) clang/lib/AST/ItaniumMangle.cpp (+10-1) 
- (modified) clang/lib/AST/MicrosoftMangle.cpp (+5-5) 
- (modified) clang/lib/AST/NestedNameSpecifier.cpp (+1) 
- (modified) clang/lib/AST/ODRHash.cpp (+1-1) 
- (modified) clang/lib/AST/QualTypeNames.cpp (+4-3) 
- (modified) clang/lib/AST/Type.cpp (+39-12) 
- (modified) clang/lib/AST/TypePrinter.cpp (+2-2) 
- (modified) clang/lib/CodeGen/CGCXXABI.cpp (+3-4) 
- (modified) clang/lib/CodeGen/CGClass.cpp (+3-4) 
- (modified) clang/lib/CodeGen/CGDebugInfo.cpp (+2-1) 
- (modified) clang/lib/CodeGen/CGExprCXX.cpp (+4-4) 
- (modified) clang/lib/CodeGen/CGPointerAuth.cpp (+2-2) 
- (modified) clang/lib/CodeGen/CGVTables.cpp (+2-3) 
- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+1-1) 
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+1-1) 
- (modified) clang/lib/CodeGen/ItaniumCXXABI.cpp (+11-10) 
- (modified) clang/lib/CodeGen/MicrosoftCXXABI.cpp (+4-3) 
- (modified) clang/lib/Sema/SemaAccess.cpp (+26-19) 
- (modified) clang/lib/Sema/SemaCast.cpp (+16-66) 
- (modified) clang/lib/Sema/SemaDeclCXX.cpp (+23-25) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+4-6) 
- (modified) clang/lib/Sema/SemaExprCXX.cpp (+35-19) 
- (modified) clang/lib/Sema/SemaLookup.cpp (+2-5) 
- (modified) clang/lib/Sema/SemaOpenMP.cpp (+2-3) 
- (modified) clang/lib/Sema/SemaOverload.cpp (+121-73) 
- (modified) clang/lib/Sema/SemaStmt.cpp (+2-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+6-2) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (+16-5) 
- (modified) clang/lib/Sema/SemaType.cpp (+22-100) 
- (modified) clang/lib/Sema/TreeTransform.h (+29-30) 
- (modified) clang/lib/Serialization/ASTReader.cpp (+1-1) 
- (modified) clang/lib/Serialization/ASTWriter.cpp (+1-1) 
- (modified) clang/lib/Serialization/TemplateArgumentHasher.cpp (+3-1) 
- (modified) clang/test/AST/ast-dump-template-json-win32-mangler-crash.cpp 
(+3-1) 
- (modified) clang/test/AST/ast-dump-templates.cpp (+238) 
- (modified) clang/test/AST/ast-dump-types-json.cpp (+338-44) 
- (modified) clang/test/AST/attr-print-emit.cpp (+1-1) 
- (modified) clang/test/Analysis/cxx-uninitialized-object-ptr-ref.cpp (+5-5) 
- (modified) clang/test/CXX/class.access/p6.cpp (+2-2) 
- (modified) clang/test/CXX/drs/cwg0xx.cpp (+6-6) 
- (modified) clang/test/CXX/drs/cwg13xx.cpp (+2-2) 
- (modified) clang/test/CXX/drs/cwg26xx.cpp (+3-3) 
- (modified) clang/test/CXX/drs/cwg2xx.cpp (+2-2) 
- (modified) clang/test/CXX/drs/cwg4xx.cpp (+1-1) 
- (modified) clang/test/CXX/drs/cwg7xx.cpp (+1-2) 
- (modified) clang/test/CXX/temp/

[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129151

>From 3b18ff812dd641d4a3ba9f59f09e635d75d84059 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:00:47 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap

Summary:
This function is no longer needed.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129151, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/9
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 19 ---
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 34 
 2 files changed, 53 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2252dda0b9aad..ae00c16e7eada 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -244,25 +244,6 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
  CloneFunctionChangeType Changes,
  DebugInfoFinder &DIFinder);
 
-/// Based on \p Changes and \p DIFinder return debug info that needs to be
-/// identity mapped during Metadata cloning.
-///
-/// NOTE: Such \a MetadataSetTy can be used by \a CloneFunction* to directly
-/// specify metadata that should be identity mapped (and hence not cloned). The
-/// metadata will be identity mapped in \a ValueToValueMapTy on first use. 
There
-/// are several reasons for doing it this way rather than eagerly identity
-/// mapping metadata nodes in a \a ValueMap:
-/// 1. Mapping metadata is not cheap, particularly because of tracking.
-/// 2. When cloning a Function we identity map lots of global module-level
-///metadata to avoid cloning it, while only a fraction of it is actually
-///used by the function. Mapping on first use is a lot faster for modules
-///with meaningful amount of debug info.
-/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
-///data (e.g. a set of metadata nodes in a \a DICompileUnit).
-MetadataSetTy FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8080dca09be00..11033aeec7dda 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -189,40 +189,6 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const 
Function &F,
   return SPClonedWithinModule;
 }
 
-MetadataSetTy
-llvm::FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule) {
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return {};
-
-  if (DIFinder.subprogram_count() == 0)
-assert(!SPClonedWithinModule &&
-   "Subprogram should be in DIFinder->subprogram_count()...");
-
-  MetadataSetTy MD;
-
-  // Avoid cloning types, compile units, and (other) subprograms.
-  for (DISubprogram *ISP : DIFinder.subprograms())
-if (ISP != SPClonedWithinModule)
-  MD.insert(ISP);
-
-  // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-  for (DIScope *S : DIFinder.scopes()) {
-auto *LScope = dyn_cast<DILocalScope>(S);
-if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-  MD.insert(S);
-  }
-
-for (DICompileUnit *CU : DIFinder.compile_units())
-  MD.insert(CU);
-
-for (DIType *Type : DIFinder.types())
-  MD.insert(Type);
-
-  return MD;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-09 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129150

>From b85266b4d10a8aa141f391be6d161152663fa113 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:47:10 -0800
Subject: [PATCH] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit

Summary:
This cleans up the now unnecessary debug info collection in CoroSplit.

This makes CoroSplit pass almost as fast with -g2 as it is with -g1 on
the sample cpp file used with other parts of this stack:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | MetadataPred (cur) |
|-----------------|----------|----------------|-------------------|--------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 3.8ms              |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms              |
| CollectCommonDI | -        | -              | 63ms              | -                  |
| Speed up        | 1x       | 1.4x           | 4.5x              | 80x                |

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129150, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/8
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 31 ++--
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 37 +++-
 2 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h 
b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index b817e55cad9fc..d1887980fb3bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,9 +48,6 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
-  // Common module-level metadata that's shared between all coroutine clones 
and
-  // doesn't need to be cloned itself.
-  const MetadataSetTy &CommonDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -63,12 +60,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
  Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI, const MetadataSetTy &CommonDebugInfo)
+ TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
 FKind(Shape.ABI == ABI::Async ? CloneKind::Async
   : CloneKind::Continuation),
-Builder(OrigF.getContext()), TTI(TTI), 
CommonDebugInfo(CommonDebugInfo),
-NewF(NewF), ActiveSuspend(ActiveSuspend) {
+Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
+ActiveSuspend(ActiveSuspend) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 assert(NewF && "need existing function for continuation");
@@ -77,11 +74,9 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI,
- const MetadataSetTy &CommonDebugInfo)
+ CloneKind FKind, TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-Builder(OrigF.getContext()), TTI(TTI),
-CommonDebugInfo(CommonDebugInfo) {}
+Builder(OrigF.getContext()), TTI(TTI) {}
 
   virtual ~BaseCloner() {}
 
@@ -89,14 +84,12 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
-   TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo) {
+   TargetTransformInfo &TTI) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 TimeTraceScope FunctionScope("BaseCloner");
 
-BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
-  CommonDebugInfo);
+BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
 Cloner.create();
 return Cloner.getFunction();
   }
@@ -136,9 +129,8 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-   CloneKind FKind, TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo)
-  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, CommonDebugInfo) {}
+   CloneKind FKind, TargetTransformInfo &TTI)
+  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
 
   void create() override;
 
@@ -146,12 +138,11 @@ class SwitchCloner : public BaseCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Cleanup AMDGPUPassRegistry.def (PR #130071)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130071

>From 96b27b6690cb5b8c4925a7a584f1bfd4781a5cf0 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 10:56:28 +
Subject: [PATCH] [AMDGPU][NPM] Cleanup AMDGPUPassRegistry.def

---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 8 +---
 llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp | 2 +-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp| 1 +
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index f14499d0d3146..ad2f3fc29077c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", 
GCNPreRALongBranchRegPass
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
+MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
@@ -131,13 +132,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
-
-// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
-// already exists.
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
-
 // Global ISel passes
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-prelegalizer-combiner", 
AMDGPUPreLegalizerCombinerPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-postlegalizer-combiner", 
AMDGPUPostLegalizerCombinerPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
index b3a2139dfd24e..40094518dce0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
@@ -207,5 +207,5 @@ AMDGPUPreloadKernArgPrologPass::run(MachineFunction &MF,
   if (!AMDGPUPreloadKernArgProlog(MF).run())
 return PreservedAnalyses::all();
 
-  return PreservedAnalyses::none();
+  return getMachineFunctionPassPreservedAnalyses();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d53620c54237d..0c43da7adec6c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -24,6 +24,7 @@
 #include "AMDGPUMacroFusion.h"
 #include "AMDGPUOpenCLEnqueuedBlockLowering.h"
 #include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPUPreloadKernArgProlog.h"
 #include "AMDGPURemoveIncompatibleFunctions.h"
 #include "AMDGPUSplitModule.h"
 #include "AMDGPUTargetObjectFile.h"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Diana Picus via llvm-branch-commits

rovka wrote:

Ping?

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM (PR #130066)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130066

>From 0397215a75549a48fa2569344d8a5bd0a2addc91 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:42:54 +
Subject: [PATCH] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM

---
 .../llvm/CodeGen/PostRAHazardRecognizer.h | 26 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  1 +
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/PostRAHazardRecognizer.cpp   | 46 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AMDGPU/break-smem-soft-clauses.mir|  2 +
 llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir   |  2 +
 .../hazard-flat-instruction-valu-check.mir|  1 +
 10 files changed, 68 insertions(+), 18 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h

diff --git a/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h 
b/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h
new file mode 100644
index 0..3e0c04ac5e403
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h
@@ -0,0 +1,26 @@
+//===- llvm/CodeGen/PostRAHazardRecognizer.h *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PostRAHazardRecognizerPass
+: public PassInfoMixin<PostRAHazardRecognizerPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index f1c16e3b1cb40..a3fd97ee99f3b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -237,7 +237,7 @@ void initializePostDomViewerWrapperPassPass(PassRegistry &);
 void initializePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializePostInlineEntryExitInstrumenterPass(PassRegistry &);
 void initializePostMachineSchedulerLegacyPass(PassRegistry &);
-void initializePostRAHazardRecognizerPass(PassRegistry &);
+void initializePostRAHazardRecognizerLegacyPass(PassRegistry &);
 void initializePostRAMachineSinkingPass(PassRegistry &);
 void initializePostRASchedulerLegacyPass(PassRegistry &);
 void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bedbc3e88a7ce..285ad9601c6ff 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass())
 MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass())
 MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass())
 MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
+MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass())
 MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
 MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass())
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 375176ed4b1ce..69b4d8bac94cf 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -106,7 +106,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePatchableFunctionLegacyPass(Registry);
   initializePeepholeOptimizerLegacyPass(Registry);
   initializePostMachineSchedulerLegacyPass(Registry);
-  initializePostRAHazardRecognizerPass(Registry);
+  initializePostRAHazardRecognizerLegacyPass(Registry);
   initializePostRAMachineSinkingPass(Registry);
   initializePostRASchedulerLegacyPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp 
b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 97b1532300b17..3ead2087fc1d9 100644
--- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -26,6 +26,7 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -40,30 +41,45 @@ using namespace llvm;
 STA

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130060

>From b1402edb380ddf044af4810a9b7a88c4f874c0ed Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 11:06:40 +
Subject: [PATCH] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 ++-
 llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp  | 43 ++-
 4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f331f741e3993..4197a60e77014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -364,6 +364,13 @@ class GCNCreateVOPDPass : public 
PassInfoMixin {
 MachineFunctionAnalysisManager &AM);
 };
 
+class SIMemoryLegalizerPass : public PassInfoMixin<SIMemoryLegalizerPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -428,7 +435,7 @@ class SIAnnotateControlFlowPass
 void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &);
 extern char &SIAnnotateControlFlowLegacyPassID;
 
-void initializeSIMemoryLegalizerPass(PassRegistry&);
+void initializeSIMemoryLegalizerLegacyPass(PassRegistry &);
 extern char &SIMemoryLegalizerID;
 
 void initializeSIModeRegisterLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0e3dcb4267ede..de959f8a2aa62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", 
SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
+MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPas
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 73ae9135eb319..dbe212ad0a216 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
-  initializeSIMemoryLegalizerPass(*PR);
+  initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
@@ -2151,7 +2151,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
 addPass(GCNCreateVOPDPass());
   }
-  // TODO: addPass(SIMemoryLegalizerPass());
+
+  addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 
b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 34953f9c08db7..1375ba201ec58 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/TargetParser/TargetParser.h"
 
@@ -625,9 +627,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
   }
 };
 
-class SIMemoryLegalizer final : public MachineFunctionPass {
+class SIMemor

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineSanitizerBinaryMetadata to NPM (PR #130069)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130069

>From ca3edf5d1ae10f70bce8efe35b858104c67b917c Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 10:20:36 +
Subject: [PATCH] [CodeGen][NPM] Port MachineSanitizerBinaryMetadata to NPM

---
 .../llvm/CodeGen/SanitizerBinaryMetadata.h| 26 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  3 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp  | 37 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/tools/llc/new-pm/pipeline.ll|  2 +-
 8 files changed, 61 insertions(+), 14 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h

diff --git a/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h 
b/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h
new file mode 100644
index 0..6cf2e11aa911e
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h
@@ -0,0 +1,26 @@
+//===- llvm/CodeGen/SanitizerBinaryMetadata.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
+#define LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class MachineSanitizerBinaryMetadataPass
+: public PassInfoMixin<MachineSanitizerBinaryMetadataPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 3fd3cbb28bc3e..c7bc4320cf8f0 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -206,7 +206,7 @@ void initializeMachineOutlinerPass(PassRegistry &);
 void initializeMachinePipelinerPass(PassRegistry &);
 void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializeMachineRegionInfoPassPass(PassRegistry &);
-void initializeMachineSanitizerBinaryMetadataPass(PassRegistry &);
+void initializeMachineSanitizerBinaryMetadataLegacyPass(PassRegistry &);
 void initializeMachineSchedulerLegacyPass(PassRegistry &);
 void initializeMachineSinkingLegacyPass(PassRegistry &);
 void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a86dc8d632a4e..74cdc7d66810b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -75,6 +75,7 @@
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/SafeStack.h"
+#include "llvm/CodeGen/SanitizerBinaryMetadata.h"
 #include "llvm/CodeGen/SelectOptimize.h"
 #include "llvm/CodeGen/ShadowStackGCLowering.h"
 #include "llvm/CodeGen/SjLjEHPrepare.h"
@@ -1002,7 +1003,7 @@ Error CodeGenPassBuilder::addMachinePasses(
   addPass(RemoveLoadsIntoFakeUsesPass());
   addPass(StackMapLivenessPass());
   addPass(LiveDebugValuesPass());
-  addPass(MachineSanitizerBinaryMetadata());
+  addPass(MachineSanitizerBinaryMetadataPass());
 
   if (TM.Options.EnableMachineOutliner &&
   getOptLevel() != CodeGenOptLevel::None &&
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index cab8108ed30f6..8fa21751392f3 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -149,6 +149,7 @@ MACHINE_FUNCTION_PASS("localstackalloc", 
LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
 MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass())
+MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadataPass())
 MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
@@ -279,7 +280,6 @@ DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass)
 DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
 DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", 
MachineFunctionSplitterPass)
-DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinary

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port BranchRelaxation to NPM (PR #130067)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130067

>From b8c2186b63bd1b494cec02624c588596b7237600 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:56:04 +
Subject: [PATCH] [CodeGen][NPM] Port BranchRelaxation to NPM

This completes the PreEmitPasses
---
 llvm/include/llvm/CodeGen/BranchRelaxation.h  | 25 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  1 +
 llvm/lib/CodeGen/BranchRelaxation.cpp | 31 ++-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AArch64/branch-relax-block-size.mir   |  1 +
 .../AArch64/branch-relax-cross-section.mir|  2 ++
 .../AMDGPU/branch-relax-no-terminators.mir|  1 +
 10 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/BranchRelaxation.h

diff --git a/llvm/include/llvm/CodeGen/BranchRelaxation.h 
b/llvm/include/llvm/CodeGen/BranchRelaxation.h
new file mode 100644
index 0..2007cf05b3aa1
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchRelaxation.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/BranchRelaxation.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_BRANCHRELAXATION_H
+#define LLVM_CODEGEN_BRANCHRELAXATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class BranchRelaxationPass : public PassInfoMixin<BranchRelaxationPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_BRANCHRELAXATION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a3fd97ee99f3b..e5bffde815117 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -61,7 +61,7 @@ void initializeBasicAAWrapperPassPass(PassRegistry &);
 void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeBranchFolderPassPass(PassRegistry &);
 void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
-void initializeBranchRelaxationPass(PassRegistry &);
+void initializeBranchRelaxationLegacyPass(PassRegistry &);
 void initializeBreakCriticalEdgesPass(PassRegistry &);
 void initializeBreakFalseDepsPass(PassRegistry &);
 void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 285ad9601c6ff..9300f6935aa90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -138,6 +138,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
+MACHINE_FUNCTION_PASS("branch-relaxation", BranchRelaxationPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp 
b/llvm/lib/CodeGen/BranchRelaxation.cpp
index a762aab43ddd2..134ca59808c27 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -6,6 +6,7 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/BranchRelaxation.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
@@ -44,7 +45,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional 
branches relaxed");
 
 namespace {
 
-class BranchRelaxation : public MachineFunctionPass {
+class BranchRelaxation {
   /// BasicBlockInfo - Information about the offset and size of a single
   /// basic block.
   struct BasicBlockInfo {
@@ -115,23 +116,31 @@ class BranchRelaxation : public MachineFunctionPass {
   void dumpBBs();
   void verify();
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class BranchRelaxationLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  BranchRelaxation() : MachineFunctionPass(ID) {}
+  BranchRelaxationLegacy() : MachineFunctionPass(ID) {}
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+return BranchRelaxation().run(MF);
+  }
 
   StringRef getPassName() const override { return BRANCH_R

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130061

>From 11b7833df74f3d2dd933a28b69a5dcf86c041b21 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 04:41:08 +
Subject: [PATCH] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  4 +-
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   | 91 +--
 llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir |  1 +
 .../CodeGen/AMDGPU/insert-waitcnts-hang.mir   |  1 +
 .../AMDGPU/vccz-corrupt-bug-workaround.mir|  2 +
 7 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4197a60e77014..4dcfaf9b12b5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -371,6 +371,13 @@ class SIMemoryLegalizerPass : public 
PassInfoMixin<SIMemoryLegalizerPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertWaitcntsPass : public PassInfoMixin<SIInsertWaitcntsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -447,7 +454,7 @@ extern char &AMDGPUInsertDelayAluID;
 void initializeSIInsertHardClausesPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
-void initializeSIInsertWaitcntsPass(PassRegistry&);
+void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
 extern char &SIInsertWaitcntsID;
 
 void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index de959f8a2aa62..c4641cba60e53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -131,7 +132,6 @@ 
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartial
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbe212ad0a216..c3cc1dc6e495b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -535,7 +535,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
-  initializeSIInsertWaitcntsPass(*PR);
+  initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
@@ -2153,7 +2153,7 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   }
 
   addPass(SIMemoryLegalizerPass());
-  // TODO: addPass(SIInsertWaitcntsPass());
+  addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee263f58bcaf2..8951a4144bd68 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -594,7 +595,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
   AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
-class SIInsertWaitcnts : public MachineFunctionPass {
+class SIInsertWaitcnts {
 private:
   const GCNSubtarg

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM (PR #130068)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130068

>From ffd87288a22b18efaa3763909ca2d1ca75bff384 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 09:30:37 +
Subject: [PATCH] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM

---
 .../llvm/CodeGen/RemoveLoadsIntoFakeUses.h| 30 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  2 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp  | 44 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../CodeGen/X86/fake-use-remove-loads.mir |  2 +
 8 files changed, 73 insertions(+), 12 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h

diff --git a/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h 
b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
new file mode 100644
index 0..bbd5b8b430bf6
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/RemoveLoadsIntoFakeUses.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+#define LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class RemoveLoadsIntoFakeUsesPass
+: public PassInfoMixin<RemoveLoadsIntoFakeUsesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index e5bffde815117..3fd3cbb28bc3e 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -265,7 +265,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &);
 void initializeRegionPrinterPass(PassRegistry &);
 void initializeRegionViewerPass(PassRegistry &);
 void initializeRegisterCoalescerLegacyPass(PassRegistry &);
-void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &);
+void initializeRemoveLoadsIntoFakeUsesLegacyPass(PassRegistry &);
 void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &);
 void initializeRenameIndependentSubregsLegacyPass(PassRegistry &);
 void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index aab2c58ac0f78..a86dc8d632a4e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -70,6 +70,7 @@
 #include "llvm/CodeGen/RegUsageInfoPropagate.h"
 #include "llvm/CodeGen/RegisterCoalescerPass.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
 #include "llvm/CodeGen/RemoveRedundantDebugValues.h"
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -998,6 +999,7 @@ Error CodeGenPassBuilder::addMachinePasses(
 
   addPass(FuncletLayoutPass());
 
+  addPass(RemoveLoadsIntoFakeUsesPass());
   addPass(StackMapLivenessPass());
   addPass(LiveDebugValuesPass());
   addPass(MachineSanitizerBinaryMetadata());
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 9300f6935aa90..cab8108ed30f6 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -181,6 +181,7 @@ MACHINE_FUNCTION_PASS("reg-usage-collector", 
RegUsageInfoCollectorPass())
 MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
 MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass())
 MACHINE_FUNCTION_PASS("rename-independent-subregs", 
RenameIndependentSubregsPass())
+MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", 
RemoveLoadsIntoFakeUsesPass())
 MACHINE_FUNCTION_PASS("remove-redundant-debug-values", 
RemoveRedundantDebugValuesPass())
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
   RequireAllMachineFunctionPropertiesPass())
@@ -292,7 +293,6 @@ DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass)
 DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass)
 DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
-DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fa

[llvm-branch-commits] [llvm] [AMDGPU][NFC] Format GCNCreateVOPD.cpp (PR #130548)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/130548

None

>From 78bcc3a3576cc1f0dba5c9feb5ed781a62877ffe Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 10 Mar 2025 04:31:20 +
Subject: [PATCH] [AMDGPU][NFC] Format GCNCreateVOPD.cpp

---
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp 
b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index d40a1a2a10d9b..798279b279da3 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -38,15 +38,15 @@ namespace {
 
 class GCNCreateVOPD : public MachineFunctionPass {
 private:
-class VOPDCombineInfo {
-public:
-  VOPDCombineInfo() = default;
-  VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
-  : FirstMI(First), SecondMI(Second) {}
-
-  MachineInstr *FirstMI;
-  MachineInstr *SecondMI;
-};
+  class VOPDCombineInfo {
+  public:
+VOPDCombineInfo() = default;
+VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
+: FirstMI(First), SecondMI(Second) {}
+
+MachineInstr *FirstMI;
+MachineInstr *SecondMI;
+  };
 
 public:
   static char ID;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Extend `do concurrent` mapping to multi-range loops (PR #127634)

2025-03-09 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/127634

>From 090ea42681a2e0bfbb73853eb75f8e31d3adf120 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Tue, 18 Feb 2025 06:17:17 -0600
Subject: [PATCH] [flang][OpenMP] Extend `do concurrent` mapping to multi-range
 loops

Adds support for converting multi-range loops to OpenMP (on the host
only for now). The changes here "prepare" a loop nest for collapsing by
sinking iteration variables to the innermost `fir.do_loop` op in the
nest.
---
 flang/docs/DoConcurrentConversionToOpenMP.md  |  29 
 .../OpenMP/DoConcurrentConversion.cpp | 139 +-
 .../multiple_iteration_ranges.f90 |  72 +
 3 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 
flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90

diff --git a/flang/docs/DoConcurrentConversionToOpenMP.md 
b/flang/docs/DoConcurrentConversionToOpenMP.md
index 19611615ee9d6..ecb4428d7d3ba 100644
--- a/flang/docs/DoConcurrentConversionToOpenMP.md
+++ b/flang/docs/DoConcurrentConversionToOpenMP.md
@@ -173,6 +173,35 @@ omp.parallel {
 
 
 
+### Multi-range loops
+
+The pass currently supports multi-range loops as well. Given the following
+example:
+
+```fortran
+   do concurrent(i=1:n, j=1:m)
+   a(i,j) = i * j
+   end do
+```
+
+The generated `omp.loop_nest` operation looks like:
+
+```
+omp.loop_nest (%arg0, %arg1)
+: index = (%17, %19) to (%18, %20)
+inclusive step (%c1_2, %c1_4) {
+  fir.store %arg0 to %private_i#1 : !fir.ref
+  fir.store %arg1 to %private_j#1 : !fir.ref
+  ...
+  omp.yield
+}
+```
+
+It is worth noting that we have privatized versions for both iteration
+variables: `i` and `j`. These are locally allocated inside the parallel/target
+OpenMP region similar to what the single-range example in the previous section
+shows.
+
 

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130061.diff


7 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+8-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) 
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+61-30) 
- (modified) llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir (+2) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4197a60e77014..4dcfaf9b12b5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -371,6 +371,13 @@ class SIMemoryLegalizerPass : public 
PassInfoMixin<SIMemoryLegalizerPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertWaitcntsPass : public PassInfoMixin<SIInsertWaitcntsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -447,7 +454,7 @@ extern char &AMDGPUInsertDelayAluID;
 void initializeSIInsertHardClausesPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
-void initializeSIInsertWaitcntsPass(PassRegistry&);
+void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
 extern char &SIInsertWaitcntsID;
 
 void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index de959f8a2aa62..c4641cba60e53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -131,7 +132,6 @@ 
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartial
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbe212ad0a216..c3cc1dc6e495b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -535,7 +535,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
-  initializeSIInsertWaitcntsPass(*PR);
+  initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
@@ -2153,7 +2153,7 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   }
 
   addPass(SIMemoryLegalizerPass());
-  // TODO: addPass(SIInsertWaitcntsPass());
+  addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee263f58bcaf2..8951a4144bd68 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -594,7 +595,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
   AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
-class SIInsertWaitcnts : public MachineFunctionPass {
+class SIInsertWaitcnts {
 private:
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
@@ -6

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertHardClauses to NPM (PR #130062)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130062.diff


6 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+7-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (+35-15) 
- (modified) llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx10.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/hard-clauses-img-gfx11.mir (+1) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4dcfaf9b12b5e..b434676f85581 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -378,6 +378,12 @@ class SIInsertWaitcntsPass : public 
PassInfoMixin<SIInsertWaitcntsPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -451,7 +457,7 @@ extern char &SIModeRegisterID;
 void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
 extern char &AMDGPUInsertDelayAluID;
 
-void initializeSIInsertHardClausesPass(PassRegistry &);
+void initializeSIInsertHardClausesLegacyPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
 void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index c4641cba60e53..3eabe087a8a33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
 MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
@@ -131,7 +132,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizations
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c3cc1dc6e495b..6c24fe5f1441a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -534,7 +534,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
-  initializeSIInsertHardClausesPass(*PR);
+  initializeSIInsertHardClausesLegacyPass(*PR);
   initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index dcc60765cc203..71b937f23cc3c 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -36,6 +36,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -89,18 +90,10 @@ enum HardClauseType {
   HARDCLAUSE_ILLEGAL,
 };
 
-class SIInsertHardClauses : public MachineFunctionPass {
+class SIInsertHardClauses {
 public:
-  static char ID;
   const GCNSubtarget *ST = nullptr;
 
-  SIInsertHardClauses() : MachineFunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-AU.setPreservesCFG();
-MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
   HardClauseType getHardClauseType(const MachineInstr &MI) {
 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
   if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
@@ -189,9 +182,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
 return true;
   }
 
-  bool runOnMachineFunction(Machi

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Cleanup AMDGPUPassRegistry.def (PR #130071)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130071
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/130059
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.

test?

https://github.com/llvm/llvm-project/pull/129857
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/130060
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -172,7 +215,12 @@ bool 
SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
 
   case AMDGPU::SI_CS_CHAIN_TC_W32:
   case AMDGPU::SI_CS_CHAIN_TC_W64:
-expandChainCall(MI);
+expandChainCall(MI, ST, /*DynamicVGPR*/ false);

arsenm wrote:

```suggestion
expandChainCall(MI, ST, /*DynamicVGPR=*/ false);
```

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -116,14 +117,56 @@ static void splitBlock(MachineBasicBlock &MBB, 
MachineInstr &MI,
   MDT->applyUpdates(DTUpdates);
 }
 
-void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
+static void addRegOrCopyOp(MachineInstrBuilder &MIB, MachineOperand &Op) {
+  if (Op.isReg())
+MIB.addReg(Op.getReg());
+  else
+MIB->addOperand(Op);
+}
+
+void SILateBranchLowering::expandChainCall(MachineInstr &MI,
+   const GCNSubtarget &ST,
+   bool DynamicVGPR) {
   // This is a tail call that needs to be expanded into at least
   // 2 instructions, one for setting EXEC and one for the actual tail call.
-  constexpr unsigned ExecIdx = 3;
+  unsigned ExecIdx =

arsenm wrote:

```suggestion
  int ExecIdx =
```

Implicit cast 

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -736,6 +742,26 @@ multiclass si_cs_chain_tc_patterns<
 defm : si_cs_chain_tc_patterns<i32>;
 defm : si_cs_chain_tc_patterns<i64>;
 
+// Match dynamic VGPR case. This is always indirect since we choose the callee
+// dynamically based on the result of the VGPR reallocation, so make sure to
+// drop the callee info if there is any.
+multiclass si_cs_chain_tc_dvgpr_patterns<
+  ValueType execvt, RegisterOperand execrc = getSOPSrcForVT<execvt>.ret,
+  Instruction tc = SI_CS_CHAIN_TC_W32_DVGPR> {
+  let AddedComplexity = 90 in {

arsenm wrote:

Why does this need AddedComplexity? 

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -1200,34 +1225,78 @@ bool AMDGPUCallLowering::lowerTailCall(
   if (!IsSibCall)
 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
 
-  unsigned Opc =
-  getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
+  bool IsChainCall = AMDGPU::isChainCC(Info.CallConv);
+  bool IsDynamicVGPRChainCall = false;
+
+  if (IsChainCall) {
+ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];
+const APInt &FlagsValue = 
cast<ConstantInt>(FlagsArg.OrigValue)->getValue();
+if (FlagsValue.isZero()) {
+  if (Info.OrigArgs.size() != 5) {
+LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0");

arsenm wrote:

```suggestion
LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0\n");
```

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -692,36 +692,42 @@ def : GCNPat<
   (SI_TCRETURN_GFX Gfx_CCR_SGPR_64:$src0, (i64 0), i32imm:$fpdiff)
 >;
 
-// Pseudo for the llvm.amdgcn.cs.chain intrinsic.
-// This is essentially a tail call, but it also takes a mask to put in EXEC
-// right before jumping to the callee.
-class SI_CS_CHAIN_TC<
+// Pseudos for the llvm.amdgcn.cs.chain intrinsic.
+multiclass SI_CS_CHAIN_TC<
 ValueType execvt, Predicate wavesizepred,
-RegisterOperand execrc = getSOPSrcForVT<execvt>.ret>
-: SPseudoInstSI <(outs),
-  (ins CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff, execrc:$exec)> {
-  let FixedSize = 0;
-  let isCall = 1;
-  let isTerminator = 1;
-  let isBarrier = 1;
-  let isReturn = 1;
-  let UseNamedOperandTable = 1;
-  let SchedRW = [WriteBranch];
-  let isConvergent = 1;
-
-  let WaveSizePredicate = wavesizepred;
-}
-
-def SI_CS_CHAIN_TC_W32 : SI_CS_CHAIN_TC<i32, isWave32>;
-def SI_CS_CHAIN_TC_W64 : SI_CS_CHAIN_TC<i64, isWave64>;
+RegisterOperand execrc = getSOPSrcForVT<execvt>.ret> {
+  let FixedSize = 0,

arsenm wrote:

Bad indent 

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 
-mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck 
-check-prefix=GISEL-GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 
-mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck 
-check-prefix=DAGISEL-GFX12 %s

arsenm wrote:

Don't need the -mattrs 

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -1200,34 +1225,78 @@ bool AMDGPUCallLowering::lowerTailCall(
   if (!IsSibCall)
 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
 
-  unsigned Opc =
-  getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
+  bool IsChainCall = AMDGPU::isChainCC(Info.CallConv);
+  bool IsDynamicVGPRChainCall = false;
+
+  if (IsChainCall) {
+ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];
+const APInt &FlagsValue = 
cast<ConstantInt>(FlagsArg.OrigValue)->getValue();
+if (FlagsValue.isZero()) {
+  if (Info.OrigArgs.size() != 5) {
+LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0");
+return false;
+  }
+} else if (FlagsValue.isOneBitSet(0)) {
+  IsDynamicVGPRChainCall = true;
+
+  if (Info.OrigArgs.size() != 8) {
+LLVM_DEBUG(dbgs() << "Expected 3 additional args");
+return false;
+  }
+
+  // On GFX12, we can only change the VGPR allocation for wave32.
+  if (!ST.isWave32()) {
+LLVM_DEBUG(dbgs() << "Dynamic VGPR mode is only supported for wave32");
+return false;

arsenm wrote:

This seems like something that should be upgraded to a proper 
DiagnosticInfoUnsupported error. Here you're just going to hit the fallback to 
the DAG, which will then in turn need to error as well 

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits


@@ -1200,34 +1225,78 @@ bool AMDGPUCallLowering::lowerTailCall(
   if (!IsSibCall)
 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
 
-  unsigned Opc =
-  getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
+  bool IsChainCall = AMDGPU::isChainCC(Info.CallConv);
+  bool IsDynamicVGPRChainCall = false;
+
+  if (IsChainCall) {
+ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];
+const APInt &FlagsValue = 
cast<ConstantInt>(FlagsArg.OrigValue)->getValue();
+if (FlagsValue.isZero()) {
+  if (Info.OrigArgs.size() != 5) {
+LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0");
+return false;
+  }
+} else if (FlagsValue.isOneBitSet(0)) {
+  IsDynamicVGPRChainCall = true;
+
+  if (Info.OrigArgs.size() != 8) {
+LLVM_DEBUG(dbgs() << "Expected 3 additional args");
+return false;
+  }
+
+  // On GFX12, we can only change the VGPR allocation for wave32.
+  if (!ST.isWave32()) {
+LLVM_DEBUG(dbgs() << "Dynamic VGPR mode is only supported for wave32");

arsenm wrote:

```suggestion
LLVM_DEBUG(dbgs() << "Dynamic VGPR mode is only supported for wave32\n");
```

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/130063
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130060.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+8-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-2) 
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+33-10) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f331f741e3993..4197a60e77014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -364,6 +364,13 @@ class GCNCreateVOPDPass : public 
PassInfoMixin<GCNCreateVOPDPass> {
 MachineFunctionAnalysisManager &AM);
 };
 
+class SIMemoryLegalizerPass : public PassInfoMixin<SIMemoryLegalizerPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -428,7 +435,7 @@ class SIAnnotateControlFlowPass
 void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &);
 extern char &SIAnnotateControlFlowLegacyPassID;
 
-void initializeSIMemoryLegalizerPass(PassRegistry&);
+void initializeSIMemoryLegalizerLegacyPass(PassRegistry &);
 extern char &SIMemoryLegalizerID;
 
 void initializeSIModeRegisterLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0e3dcb4267ede..de959f8a2aa62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", 
SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
+MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPas
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 73ae9135eb319..dbe212ad0a216 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
-  initializeSIMemoryLegalizerPass(*PR);
+  initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
@@ -2151,7 +2151,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
 addPass(GCNCreateVOPDPass());
   }
-  // TODO: addPass(SIMemoryLegalizerPass());
+
+  addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 
b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 34953f9c08db7..1375ba201ec58 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/TargetParser/TargetParser.h"
 
@@ -625,9 +627,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
   }
 };
 
-class SIMemoryLegalizer final : public MachineFunctionPass {
+class SIMemoryLegalizer final {
 private:
-
+  const MachineModuleInfo &MMI;
   /// Cache Control.
   std::unique_ptr CC = n

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130063
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130061
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130064.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+8-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp (+32-12) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/set-wave-priority.ll (+5) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index d1dc62e9cc526..27ae6d42ec21d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -392,6 +392,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class AMDGPUSetWavePriorityPass
+: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -504,7 +511,7 @@ void initializeGCNPreRAOptimizationsLegacyPass(PassRegistry 
&);
 extern char &GCNPreRAOptimizationsID;
 
 FunctionPass *createAMDGPUSetWavePriorityPass();
-void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
+void initializeAMDGPUSetWavePriorityLegacyPass(PassRegistry &);
 
 void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
 extern char &GCNRewritePartialRegUsesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 318aad5590cda..4956897d22fde 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -100,6 +100,7 @@ MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", 
AMDGPUInsertDelayAluPass())
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", 
GCNPreRALongBranchRegPass())
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
+MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
@@ -131,7 +132,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
index c16d33f1453c0..29aecda82bc4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
@@ -19,6 +19,7 @@
 #include "SIInstrInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -40,15 +41,11 @@ struct MBBInfo {
 
 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
 
-class AMDGPUSetWavePriority : public MachineFunctionPass {
+class AMDGPUSetWavePriority {
 public:
   static char ID;
 
-  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override { return "Set wave priority"; }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool run(MachineFunction &MF);
 
 private:
   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
@@ -58,15 +55,30 @@ class AMDGPUSetWavePriority : public MachineFunctionPass {
   const SIInstrInfo *TII;
 };
 
+class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "Set wave priority"; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+if (skipFunction(MF.getFunction()))
+  return false;
+return AMDGPUSetWavePriority().run(MF);
+  }
+};
+
 } // End anonymous namespace.
 
-INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
-false)
+INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority",
+false, false)
 
-char AMDGPUSetWavePriority::ID = 0;
+char AMDGPUSetWavePriorityLegacy::ID = 0;
 
 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
-  return new AMDGPUSetWavePriority();
+  return new AMDGPUSetWavePriorityLegacy();
 }
 
 MachineInstr *
@@ -96,12 +108

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-09 Thread Akshat Oke via llvm-branch-commits


@@ -1,6 +1,11 @@
 ; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
 ; RUN:   FileCheck %s
 
+; RUN: llc -mtriple=amdgcn -stop-after=si-late-branch-lowering -o - %s | \
+; RUN:   llc -x mir -mtriple=amdgcn -passes=amdgpu-set-wave-priority -o - | \
+; RUN:   llc -x mir -mtriple=amdgcn -start-after=si-late-branch-lowering -o - 
| \
+; RUN:   FileCheck %s

optimisan wrote:

Splicing the pipeline together like this in tests is liable to break if the 
legacy pipeline changes, but it will be replaced very soon (once the whole NPM 
pipeline is in place).

https://github.com/llvm/llvm-project/pull/130064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130061

>From 5f9af25793c1415d00ba0e7b75be937bac22e94d Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 04:41:08 +
Subject: [PATCH] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  4 +-
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   | 91 +--
 llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir |  1 +
 .../CodeGen/AMDGPU/insert-waitcnts-hang.mir   |  1 +
 .../AMDGPU/vccz-corrupt-bug-workaround.mir|  2 +
 7 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4197a60e77014..4dcfaf9b12b5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -371,6 +371,13 @@ class SIMemoryLegalizerPass : public 
PassInfoMixin<SIMemoryLegalizerPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertWaitcntsPass : public PassInfoMixin<SIInsertWaitcntsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -447,7 +454,7 @@ extern char &AMDGPUInsertDelayAluID;
 void initializeSIInsertHardClausesPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
-void initializeSIInsertWaitcntsPass(PassRegistry&);
+void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
 extern char &SIInsertWaitcntsID;
 
 void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index de959f8a2aa62..c4641cba60e53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -131,7 +132,6 @@ 
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartial
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbe212ad0a216..c3cc1dc6e495b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -535,7 +535,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
-  initializeSIInsertWaitcntsPass(*PR);
+  initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
@@ -2153,7 +2153,7 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   }
 
   addPass(SIMemoryLegalizerPass());
-  // TODO: addPass(SIInsertWaitcntsPass());
+  addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee263f58bcaf2..8951a4144bd68 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -594,7 +595,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
   AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
-class SIInsertWaitcnts : public MachineFunctionPass {
+class SIInsertWaitcnts {
 private:
   const GCNSubtarg

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130065

>From 586bcbc3f1ed6b935770957a4e9c3dacda9904a9 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:20:13 +
Subject: [PATCH] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  7 ++---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp  | 31 ++-
 .../AMDGPU/insert-handle-flat-vmem-ds.mir |  1 +
 ...ort-exec-branches-special-instructions.mir |  1 +
 .../CodeGen/AMDGPU/set-gpr-idx-peephole.mir   |  1 +
 7 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 27ae6d42ec21d..b8f5d85ef0b9a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
 extern char &SILowerControlFlowLegacyID;
 
-void initializeSIPreEmitPeepholePass(PassRegistry &);
+void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
 extern char &SIPreEmitPeepholeID;
 
 void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -392,6 +392,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 class AMDGPUSetWavePriorityPass
 : public PassInfoMixin<AMDGPUSetWavePriorityPass> {
 public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 4956897d22fde..f14499d0d3146 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -125,6 +125,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", 
SIOptimizeExecMaskingPr
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
+MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
@@ -133,7 +134,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 857af30b348cb..05eb609956199 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -539,7 +539,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
-  initializeSIPreEmitPeepholePass(*PR);
+  initializeSIPreEmitPeepholeLegacyPass(*PR);
   initializeSILateBranchLoweringLegacyPass(*PR);
   initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2166,9 +2166,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
 addPass(AMDGPUSetWavePriorityPass());
 
-  if (TM.getOptLevel() > CodeGenOptLevel::None) {
-// TODO: addPass(SIPreEmitPeepholePass());
-  }
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(SIPreEmitPeepholePass());
 
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if 
there
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp 
b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 2bb70c138a50c..9db2118f2997b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SIPreEmitPeephole : public MachineFunctionPass {
+class SIPreEmitPeephole {
 private:
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
@@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
  const MachineBasicBlock &To) const;
   bool removeExeczBranch(Mach

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port BranchRelaxation to NPM (PR #130067)

2025-03-09 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130067

>From 02a9dbc736992683f7e1351467de1a9b07ea3a4f Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:56:04 +
Subject: [PATCH] [CodeGen][NPM] Port BranchRelaxation to NPM

This completes the PreEmitPasses
---
 llvm/include/llvm/CodeGen/BranchRelaxation.h  | 25 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  1 +
 llvm/lib/CodeGen/BranchRelaxation.cpp | 31 ++-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AArch64/branch-relax-block-size.mir   |  1 +
 .../AArch64/branch-relax-cross-section.mir|  2 ++
 .../AMDGPU/branch-relax-no-terminators.mir|  1 +
 10 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/BranchRelaxation.h

diff --git a/llvm/include/llvm/CodeGen/BranchRelaxation.h 
b/llvm/include/llvm/CodeGen/BranchRelaxation.h
new file mode 100644
index 0..2007cf05b3aa1
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchRelaxation.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/BranchRelaxation.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_BRANCHRELAXATION_H
+#define LLVM_CODEGEN_BRANCHRELAXATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class BranchRelaxationPass : public PassInfoMixin<BranchRelaxationPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_BRANCHRELAXATION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a3fd97ee99f3b..e5bffde815117 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -61,7 +61,7 @@ void initializeBasicAAWrapperPassPass(PassRegistry &);
 void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeBranchFolderPassPass(PassRegistry &);
 void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
-void initializeBranchRelaxationPass(PassRegistry &);
+void initializeBranchRelaxationLegacyPass(PassRegistry &);
 void initializeBreakCriticalEdgesPass(PassRegistry &);
 void initializeBreakFalseDepsPass(PassRegistry &);
 void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 285ad9601c6ff..9300f6935aa90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -138,6 +138,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
+MACHINE_FUNCTION_PASS("branch-relaxation", BranchRelaxationPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp 
b/llvm/lib/CodeGen/BranchRelaxation.cpp
index a762aab43ddd2..134ca59808c27 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -6,6 +6,7 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/BranchRelaxation.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
@@ -44,7 +45,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional 
branches relaxed");
 
 namespace {
 
-class BranchRelaxation : public MachineFunctionPass {
+class BranchRelaxation {
   /// BasicBlockInfo - Information about the offset and size of a single
   /// basic block.
   struct BasicBlockInfo {
@@ -115,23 +116,31 @@ class BranchRelaxation : public MachineFunctionPass {
   void dumpBBs();
   void verify();
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class BranchRelaxationLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  BranchRelaxation() : MachineFunctionPass(ID) {}
+  BranchRelaxationLegacy() : MachineFunctionPass(ID) {}
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+return BranchRelaxation().run(MF);
+  }
 
   StringRef getPassName() const override { return BRANCH_R

  1   2   >