From 50bee4d9aa1bf14c9d6420af763fc309d3d99d8c Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
Date: Sun, 7 Dec 2025 13:45:39 -0800
Subject: [PATCH 4/4] [Autofdo][V2] Add hierarchical discriminator for loop
 unrolling

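Add hierarchical discriminator support for loop unrolling and peeling.
Each duplicated copy of the loop body is assigned its own copyid
(DISCRIMINATOR_LOOP_UNROLL_BASE + copy index, capped at DISCR_COPYID_MAX)
with a multiplicity argument of 0, so that unrolled iterations can be
distinguished.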

gcc/ChangeLog:

	* cfgloopmanip.cc (duplicate_loop_body_to_header_edge): Assign
	hierarchical discriminators for loop unrolling.
	* cfgloopmanip.h (DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR): New flag.
	* tree-ssa-loop-ivcanon.cc (try_unroll_loop_completely): Pass flag
	to enable hierarchical discriminator assignment.
	(try_peel_loop): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/hierarchical-discriminator-unroll.c: New test.

Signed-off-by: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
---
 gcc/cfgloopmanip.cc                           | 25 ++++++++++++
 gcc/cfgloopmanip.h                            |  4 ++
 .../hierarchical-discriminator-unroll.c       | 39 +++++++++++++++++++
 gcc/tree-ssa-loop-ivcanon.cc                  |  7 +++-
 4 files changed, 73 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index dda2fb661d8..78bfaea2e7c 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "sreal.h"
 #include "tree-cfg.h"
 #include "tree-pass.h"
+#include "hierarchical_discriminator.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1422,6 +1423,30 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
 	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
 	  }
 
+      /* Assign hierarchical discriminators to distinguish loop iterations.  */
+      if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR)
+	{
+	  /* Only handle GIMPLE mode for now.  */
+	  if (current_ir_type () == IR_GIMPLE)
+	    {
+	      /* For loop unrolling, each unrolled iteration is a distinct copy
+		 of the code and should get a unique copyid:
+		 - copyid = DISCRIMINATOR_LOOP_UNROLL_BASE + iteration_number
+		 - multiplicity: 0 is passed, which is intended to preserve any
+		   value already set by vectorization.
+		 The iteration number is 'j' (0 to ndupl-1).  */
+
+	      /* Calculate copyid for this iteration.  */
+	      unsigned int copyid = DISCRIMINATOR_LOOP_UNROLL_BASE + j;
+	      if (copyid > DISCR_COPYID_MAX)
+		copyid = DISCR_COPYID_MAX;
+
+	      /* Update all basic blocks created in this iteration.  */
+	      for (i = 0; i < n; i++)
+		assign_discriminators_to_bb (new_bbs[i], 0, copyid);
+	    }
+	}
+
       /* Note whether the blocks and edges belong to an irreducible loop.  */
       if (add_irreducible_flag)
 	{
diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
index 42def2fe40d..d3d1a73bdea 100644
--- a/gcc/cfgloopmanip.h
+++ b/gcc/cfgloopmanip.h
@@ -34,6 +34,10 @@ enum
 					   a complete peeling.  */
 #define DLTHE_FLAG_FLAT_PROFILE 8	/* Profile is flat; do not reduce
 					   count by unroll factor.  */
+#define DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR 16 /* Assign hierarchical
+						      discriminators to
+						      distinguish loop
+						      iterations.  */
 extern edge mfb_kj_edge;
 
 extern bool remove_path (edge, bool * = NULL, bitmap = NULL);
diff --git a/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c
new file mode 100644
index 00000000000..5b7b2557d19
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c
@@ -0,0 +1,39 @@
+/* Test hierarchical discriminators for loop unrolling.
+   This test verifies that each unrolled copy of the loop body gets its
+   own copyid, while the base and multiplicity fields stay zero.
+
+   { dg-do compile }
+   { dg-options "-S -O2 -g  -fno-tree-vectorize" } */
+
+int a[100];
+int
+test_unroll (void)
+{
+  int sum = 0;
+  int i;
+  
+  /* Fixed trip count of 4, so the loop should be completely unrolled.  */
+  #pragma GCC unroll 4
+  for (i = 0; i < 4; i++)
+    {
+      asm ("nop");
+      sum += a[i] * 2; 
+    }
+  
+  return sum;
+}
+
+/* Hierarchical discriminator format: [Base:8][Multiplicity:7][CopyID:11][Unused:6]
+   i.e. discriminator = base | (mult << 8) | (copyid << 15).
+   Expected discriminators for 4x unrolled loop (each iteration gets distinct copyid):
+   - Iteration 0: base=0, mult=0, copyid=100 → 0|(0<<8)|(100<<15) = 3276800
+   - Iteration 1: base=0, mult=0, copyid=101 → 0|(0<<8)|(101<<15) = 3309568
+   - Iteration 2: base=0, mult=0, copyid=102 → 0|(0<<8)|(102<<15) = 3342336
+   - Iteration 3: base=0, mult=0, copyid=103 → 0|(0<<8)|(103<<15) = 3375104
+   
+   Note: Loop unrolling creates distinct code copies, so each iteration uses
+   a different copyid rather than multiplicity.  */
+/* { dg-final { scan-assembler "\\.loc 1 19 7 is_stmt 0 discriminator 3276800" } } */
+/* { dg-final { scan-assembler "\\.loc 1 19 7 is_stmt 0 discriminator 3309568" } } */
+/* { dg-final { scan-assembler "\\.loc 1 19 7 is_stmt 0 discriminator 3342336" } } */
+/* { dg-final { scan-assembler "\\.loc 1 19 7 is_stmt 0 discriminator 3375104" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index ca6295c7de2..fe774454bf5 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -65,6 +65,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-sccvn.h"
 #include "tree-vectorizer.h" /* For find_loop_location */
 #include "dbgcnt.h"
+#include "hierarchical_discriminator.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -980,7 +981,8 @@ try_unroll_loop_completely (class loop *loop,
       if (!gimple_duplicate_loop_body_to_header_edge (
 	    loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
 	    &edges_to_remove,
-	    DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
+	    DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL
+	    | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
 	{
           free_original_copy_tables ();
 	  if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1222,7 +1224,8 @@ try_peel_loop (class loop *loop,
 
   if (!gimple_duplicate_loop_body_to_header_edge (
 	loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
-	&edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
+	&edges_to_remove,
+	DLTHE_FLAG_UPDATE_FREQ | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
     {
       free_original_copy_tables ();
       return false;
-- 
2.34.1

