From 937841f25d7a7065b73327ce6fdd0ec3465dbfb3 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
Date: Thu, 13 Nov 2025 17:15:48 -0800
Subject: [PATCH 4/4] [Autofdo] Add hierarchical discriminator for loop
 unrolling

Add hierarchical discriminator support for loop unrolling.
Statements in the unrolled copies get their multiplicity discriminator
set to the unroll factor; the existing copyid is preserved.

gcc/ChangeLog:

	* cfgloopmanip.cc (duplicate_loop_body_to_header_edge): Assign
	hierarchical discriminators for loop unrolling.
	* cfgloopmanip.h (DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR): New flag.
	* tree-ssa-loop-ivcanon.cc (try_unroll_loop_completely): Pass flag
	to enable hierarchical discriminator assignment.
	(try_peel_loop): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/hierarchical-discriminator-unroll.c: New test.

Signed-off-by: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
---
 gcc/cfgloopmanip.cc                           | 51 +++++++++++++++++++
 gcc/cfgloopmanip.h                            |  4 ++
 .../hierarchical-discriminator-unroll.c       | 33 ++++++++++++
 gcc/tree-ssa-loop-ivcanon.cc                  |  7 ++-
 4 files changed, 93 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index dda2fb661d8..7cf5aea5a0f 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "sreal.h"
 #include "tree-cfg.h"
 #include "tree-pass.h"
+#include "hierarchical_discriminator.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1422,6 +1423,56 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
 	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
 	  }
 
+      /* Assign hierarchical discriminators to distinguish loop iterations.  */
+      if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR)
+	{
+	  /* Only handle GIMPLE mode for now.  */
+	  if (current_ir_type () == IR_GIMPLE)
+	    {
+	      /* For loop unrolling:
+		 - multiplicity = unroll factor (ndupl for complete unrolling)
+		 - copyid = keep original (don't change)
+		 All unrolled iterations share the same copyid but
+		 have multiplicity set.  */
+	      unsigned int unroll_factor = ndupl;

+	      for (i = 0; i < n; i++)
+		{
+		  for (gimple_stmt_iterator gsi = gsi_start_bb (new_bbs[i]);
+		       !gsi_end_p (gsi); gsi_next (&gsi))
+		    {
+		      gimple *stmt = gsi_stmt (gsi);
+		      location_t loc = gimple_location (stmt);

+		      /* Skip debug stmts and stmts without a location.  */
+		      if (loc == UNKNOWN_LOCATION || is_gimple_debug (stmt))
+			continue;
+		      unsigned int base, old_multiplicity, old_copyid;
+		      get_discriminator_components_from_loc (loc, &base,
+							     &old_multiplicity,
+							     &old_copyid);

+		      /* Multiply existing multiplicity by unroll factor.
+			 Preserve the original copyid.  */
+		      unsigned int multiplicity = (old_multiplicity == 0)
+			? unroll_factor
+			: old_multiplicity * unroll_factor;
+		      if (multiplicity > DISCR_MULTIPLICITY_MAX)
+			multiplicity = DISCR_MULTIPLICITY_MAX;

+		      unsigned int copyid = old_copyid;

+		      location_t new_loc
+			= location_with_discriminator_components (loc,
+								  base,
+								  multiplicity,
+								  copyid);
+		      gimple_set_location (stmt, new_loc);
+		    }
+		}
+	    }
+	}

       /* Note whether the blocks and edges belong to an irreducible loop.  */
       if (add_irreducible_flag)
 	{
diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
index 42def2fe40d..d3d1a73bdea 100644
--- a/gcc/cfgloopmanip.h
+++ b/gcc/cfgloopmanip.h
@@ -34,6 +34,10 @@ enum
 					   a complete peeling.  */
 #define DLTHE_FLAG_FLAT_PROFILE 8	/* Profile is flat; do not reduce
 					   count by unroll factor.  */
+#define DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR 16 /* Assign hierarchical
+						      discriminators to
+						      distinguish loop
+						      iterations.  */
 extern edge mfb_kj_edge;
 
 extern bool remove_path (edge, bool * = NULL, bitmap = NULL);
diff --git a/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c
new file mode 100644
index 00000000000..386f4cb442a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c
@@ -0,0 +1,33 @@
+/* Test hierarchical discriminators for loop unrolling.
+   This test verifies that loop unrolling sets the multiplicity field to
+   indicate the unroll factor, while preserving the original copyid.
+   
+   { dg-do compile }
+   { dg-options "-S -O2 -g  -fno-tree-vectorize" } */

+int a[100];
+int
+test_unroll (void)
+{
+  int sum = 0;
+  int i;
+  
+  /* Small fixed-count loop that should be completely unrolled.  */
+  #pragma GCC unroll 4
+  for (i = 0; i < 4; i++)
+    {
+      asm ("nop");  /* Line 19, column 7: matched by dg-final below.  */
+      sum += a[i] * 2; 
+    }
+  
+  return sum;
+}

+/* Hierarchical discriminator format: [Base:8][Multiplicity:7][CopyID:11][Unused:6]
+   
+   Expected discriminator for unrolled loop:
+   - Base=0
+   - Multiplicity=4 (unroll factor)
+   - CopyID=0
+   - Calculation: base=0 | (4 << 8) | (0 << 15) = 1024.  */
+/* { dg-final { scan-assembler-times "\\.loc 1 19 7 is_stmt 0 discriminator 1024" 4 } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index ca6295c7de2..fe774454bf5 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -65,6 +65,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-sccvn.h"
 #include "tree-vectorizer.h" /* For find_loop_location */
 #include "dbgcnt.h"
+#include "hierarchical_discriminator.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -980,7 +981,8 @@ try_unroll_loop_completely (class loop *loop,
       if (!gimple_duplicate_loop_body_to_header_edge (
 	    loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
 	    &edges_to_remove,
-	    DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
+	    DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL
+	    | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
 	{
           free_original_copy_tables ();
 	  if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1222,7 +1224,8 @@ try_peel_loop (class loop *loop,
 
   if (!gimple_duplicate_loop_body_to_header_edge (
 	loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
-	&edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
+	&edges_to_remove,
+	DLTHE_FLAG_UPDATE_FREQ | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
     {
       free_original_copy_tables ();
       return false;
-- 
2.34.1

