From da89cf1f20c481f0c5e439614aacbb9cce511a56 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
Date: Tue, 11 Nov 2025 18:11:18 -0800
Subject: [PATCH 2/4] [AutoFDO] Add hierarchical discriminator for vectorizer

Add hierarchical discriminator support for vectorized loop versioning.
Assigns copyid discriminators to distinguish vectorized and scalar versions.

gcc/ChangeLog:

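	* tree-vect-loop-manip.cc: Include hierarchical_discriminator.h.
	(vect_do_peeling): Assign copyid discriminators to the prolog loop
	and to the scalar epilog loop.
	(vect_loop_versioning): Assign copyid discriminator to the scalar
	loop version.
	* tree-vect-loop.cc: Include hierarchical_discriminator.h.
	(vect_transform_loop): Assign multiplicity and copyid discriminators
	to the vectorized main and epilog loops.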

gcc/testsuite/ChangeLog:

	* gcc.dg/hierarchical-discriminator-vect-version.c: New test.

Signed-off-by: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
---
 .../hierarchical-discriminator-vect-version.c | 44 +++++++++++++++++++
 gcc/tree-vect-loop-manip.cc                   | 17 +++++++
 gcc/tree-vect-loop.cc                         | 12 +++++
 3 files changed, 73 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/hierarchical-discriminator-vect-version.c

diff --git a/gcc/testsuite/gcc.dg/hierarchical-discriminator-vect-version.c b/gcc/testsuite/gcc.dg/hierarchical-discriminator-vect-version.c
new file mode 100644
index 00000000000..542e1cbe531
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/hierarchical-discriminator-vect-version.c
@@ -0,0 +1,44 @@
+/* Test hierarchical discriminators for vectorized loops.
+   This test verifies that the vectorizer assigns unique discriminators to
+   distinguish the main vectorized loop from epilog loops in AutoFDO profiles.
+   
+   Expected compilation:
+   - Main loop vectorized with VF=4
+   - Epilog loop for remainder iterations with VF=2
+   
+   { dg-do compile }
+   { dg-options "-O3 -g -ftree-vectorize " }
+   { dg-require-effective-target vect_int }  */
+
+void
+test_vectorize (int *restrict a, int *restrict b, int *restrict c, int n)
+{
+    int i;
+    for (i = 0; i < n; i++)
+      {
+	a[i] = b[i] + c[i];  /* Line 19: matched by the .loc scans below.  */
+      }
+}
+
+/* Hierarchical discriminator format: [Base:8][Multiplicity:7][CopyID:11][Unused:6]
+   
+   Discriminators encode:
+   - Base (bits 0-7): Front-end discriminator for same-line statements
+   - Multiplicity (bits 8-14): Unroll/vectorization factor
+   - CopyID (bits 15-25): Loop variant identifier
+   
+   Expected loop variants:
+   
+   1. Main vectorized loop:
+      - CopyID=1 (DISCRIMINATOR_LOOP_VERSION_VECTORIZED)
+      - Multiplicity=4 (vectorization factor)
+      - Calculation: base=0 | (4 << 8) | (1 << 15) = 33792
+   
+   2. Epilog loop:
+      - CopyID=7 (DISCRIMINATOR_LOOP_EPILOG)
+      - Multiplicity=2 (unroll factor for epilog)
+      - Calculation: base=0 | (2 << 8) | (7 << 15) = 229888.  */
+   
+
+/* { dg-final { scan-assembler "\\.loc 1 19 10 is_stmt 0 discriminator 33792" } } */
+/* { dg-final { scan-assembler "\\.loc 1 19 10 is_stmt 0 discriminator 229888" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 20141dbc2e5..893838f6d7e 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 #include "tree-vector-builder.h"
 #include "optabs-tree.h"
+#include "hierarchical_discriminator.h"
 
 /*************************************************************************
   Simple Loop Peeling Utilities
@@ -3357,6 +3358,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       gcc_assert (prolog);
       prolog->force_vectorize = false;
 
+      /* Assign hierarchical discriminators to distinguish prolog loop.
+	 Prolog loops are for alignment and are not vectorized.  */
+      assign_discriminators_to_loop (prolog, 0, DISCRIMINATOR_LOOP_PROLOG);
+
       first_loop = prolog;
       reset_original_copy_tables ();
 
@@ -3462,6 +3467,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       epilog->force_vectorize = false;
       bb_before_epilog = loop_preheader_edge (epilog)->src;
 
+      /* Assign hierarchical discriminators to distinguish epilog loop.
+	 Only assign if it's a scalar epilog.  A vectorized epilog
+	 (vect_epilogues) gets its discriminators in vect_transform_loop.  */
+      if (!vect_epilogues)
+	assign_discriminators_to_loop (epilog, 0, DISCRIMINATOR_LOOP_EPILOG);
+
       /* Scalar version loop may be preferred.  In this case, add guard
 	 and skip to epilog.  Note this only happens when the number of
 	 iterations of loop is unknown at compile time, otherwise this
@@ -4359,6 +4370,12 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
       gcc_assert (nloop);
       nloop = get_loop_copy (loop);
 
+      /* Assign hierarchical discriminators to distinguish loop versions.
+	 Only assign to the scalar version here; the vectorized version will
+	 get discriminators later during transformation/peeling.  */
+      assign_discriminators_to_loop (nloop, 0,
+				     DISCRIMINATOR_LOOP_VERSION_SCALAR);
+
       /* For cycle vectorization with SLP we rely on the PHI arguments
 	 appearing in the same order as the SLP node operands which for the
 	 loop PHI nodes means the preheader edge dest index needs to remain
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 9320bf8e878..b3970518db9 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -59,6 +59,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "case-cfn-macros.h"
 #include "langhooks.h"
 #include "opts.h"
+#include "hierarchical_discriminator.h"
 
 /* Loop Vectorization Pass.
 
@@ -11101,6 +11102,17 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 			      &step_vector, &niters_vector_mult_vf, th,
 			      check_profitability, niters_no_overflow,
 			      &advance);
+
+  /* Assign discriminators; a variable-length VF uses its lower bound.  */
+  poly_uint64 vf_val = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  unsigned int vf_int = constant_lower_bound (vf_val);
+  if (vf_int > DISCR_MULTIPLICITY_MAX)
+    vf_int = DISCR_MULTIPLICITY_MAX;  /* Clamp to the encodable maximum.  */
+  unsigned int copyid = LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+			? DISCRIMINATOR_LOOP_EPILOG_VECTORIZED
+			: DISCRIMINATOR_LOOP_VERSION_VECTORIZED;
+  assign_discriminators_to_loop (loop, vf_int, copyid);
+
   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)
       && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ())
     {
-- 
2.34.1

