On 1/21/21 8:16 PM, Jan Hubicka wrote:
On 1/21/21 8:03 PM, Jan Hubicka wrote:
What exactly is suggested?

This one.

Martin

 From 22bbf5342f2b73fad6c0286541bba6699c617380 Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Thu, 21 Jan 2021 09:02:31 +0100
Subject: [PATCH 1/2] Restore -fprofile-reproducibility flag.

gcc/ChangeLog:

        PR gcov-profile/98739
        * common.opt: Add missing equal symbol.
        * value-prof.c (get_nth_most_common_value): Fix comment
        and skip TOP N counter properly when -fprofile-reproducibility
        is not serial.
---
  gcc/common.opt   |  2 +-
  gcc/value-prof.c | 18 ++++++++----------
  2 files changed, 9 insertions(+), 11 deletions(-)

  bool
  get_nth_most_common_value (gimple *stmt, const char *counter_type,
@@ -765,15 +762,16 @@ get_nth_most_common_value (gimple *stmt, const char 
*counter_type,
    *count = 0;
    *value = 0;
- gcov_type read_all = abs_hwi (hist->hvalue.counters[0]);
+  gcov_type read_all = hist->hvalue.counters[0];
+  gcov_type covered = 0;
+  for (unsigned i = 0; i < counters; ++i)
+    covered += hist->hvalue.counters[2 * i + 3];
gcov_type v = hist->hvalue.counters[2 * n + 2];
    gcov_type c = hist->hvalue.counters[2 * n + 3];
- if (hist->hvalue.counters[0] < 0
-      && (flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS
-         || (flag_profile_reproducible
-             == PROFILE_REPRODUCIBILITY_MULTITHREADED)))
+  if (read_all != covered
+      && flag_profile_reproducible != PROFILE_REPRODUCIBILITY_SERIAL)

This should be right for REPRODUCIBILITY_MULTITHREADED but is too strict
for PARALLEL_RUNS (and I think we now have data that this difference
matters).  If you
  1) re-add logic that avoids stremaing targets with no more than 1/32 of
  overall execution counts from each run
  (we may want to have way to tweak the threshold, but I guess we may
  want to first see if it is necessary since it is easy to add and we
  already have bit too many environment variables)

Done that.

  2) re-add logic tracking if any values was lost during merging
  using the sign of first counter

Likewise this.

  3) make PARALLEL_RUNS to disregard the profile if the first counter is
  negetaive

Likewise.

We sould be able to track most of cases where number of values exceeds
32 but there is one or two really dominating.

Also I think it makes sense to default to =serial and use the new flag
in the few packages where we do profile feedback and care about
reproducibility.

Can you please review the attached patch.

I've PGO bootstrapped GCC and it looks fine.
Thanks,
Martin


Thanks a lot for looking into this!
Honza
      return false;
/* Indirect calls can't be verified. */
--
2.30.0



>From aef618fd7c340e0ff9b1c5fc383e5bb2a301e9ba Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Fri, 22 Jan 2021 11:27:16 +0100
Subject: [PATCH] Restore profile reproducibility.

ChangeLog:

	PR gcov-profile/98739
	* Makefile.in: Add -fprofile-reproducible=parallel-runs for GCC
	bootstrap.

gcc/ChangeLog:

	PR gcov-profile/98739
	* common.opt: Add missing sign symbol.
	* value-prof.c (get_nth_most_common_value): Restore handling
	of PROFILE_REPRODUCIBILITY_PARALLEL_RUNS and
	PROFILE_REPRODUCIBILITY_MULTITHREADED.

libgcc/ChangeLog:

	PR gcov-profile/98739
	* libgcov-driver.c (prune_topn_counter): New.
	(prune_counters): New.
	(dump_one_gcov): Prune counters.
	* libgcov-merge.c (__gcov_merge_topn): Mark when merging
	ends with a dropped counter.
	* libgcov.h (gcov_topn_add_value): Add return value.
---
 Makefile.in             |  2 +-
 gcc/common.opt          |  2 +-
 gcc/value-prof.c        | 14 ++++++----
 libgcc/libgcov-driver.c | 60 +++++++++++++++++++++++++++++++++++++++++
 libgcc/libgcov-merge.c  | 11 +++++---
 libgcc/libgcov.h        | 13 ++++++---
 6 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 247cb9c8711..03785200dc7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -565,7 +565,7 @@ STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)
 STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
 STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))
 
-STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
+STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use -fprofile-reproducible=parallel-runs
 STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
 
 STAGEautoprofile_CFLAGS = $(STAGE2_CFLAGS) -g
diff --git a/gcc/common.opt b/gcc/common.opt
index bde1711870d..a8a2b67a99d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2248,7 +2248,7 @@ Enum(profile_reproducibility) String(parallel-runs) Value(PROFILE_REPRODUCIBILIT
 EnumValue
 Enum(profile_reproducibility) String(multithreaded) Value(PROFILE_REPRODUCIBILITY_MULTITHREADED)
 
-fprofile-reproducible
+fprofile-reproducible=
 Common Joined RejectNegative Var(flag_profile_reproducible) Enum(profile_reproducibility) Init(PROFILE_REPRODUCIBILITY_SERIAL)
 -fprofile-reproducible=[serial|parallel-runs|multithreaded]	Control level of reproducibility of profile gathered by -fprofile-generate.
 
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 4c916f4994f..8078a9569d7 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -747,8 +747,8 @@ gimple_divmod_fixed_value (gassign *stmt, tree value, profile_probability prob,
 
    abs (counters[0]) is the number of executions
    for i in 0 ... TOPN-1
-     counters[2 * i + 1] is target
-     abs (counters[2 * i + 2]) is corresponding hitrate counter.
+     counters[2 * i + 2] is target
+     counters[2 * i + 3] is corresponding hitrate counter.
 
    Value of counters[0] negative when counter became
    full during merging and some values are lost.  */
@@ -766,14 +766,18 @@ get_nth_most_common_value (gimple *stmt, const char *counter_type,
   *value = 0;
 
   gcov_type read_all = abs_hwi (hist->hvalue.counters[0]);
+  gcov_type covered = 0;
+  for (unsigned i = 0; i < counters; ++i)
+    covered += hist->hvalue.counters[2 * i + 3];
 
   gcov_type v = hist->hvalue.counters[2 * n + 2];
   gcov_type c = hist->hvalue.counters[2 * n + 3];
 
   if (hist->hvalue.counters[0] < 0
-      && (flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS
-	  || (flag_profile_reproducible
-	      == PROFILE_REPRODUCIBILITY_MULTITHREADED)))
+      && flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS)
+    return false;
+  else if (covered != read_all
+	   && flag_profile_reproducible == PROFILE_REPRODUCIBILITY_MULTITHREADED)
     return false;
 
   /* Indirect calls can't be verified.  */
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index e474e032b54..55df1bafa79 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -213,6 +213,63 @@ static struct gcov_fn_buffer *fn_buffer;
 /* Including system dependent components. */
 #include "libgcov-driver-system.c"
 
+/* Prune TOP N value COUNTERS.  It's needed in order to preserve
+   reproducibility of builds.  */
+
+static void
+prune_topn_counter (gcov_type *counters)
+{
+  gcov_type total = counters[0];
+  gcov_type threshold = total / GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
+  gcov_type *nodes = &counters[1];
+
+  struct gcov_kvp **prev = (struct gcov_kvp **)(intptr_t)&counters[2];
+
+  for (struct gcov_kvp *node = *prev; node != NULL; node = node->next)
+    /* Count is small in this run, skip it.  */
+    {
+      if (node->count < threshold)
+	{
+	  --(*nodes);
+	  /* Skip the node from linked list.  */
+	  *prev = node->next;
+	}
+      else
+	prev = &node->next;
+    }
+}
+
+/* Prune counters so that they are ready to store or merge.  */
+
+static void
+prune_counters (struct gcov_info *gi)
+{
+  for (unsigned i = 0; i < gi->n_functions; i++)
+    {
+      const struct gcov_fn_info *gfi = gi->functions[i];
+      const struct gcov_ctr_info *ci = gfi->ctrs;
+
+      for (unsigned j = 0; j < GCOV_COUNTERS; j++)
+	{
+	  if (gi->merge[j] == NULL)
+	    continue;
+
+	  if (gi->merge[j] == __gcov_merge_topn)
+	    {
+	      gcc_assert (!(ci->num % GCOV_TOPN_MEM_COUNTERS));
+	      for (unsigned k = 0; k < (ci->num / GCOV_TOPN_MEM_COUNTERS); k++)
+		{
+		  gcov_type *counters
+		    = ci->values + (k * GCOV_TOPN_MEM_COUNTERS);
+		  prune_topn_counter (counters);
+		}
+	    }
+	  ci++;
+	}
+    }
+}
+
+
 /* This function merges counters in GI_PTR to an existing gcda file.
    Return 0 on success.
    Return -1 on error. In this case, caller will goto read_fatal.  */
@@ -475,6 +532,9 @@ dump_one_gcov (struct gcov_info *gi_ptr, struct gcov_filename *gf,
   gcov_unsigned_t tag;
   fn_buffer = 0;
 
+  /* Prune current counters before we merge them.  */
+  prune_counters (gi_ptr);
+
   error = gcov_exit_open_gcda_file (gi_ptr, gf);
   if (error == -1)
     return;
diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index 9306e8d688c..35936a8364b 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -107,7 +107,9 @@ __gcov_merge_topn (gcov_type *counters, unsigned n_counters)
       gcov_type all = gcov_get_counter_ignore_scaling (-1);
       gcov_type n = gcov_get_counter_ignore_scaling (-1);
 
-      counters[GCOV_TOPN_MEM_COUNTERS * i] += all;
+      unsigned full = all < 0;
+      gcov_type *total = &counters[GCOV_TOPN_MEM_COUNTERS * i];
+      *total += full ? -all : all;
 
       for (unsigned j = 0; j < n; j++)
 	{
@@ -115,9 +117,12 @@ __gcov_merge_topn (gcov_type *counters, unsigned n_counters)
 	  gcov_type count = gcov_get_counter_ignore_scaling (-1);
 
 	  // TODO: we should use atomic here
-	  gcov_topn_add_value (counters + GCOV_TOPN_MEM_COUNTERS * i, value,
-			       count, 0, 0);
+	  full |= gcov_topn_add_value (counters + GCOV_TOPN_MEM_COUNTERS * i,
+				       value, count, 0, 0);
 	}
+
+      if (full)
+	*total = -(*total);
     }
 }
 #endif /* L_gcov_merge_topn */
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index b4a7e942a7e..df08e882dd7 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -435,9 +435,10 @@ allocate_gcov_kvp (void)
 
 /* Add key value pair VALUE:COUNT to a top N COUNTERS.  When INCREMENT_TOTAL
    is true, add COUNT to total of the TOP counter.  If USE_ATOMIC is true,
-   do it in atomic way.  */
+   do it in atomic way.  Return true when the counter is full, otherwise
+   return false.  */
 
-static inline void
+static inline unsigned
 gcov_topn_add_value (gcov_type *counters, gcov_type value, gcov_type count,
 		     int use_atomic, int increment_total)
 {
@@ -453,7 +454,7 @@ gcov_topn_add_value (gcov_type *counters, gcov_type value, gcov_type count,
       if (current_node->value == value)
 	{
 	  gcov_counter_add (&current_node->count, count, use_atomic);
-	  return;
+	  return 0;
 	}
 
       if (minimal_node == NULL
@@ -471,12 +472,14 @@ gcov_topn_add_value (gcov_type *counters, gcov_type value, gcov_type count,
 	  minimal_node->value = value;
 	  minimal_node->count = count;
 	}
+
+      return 1;
     }
   else
     {
       struct gcov_kvp *new_node = allocate_gcov_kvp ();
       if (new_node == NULL)
-	return;
+	return 0;
 
       new_node->value = value;
       new_node->count = count;
@@ -515,6 +518,8 @@ gcov_topn_add_value (gcov_type *counters, gcov_type value, gcov_type count,
       if (success)
 	gcov_counter_add (&counters[1], 1, use_atomic);
     }
+
+  return 0;
 }
 
 #endif /* !inhibit_libc */
-- 
2.30.0

Reply via email to