On 3/3/21 12:26 PM, Jan Hubicka wrote:
libgcc/ChangeLog:
PR gcov-profile/99105
* libgcov-driver.c (write_top_counters): Rename to ...
(write_topn_counters): ... this.
(write_one_data): Pre-allocate buffer for number of items
in the corresponding linked lists.
* libgcov-merge.c (__gcov_merge_topn): Use renamed function.
gcc/testsuite/ChangeLog:
PR gcov-profile/99105
* gcc.dg/tree-prof/indir-call-prof-malloc.c: Use profile
correction as the wrapped malloc is called one more time
from libgcov.
for (unsigned i = 0; i < counters; i++)
{
- gcov_type pair_count = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 1];
gcov_write_counter (ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i]);
- gcov_write_counter (pair_count);
+ gcov_write_counter (list_sizes[i]);
gcov_type start = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 2];
+
+ unsigned j = 0;
for (struct gcov_kvp *node = (struct gcov_kvp *)(intptr_t)start;
- node != NULL; node = node->next)
+ node != NULL; node = node->next, j++)
{
gcov_write_counter (node->value);
gcov_write_counter (node->count);
+
+ /* Stop when we reach expected number of items. */
+ if (j + 1 == list_sizes[i])
+ break;
Since you counted number of entries earlier, I would expect loop to
always terminate here and thus the node != NULL condition in for loop
above to be unnecesary.
Yes, fixed that.
}
}
+
+ free (list_sizes);
We already have our own mmap allocator, so I wonder if we don't want to
avoid malloc/free pair here. The counters are per-function, right? I
wonder if they happen to be large on some bigger project, but it may
reduct risk of user messing this up with his own malloc/free
implementation if we used alloca for counts of reasonable size.
Good idea, I've also implemented that.
}
/* Write counters in GI_PTR and the summary in PRG to a gcda file. In
@@ -425,7 +446,7 @@ write_one_data (const struct gcov_info *gi_ptr,
n_counts = ci_ptr->num;
if (t_ix == GCOV_COUNTER_V_TOPN || t_ix == GCOV_COUNTER_V_INDIR)
- write_top_counters (ci_ptr, t_ix, n_counts);
+ write_topn_counters (ci_ptr, t_ix, n_counts);
else
{
/* Do not stream when all counters are zero. */
diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index 7db188a4f4c..3379b688128 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -109,6 +109,7 @@ __gcov_merge_topn (gcov_type *counters, unsigned n_counters)
/* First value is number of total executions of the profiler. */
gcov_type all = gcov_get_counter_ignore_scaling (-1);
gcov_type n = gcov_get_counter_ignore_scaling (-1);
+ gcc_assert (n <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
I think in the runtime we do not want to have asserts checking
implementation correctness since it bloats it up. So I would leave it
out.
Makes sense.
I wonder if we can have some testcase for parallel updating/streaming in
testsuite?
Well, I tried something like this:
#include <pthread.h>
#include "gcov.h"
#define LOOPS 1000000
#define THREADS 8
void *
worker (void *data)
{
char memory[1024];
for (unsigned i = 0; i < LOOPS; i++)
{
asm volatile("" ::: "memory");
__builtin_memset (memory, 0, i % 15);
}
return NULL;
}
int main()
{
pthread_t threads[THREADS];
for (unsigned i = 0; i < THREADS; i++)
if (pthread_create (&threads[i], NULL, worker, NULL))
return 0;
__gcov_dump ();
return 0;
}
But it's quite difficult to make a setup where we hit point when we append to
the linked list.
I've just tested instrumented clang:
$ make -j128 check-clang
...
Testing Time: 365.93s
Unsupported : 787
Passed : 26513
Expectedly Failed: 25
[100%] Built target check-clang
May I install the patch?
Thanks,
Martin
Otherwise the patch looks good to me.
Honza
unsigned full = all < 0;
gcov_type *total = &counters[GCOV_TOPN_MEM_COUNTERS * i];
--
2.30.0
>From de61040043c30400a2b8662d81a24387f1a13b3f Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Tue, 16 Feb 2021 16:28:06 +0100
Subject: [PATCH] profiling: fix streaming of TOPN counters
libgcc/ChangeLog:
PR gcov-profile/99105
* libgcov-driver.c (write_top_counters): Rename to ...
(write_topn_counters): ... this.
(write_one_data): Pre-allocate buffer for number of items
in the corresponding linked lists.
* libgcov-merge.c (__gcov_merge_topn): Use renamed function.
gcc/testsuite/ChangeLog:
PR gcov-profile/99105
* gcc.dg/tree-prof/indir-call-prof-malloc.c: Use profile
correction as the wrapped malloc is called one more time
from libgcov.
* gcc.dg/tree-prof/pr97461.c: Likewise.
---
.../gcc.dg/tree-prof/indir-call-prof-malloc.c | 2 +-
gcc/testsuite/gcc.dg/tree-prof/pr97461.c | 2 +-
libgcc/libgcov-driver.c | 56 ++++++++++++++++---
3 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
index 454e224c95f..7bda4aedfc8 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-malloc.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -ldl" } */
+/* { dg-options "-O2 -ldl -fprofile-correction" } */
#define _GNU_SOURCE
#include <stdio.h>
diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr97461.c b/gcc/testsuite/gcc.dg/tree-prof/pr97461.c
index 213fac9af04..f684be4d80f 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/pr97461.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/pr97461.c
@@ -1,5 +1,5 @@
/* PR gcov-profile/97461 */
-/* { dg-options "-O2 -ldl" } */
+/* { dg-options "-O2 -ldl -fprofile-correction" } */
#define _GNU_SOURCE
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index 91462350132..d2e60a5a6df 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -42,6 +42,10 @@ void __gcov_init (struct gcov_info *p __attribute__ ((unused))) {}
#include <sys/stat.h>
#endif
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
#ifdef L_gcov
/* A utility function for outputting errors. */
@@ -334,30 +338,66 @@ read_error:
return -1;
}
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+
/* Store all TOP N counters where each has a dynamic length. */
static void
-write_top_counters (const struct gcov_ctr_info *ci_ptr,
- unsigned t_ix,
- gcov_unsigned_t n_counts)
+write_topn_counters (const struct gcov_ctr_info *ci_ptr,
+ unsigned t_ix,
+ gcov_unsigned_t n_counts)
{
unsigned counters = n_counts / GCOV_TOPN_MEM_COUNTERS;
gcc_assert (n_counts % GCOV_TOPN_MEM_COUNTERS == 0);
+
+ /* It can happen in a multi-threaded environment that number of counters is
+ different from the size of the corresponding linked lists. */
+#define LIST_SIZE_MIN_LENGTH 4 * 1024
+
+ static unsigned *list_sizes = NULL;
+ static unsigned list_size_length = 0;
+
+ if (list_sizes == NULL || counters > list_size_length)
+ {
+ list_size_length = MAX (LIST_SIZE_MIN_LENGTH, counters);
+#if HAVE_SYS_MMAN_H
+ list_sizes = (unsigned *)mmap (NULL, list_size_length * sizeof (unsigned),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#endif
+
+ /* Malloc fallback. */
+ if (list_sizes == NULL)
+ list_sizes = (unsigned *)xmalloc (list_size_length * sizeof (unsigned));
+ }
+
+ memset (list_sizes, 0, counters * sizeof (unsigned));
unsigned pair_total = 0;
+
for (unsigned i = 0; i < counters; i++)
- pair_total += ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 1];
+ {
+ gcov_type start = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 2];
+ for (struct gcov_kvp *node = (struct gcov_kvp *)(intptr_t)start;
+ node != NULL; node = node->next)
+ {
+ ++pair_total;
+ ++list_sizes[i];
+ }
+ }
+
unsigned disk_size = GCOV_TOPN_DISK_COUNTERS * counters + 2 * pair_total;
gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix),
GCOV_TAG_COUNTER_LENGTH (disk_size));
for (unsigned i = 0; i < counters; i++)
{
- gcov_type pair_count = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 1];
gcov_write_counter (ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i]);
- gcov_write_counter (pair_count);
+ gcov_write_counter (list_sizes[i]);
gcov_type start = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 2];
+
+ unsigned j = 0;
for (struct gcov_kvp *node = (struct gcov_kvp *)(intptr_t)start;
- node != NULL; node = node->next)
+ j < list_sizes[i]; node = node->next, j++)
{
gcov_write_counter (node->value);
gcov_write_counter (node->count);
@@ -425,7 +465,7 @@ write_one_data (const struct gcov_info *gi_ptr,
n_counts = ci_ptr->num;
if (t_ix == GCOV_COUNTER_V_TOPN || t_ix == GCOV_COUNTER_V_INDIR)
- write_top_counters (ci_ptr, t_ix, n_counts);
+ write_topn_counters (ci_ptr, t_ix, n_counts);
else
{
/* Do not stream when all counters are zero. */
--
2.30.1