On Thu, 1 Aug 2019, Alexander Monakov wrote: > On Thu, 1 Aug 2019, Richard Biener wrote: > > > So - were you able to allocate some cycles for this? I think > > we've settled with doing the implementation qsort_r-style while > > implementing the wrapping with fancy C++ templates. > > Yes. It works, but not quite as perfectly as I'd hoped. I found two issues: > > 1. The fancy wrapper needs C++11; in C++98, it can't be applied to static > comparators, so in that case we need option-1-like wrapping. > I noticed thanks to the fact we build stage1 with -std=gnu++98.
Ah, yes - I think this is why we have some non-static functions that are used in template params elsewhere. Boo. > 2. Sometimes we fail to inline the comparator into the wrapper; I imagine it > happens when inlining hits an internal limit (even though on its own such > inlining is clearly beneficial). In particular a couple comparators in > IRA/LRA suffer from this. Ugh. Yeah, it's probably some inlining parameter. OTOH I assumed the inliner would always inline into a simple wrapper if that makes the wrapped function unused - of course if we can't make that function static then it cannot know... it looks like we put the functions into anonmous namespaces though! Does that fix the inlining? > Two questions for you: > > 1. You indicated a dislike for using templates to "duplicate" functionality in > sort.cc to implement both classic qsort and qsort_r. What are the main > concerns, specifically? I find it a reasonable approach. I disliked duplicating the code, not duplicating the instantiation. Using templates sounds fine if it doesn't end up being too ugly. > 2. In your patch, you're using a union to copy between a void * and a function > pointer, saying C++ disallows a cast. A simple C-style cast worked well > enough for me (as used in the patch). Am I missing something? Hmm, it works to silence the fnptr to void * casting but not the other way around: void foo (); int main() { void (*p)() = foo; void *data = (void *)p; p = (void *)data; } > g++-8 t.C -S t.C: In function ‘int main()’: t.C:6:7: error: invalid conversion from ‘void*’ to ‘void (*)()’ [-fpermissive] p = (void *)data; ^~~~~~~~~~~~ > The patch is below, I'll add comments and resubmit properly if you want to see > it on trunk. Thanks! > > diff --git a/gcc/system.h b/gcc/system.h > index d04f8fd3360..2cd5475b32c 100644 > --- a/gcc/system.h > +++ b/gcc/system.h > @@ -1200,13 +1200,30 @@ helper_const_non_const_cast (const char *p) > /* GCC qsort API-compatible functions: except in release-checking compilers, > redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations > corresponding to vec::qsort (cmp): they use C qsort internally anyway. */ > -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); > -void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *)); > -void gcc_stablesort (void *, size_t, size_t, > - int (*)(const void *, const void *)); > +typedef int sort_cmp_fn(const void *, const void *, void *); > +void gcc_sort (void *, size_t, size_t, sort_cmp_fn *, void *); > +void gcc_sort_chk (void *, size_t, size_t, sort_cmp_fn *, void *); > +void gcc_stablesort (void *, size_t, size_t, sort_cmp_fn *, void *); > +#ifdef __cplusplus > +#if __cplusplus >= 201103L > +template<int cmp (const void *, const void *)> > +int cmp2to3 (const void *a, const void *b, void *) > +{ > + return cmp (a, b); > +} > +#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3<cmp>, NULL) > +#define qsort_1(cmp) sort (cmp2to3<cmp>, NULL) > +#else > +int cmp2to3 (const void *, const void *, void *); > +#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3, (void > *)cmp) > +#define qsort_1(cmp) sort (cmp2to3, (void *)cmp) > +#endif > #define PP_5th(a1, a2, a3, a4, a5, ...) a5 > #undef qsort > -#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) > (__VA_ARGS__) > +#define qsort(...) PP_5th (__VA_ARGS__, qsort_4, 3, 2, qsort_1, 0) > (__VA_ARGS__) > +#else > +#pragma GCC poison qsort > +#endif > > #define ONE_K 1024 > #define ONE_M (ONE_K * ONE_K) > diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c > index 0ac39140c6c..e1be383880a 100644 > --- a/gcc/bb-reorder.c > +++ b/gcc/bb-reorder.c > @@ -2360,7 +2360,7 @@ reorder_basic_blocks_software_trace_cache (void) > /* Order edges by execution frequency, higher first. */ > > static int > -edge_order (const void *ve1, const void *ve2) > +edge_order (const void *ve1, const void *ve2, void *) > { > edge e1 = *(const edge *) ve1; > edge e2 = *(const edge *) ve2; > @@ -2423,7 +2423,7 @@ reorder_basic_blocks_simple (void) > all edges are equally desirable. */ > > if (optimize_function_for_speed_p (cfun)) > - gcc_stablesort (edges, n, sizeof *edges, edge_order); > + gcc_stablesort (edges, n, sizeof *edges, edge_order, NULL); > > /* Now decide which of those edges to make fallthrough edges. We set > BB_VISITED if a block already has a fallthrough successor assigned > diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c > index 4ad1f658708..e411eadc039 100644 > --- a/gcc/cfgloop.c > +++ b/gcc/cfgloop.c > @@ -971,11 +971,11 @@ get_loop_body_in_dom_order (const class loop *loop) > > basic_block * > get_loop_body_in_custom_order (const class loop *loop, > - int (*bb_comparator) (const void *, const void > *)) > + sort_cmp_fn *bb_comparator) > { > basic_block *bbs = get_loop_body (loop); > > - qsort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator); > + gcc_sort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, NULL); > > return bbs; > } > diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h > index 0b0154ffd7b..7b7fa5bc6f7 100644 > --- a/gcc/cfgloop.h > +++ b/gcc/cfgloop.h > @@ -375,7 +375,7 @@ extern unsigned get_loop_body_with_size (const class loop > *, basic_block *, > extern basic_block *get_loop_body_in_dom_order (const class loop *); > extern basic_block *get_loop_body_in_bfs_order (const class loop *); > extern basic_block *get_loop_body_in_custom_order (const class loop *, > - int (*) (const void *, const void *)); > + sort_cmp_fn *); > > extern vec<edge> get_loop_exit_edges (const class loop *); > extern edge single_exit (const class loop *); > diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c > index 0968d9769fa..2bf0fdd5e27 100644 > --- a/gcc/ggc-common.c > +++ b/gcc/ggc-common.c > @@ -925,7 +925,7 @@ public: > > /* Compare wrapper used by qsort method. */ > static int > - compare (const void *first, const void *second) > + compare (const void *first, const void *second, void *) > { > const mem_pair_t f = *(const mem_pair_t *)first; > const mem_pair_t s = *(const mem_pair_t *)second; > @@ -935,7 +935,7 @@ public: > > /* Compare rows in final GGC summary dump. */ > static int > - compare_final (const void *first, const void *second) > + compare_final (const void *first, const void *second, void *) > { > typedef std::pair<mem_location *, ggc_usage *> mem_pair_t; > > diff --git a/gcc/ira-color.c b/gcc/ira-color.c > index 99236994d64..ad2d7c433ce 100644 > --- a/gcc/ira-color.c > +++ b/gcc/ira-color.c > @@ -2251,7 +2251,7 @@ add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t > *bucket_ptr) > one. As the result of such assignment order, the second allocno > has a better chance to get the best hard register. */ > static int > -bucket_allocno_compare_func (const void *v1p, const void *v2p) > +bucket_allocno_compare_func (const void *v1p, const void *v2p, void * = NULL) > { > ira_allocno_t a1 = *(const ira_allocno_t *) v1p; > ira_allocno_t a2 = *(const ira_allocno_t *) v2p; > @@ -2296,8 +2296,7 @@ bucket_allocno_compare_func (const void *v1p, const > void *v2p) > /* Sort bucket *BUCKET_PTR and return the result through > BUCKET_PTR. */ > static void > -sort_bucket (ira_allocno_t *bucket_ptr, > - int (*compare_func) (const void *, const void *)) > +sort_bucket (ira_allocno_t *bucket_ptr, sort_cmp_fn *compare_func) > { > ira_allocno_t a, head; > int n; > @@ -2308,7 +2307,7 @@ sort_bucket (ira_allocno_t *bucket_ptr, > sorted_allocnos[n++] = a; > if (n <= 1) > return; > - qsort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func); > + gcc_sort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func, NULL); > head = NULL; > for (n--; n >= 0; n--) > { > @@ -2580,7 +2579,7 @@ allocno_spill_priority_compare (ira_allocno_t a1, > ira_allocno_t a2) > > /* Used for sorting allocnos for spilling. */ > static int > -allocno_spill_sort_compare (const void *v1p, const void *v2p) > +allocno_spill_sort_compare (const void *v1p, const void *v2p, void *) > { > ira_allocno_t p1 = *(const ira_allocno_t *) v1p; > ira_allocno_t p2 = *(const ira_allocno_t *) v2p; > diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h > index 9ceb9ccc55b..0a62603fd33 100644 > --- a/gcc/mem-stats.h > +++ b/gcc/mem-stats.h > @@ -188,7 +188,7 @@ public: > > /* Compare wrapper used by qsort method. */ > static int > - compare (const void *first, const void *second) > + compare (const void *first, const void *second, void *) > { > typedef std::pair<mem_location *, mem_usage *> mem_pair_t; > > @@ -362,13 +362,12 @@ public: > the number of elements in the list. If we want to process custom order, > CMP comparator can be provided. */ > mem_list_t *get_list (mem_alloc_origin origin, unsigned *length, > - int (*cmp) (const void *first, > - const void *second) = NULL); > + sort_cmp_fn *cmp = NULL); > > /* Dump all tracked instances of type ORIGIN. If we want to process custom > order, CMP comparator can be provided. */ > void dump (mem_alloc_origin origin, > - int (*cmp) (const void *first, const void *second) = NULL); > + sort_cmp_fn *cmp = NULL); > > /* Reverse object map used for every object allocation mapping. */ > reverse_object_map_t *m_reverse_object_map; > @@ -594,8 +593,7 @@ template <class T> > inline > typename mem_alloc_description<T>::mem_list_t * > mem_alloc_description<T>::get_list (mem_alloc_origin origin, unsigned > *length, > - int (*cmp) (const void *first, > - const void *second)) > + sort_cmp_fn *cmp) > { > /* vec data structure is not used because all vectors generate memory > allocation info a it would create a cycle. */ > @@ -608,7 +606,7 @@ mem_alloc_description<T>::get_list (mem_alloc_origin > origin, unsigned *length, > if ((*it).first->m_origin == origin) > list[i++] = std::pair<mem_location*, T*> (*it); > > - qsort (list, i, element_size, cmp == NULL ? T::compare : cmp); > + gcc_sort (list, i, element_size, cmp == NULL ? T::compare : cmp, NULL); > *length = i; > > return list; > @@ -638,8 +636,7 @@ mem_alloc_description<T>::get_sum (mem_alloc_origin > origin) > template <class T> > inline void > mem_alloc_description<T>::dump (mem_alloc_origin origin, > - int (*cmp) (const void *first, > - const void *second)) > + sort_cmp_fn *cmp) > { > unsigned length; > > diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c > index bb8016bb530..4d6e5f3a2c0 100644 > --- a/gcc/sel-sched-ir.c > +++ b/gcc/sel-sched-ir.c > @@ -5982,7 +5982,7 @@ sel_create_new_region (void) > /* If X has a smaller topological sort number than Y, returns -1; > if greater, returns 1. */ > static int > -bb_top_order_comparator (const void *x, const void *y) > +bb_top_order_comparator (const void *x, const void *y, void *) > { > basic_block bb1 = *(const basic_block *) x; > basic_block bb2 = *(const basic_block *) y; > diff --git a/gcc/sort.cc b/gcc/sort.cc > index 3e9c032c462..efd0fe75478 100644 > --- a/gcc/sort.cc > +++ b/gcc/sort.cc > @@ -45,13 +45,11 @@ along with GCC; see the file COPYING3. If not see > #define noinline > #endif > > -/* C-style qsort comparator function type. */ > -typedef int cmp_fn (const void *, const void *); > - > /* Structure holding read-mostly (read-only in netsort) context. */ > struct sort_ctx > { > - cmp_fn *cmp; // pointer to comparator > + void *data; // comparator user data > + sort_cmp_fn *cmp; // pointer to comparator > char *out; // output buffer > size_t n; // number of elements > size_t size; // element size > @@ -128,10 +126,10 @@ do { > \ > This is noinline to avoid code growth and confine invocation > to a single call site, assisting indirect branch prediction. */ > noinline static intptr_t > -cmp1 (char *e0, char *e1, cmp_fn *cmp) > +cmp1 (char *e0, char *e1, void *data, sort_cmp_fn *cmp) > { > intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; > - return x & (cmp (e0, e1) >> 31); > + return x & (cmp (e0, e1, data) >> 31); > } > > /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. > @@ -141,7 +139,7 @@ netsort (char *in, sort_ctx *c) > { > #define CMP(e0, e1) \ > do { \ > - intptr_t x = cmp1 (e1, e0, c->cmp); \ > + intptr_t x = cmp1 (e1, e0, c->data, c->cmp); \ > e0 = (char *)((intptr_t)e0 ^ x); \ > e1 = (char *)((intptr_t)e1 ^ x); \ > } while (0) > @@ -194,7 +192,7 @@ mergesort (char *in, sort_ctx *c, size_t n, char *out, > char *tmp) > /* Merge sorted halves given by L, R to [OUT, END). */ > #define MERGE_ELTSIZE(SIZE) \ > do { \ > - intptr_t mr = c->cmp (r, l) >> 31; \ > + intptr_t mr = c->cmp (r, l, c->data) >> 31; \ > intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ > lr = (intptr_t)l ^ (lr & mr); \ > out = (char *)memcpy (out, (char *)lr, SIZE); \ > @@ -204,7 +202,7 @@ do { \ > l += ~mr & SIZE; \ > } while (r != end) > > - if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) > + if (likely (c->cmp(r, l + (r - out) - c->size, c->data) < 0)) > { > char *end = out + n * c->size; > if (sizeof (size_t) == 8 && likely (c->size == 8)) > @@ -218,7 +216,7 @@ do { \ > } > > void > -gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) > +gcc_sort (void *vbase, size_t n, size_t size, sort_cmp_fn *cmp, void *data) > { > if (n < 2) > return; > @@ -227,7 +225,7 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn > *cmp) > if (stable) > nlim = 3, size = ~size; > char *base = (char *)vbase; > - sort_ctx c = {cmp, base, n, size, nlim}; > + sort_ctx c = {data, cmp, base, n, size, nlim}; > long long scratch[32]; > size_t bufsz = (n / 2) * size; > void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); > @@ -235,12 +233,19 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn > *cmp) > if (buf != scratch) > free (buf); > #if CHECKING_P > - qsort_chk (vbase, n, size, cmp); > + gcc_sort_chk (vbase, n, size, cmp, data); > #endif > } > > void > -gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp) > +gcc_stablesort (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void > *data) > { > - gcc_qsort (vbase, n, ~size, cmp); > + gcc_sort (base, n, ~size, cmp, data); > } > + > +#if __cplusplus < 201103L > +int cmp2to3(const void *a, const void *b, void *c) > +{ > + return ((int (*)(const void *, const void *))c)(a, b); > +} > +#endif > diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c > index 81784866ad1..fcc9cd6fe04 100644 > --- a/gcc/tree-loop-distribution.c > +++ b/gcc/tree-loop-distribution.c > @@ -486,7 +486,7 @@ static int bb_top_order_index_size; > if greater, returns 1. */ > > static int > -bb_top_order_cmp (const void *x, const void *y) > +bb_top_order_cmp (const void *x, const void *y, void *) > { > basic_block bb1 = *(const basic_block *) x; > basic_block bb2 = *(const basic_block *) y; > @@ -2606,7 +2606,7 @@ version_for_distribution_p (vec<struct partition *> > *partitions, > /* Compare base offset of builtin mem* partitions P1 and P2. */ > > static int > -offset_cmp (const void *vp1, const void *vp2) > +offset_cmp (const void *vp1, const void *vp2, void *) > { > struct partition *p1 = *(struct partition *const *) vp1; > struct partition *p2 = *(struct partition *const *) vp2; > @@ -2665,7 +2665,7 @@ fuse_memset_builtins (vec<struct partition *> > *partitions) > > /* Stable sort is required in order to avoid breaking dependence. */ > gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i], > - offset_cmp); > + offset_cmp, NULL); > /* Continue with next partition. */ > i = j; > } > diff --git a/gcc/vec.c b/gcc/vec.c > index cbd2db010f9..43ae799fcbd 100644 > --- a/gcc/vec.c > +++ b/gcc/vec.c > @@ -192,21 +192,23 @@ dump_vec_loc_statistics (void) > ATTRIBUTE_NORETURN ATTRIBUTE_COLD > static void > qsort_chk_error (const void *p1, const void *p2, const void *p3, > - int (*cmp) (const void *, const void *)) > + sort_cmp_fn *cmp, void *data) > { > if (!p3) > { > - int r1 = cmp (p1, p2), r2 = cmp (p2, p1); > - error ("qsort comparator not anti-commutative: %d, %d", r1, r2); > + int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data); > + error ("qsort comparator not anti-symmetric: %d, %d", r1, r2); > } > else if (p1 == p2) > { > - int r = cmp (p1, p3); > + int r = cmp (p1, p3, data); > error ("qsort comparator non-negative on sorted output: %d", r); > } > else > { > - int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3); > + int r1 = cmp (p1, p2, data); > + int r2 = cmp (p2, p3, data); > + int r3 = cmp (p1, p3, data); > error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3); > } > internal_error ("qsort checking failed"); > @@ -215,8 +217,7 @@ qsort_chk_error (const void *p1, const void *p2, const > void *p3, > /* Verify anti-symmetry and transitivity for comparator CMP on sorted array > of N SIZE-sized elements pointed to by BASE. */ > void > -qsort_chk (void *base, size_t n, size_t size, > - int (*cmp)(const void *, const void *)) > +gcc_sort_chk (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void > *data) > { > #if 0 > #define LIM(n) (n) > @@ -225,9 +226,9 @@ qsort_chk (void *base, size_t n, size_t size, > #define LIM(n) ((n) <= 16 ? (n) : 12 + floor_log2 (n)) > #endif > #define ELT(i) ((const char *) base + (i) * size) > -#define CMP(i, j) cmp (ELT (i), ELT (j)) > -#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp) > -#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp) > +#define CMP(i, j) cmp (ELT (i), ELT (j), data) > +#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data) > +#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data) > size_t i1, i2, i, j; > /* This outer loop iterates over maximum spans [i1, i2) such that > elements within each span compare equal to each other. */ > diff --git a/gcc/vec.h b/gcc/vec.h > index 2dbf3074da0..86885486433 100644 > --- a/gcc/vec.h > +++ b/gcc/vec.h > @@ -592,7 +592,7 @@ public: > void ordered_remove (unsigned); > void unordered_remove (unsigned); > void block_remove (unsigned, unsigned); > - void qsort (int (*) (const void *, const void *)); > + void sort (sort_cmp_fn *, void *); > T *bsearch (const void *key, int (*compar)(const void *, const void *)); > unsigned lower_bound (T, bool (*)(const T &, const T &)) const; > bool contains (const T &search) const; > @@ -1108,10 +1108,10 @@ vec<T, A, vl_embed>::block_remove (unsigned ix, > unsigned len) > > template<typename T, typename A> > inline void > -vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *)) > +vec<T, A, vl_embed>::sort (sort_cmp_fn *cmp, void *data) > { > if (length () > 1) > - ::qsort (address (), length (), sizeof (T), cmp); > + gcc_sort (address (), length (), sizeof (T), cmp, data); > } > > > @@ -1400,7 +1400,7 @@ public: > void ordered_remove (unsigned); > void unordered_remove (unsigned); > void block_remove (unsigned, unsigned); > - void qsort (int (*) (const void *, const void *)); > + void sort (sort_cmp_fn *, void *); > T *bsearch (const void *key, int (*compar)(const void *, const void *)); > unsigned lower_bound (T, bool (*)(const T &, const T &)) const; > bool contains (const T &search) const; > @@ -1892,10 +1892,10 @@ vec<T, va_heap, vl_ptr>::block_remove (unsigned ix, > unsigned len) > > template<typename T> > inline void > -vec<T, va_heap, vl_ptr>::qsort (int (*cmp) (const void *, const void *)) > +vec<T, va_heap, vl_ptr>::sort (sort_cmp_fn *cmp, void *data) > { > if (m_vec) > - m_vec->qsort (cmp); > + m_vec->sort (cmp, data); > } > > > -- Richard Biener <rguent...@suse.de> SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)