On Thu, 1 Aug 2019, Richard Biener wrote: > So - were you able to allocate some cycles for this? I think > we've settled with doing the implementation qsort_r-style while > implementing the wrapping with fancy C++ templates.
Yes. It works, but not quite as perfectly as I'd hoped. I found two issues: 1. The fancy wrapper needs C++11; in C++98, it can't be applied to static comparators, so in that case we need option-1-like wrapping. I noticed thanks to the fact we build stage1 with -std=gnu++98. 2. Sometimes we fail to inline the comparator into the wrapper; I imagine it happens when inlining hits an internal limit (even though on its own such inlining is clearly beneficial). In particular a couple comparators in IRA/LRA suffer from this. Two questions for you: 1. You indicated a dislike for using templates to "duplicate" functionality in sort.cc to implement both classic qsort and qsort_r. What are the main concerns, specifically? I find it a reasonable approach. 2. In your patch, you're using a union to copy between a void * and a function pointer, saying C++ disallows a cast. A simple C-style cast worked well enough for me (as used in the patch). Am I missing something? The patch is below, I'll add comments and resubmit properly if you want to see it on trunk. Thanks! diff --git a/gcc/system.h b/gcc/system.h index d04f8fd3360..2cd5475b32c 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -1200,13 +1200,30 @@ helper_const_non_const_cast (const char *p) /* GCC qsort API-compatible functions: except in release-checking compilers, redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations corresponding to vec::qsort (cmp): they use C qsort internally anyway. */ -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); -void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *)); -void gcc_stablesort (void *, size_t, size_t, - int (*)(const void *, const void *)); +typedef int sort_cmp_fn(const void *, const void *, void *); +void gcc_sort (void *, size_t, size_t, sort_cmp_fn *, void *); +void gcc_sort_chk (void *, size_t, size_t, sort_cmp_fn *, void *); +void gcc_stablesort (void *, size_t, size_t, sort_cmp_fn *, void *); +#ifdef __cplusplus +#if __cplusplus >= 201103L +template<int cmp (const void *, const void *)> +int cmp2to3 (const void *a, const void *b, void *) +{ + return cmp (a, b); +} +#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3<cmp>, NULL) +#define qsort_1(cmp) sort (cmp2to3<cmp>, NULL) +#else +int cmp2to3 (const void *, const void *, void *); +#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3, (void *)cmp) +#define qsort_1(cmp) sort (cmp2to3, (void *)cmp) +#endif #define PP_5th(a1, a2, a3, a4, a5, ...) a5 #undef qsort -#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__) +#define qsort(...) PP_5th (__VA_ARGS__, qsort_4, 3, 2, qsort_1, 0) (__VA_ARGS__) +#else +#pragma GCC poison qsort +#endif #define ONE_K 1024 #define ONE_M (ONE_K * ONE_K) diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c index 0ac39140c6c..e1be383880a 100644 --- a/gcc/bb-reorder.c +++ b/gcc/bb-reorder.c @@ -2360,7 +2360,7 @@ reorder_basic_blocks_software_trace_cache (void) /* Order edges by execution frequency, higher first. */ static int -edge_order (const void *ve1, const void *ve2) +edge_order (const void *ve1, const void *ve2, void *) { edge e1 = *(const edge *) ve1; edge e2 = *(const edge *) ve2; @@ -2423,7 +2423,7 @@ reorder_basic_blocks_simple (void) all edges are equally desirable. */ if (optimize_function_for_speed_p (cfun)) - gcc_stablesort (edges, n, sizeof *edges, edge_order); + gcc_stablesort (edges, n, sizeof *edges, edge_order, NULL); /* Now decide which of those edges to make fallthrough edges. We set BB_VISITED if a block already has a fallthrough successor assigned diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 4ad1f658708..e411eadc039 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -971,11 +971,11 @@ get_loop_body_in_dom_order (const class loop *loop) basic_block * get_loop_body_in_custom_order (const class loop *loop, - int (*bb_comparator) (const void *, const void *)) + sort_cmp_fn *bb_comparator) { basic_block *bbs = get_loop_body (loop); - qsort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator); + gcc_sort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, NULL); return bbs; } diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0b0154ffd7b..7b7fa5bc6f7 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -375,7 +375,7 @@ extern unsigned get_loop_body_with_size (const class loop *, basic_block *, extern basic_block *get_loop_body_in_dom_order (const class loop *); extern basic_block *get_loop_body_in_bfs_order (const class loop *); extern basic_block *get_loop_body_in_custom_order (const class loop *, - int (*) (const void *, const void *)); + sort_cmp_fn *); extern vec<edge> get_loop_exit_edges (const class loop *); extern edge single_exit (const class loop *); diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c index 0968d9769fa..2bf0fdd5e27 100644 --- a/gcc/ggc-common.c +++ b/gcc/ggc-common.c @@ -925,7 +925,7 @@ public: /* Compare wrapper used by qsort method. */ static int - compare (const void *first, const void *second) + compare (const void *first, const void *second, void *) { const mem_pair_t f = *(const mem_pair_t *)first; const mem_pair_t s = *(const mem_pair_t *)second; @@ -935,7 +935,7 @@ public: /* Compare rows in final GGC summary dump. */ static int - compare_final (const void *first, const void *second) + compare_final (const void *first, const void *second, void *) { typedef std::pair<mem_location *, ggc_usage *> mem_pair_t; diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 99236994d64..ad2d7c433ce 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -2251,7 +2251,7 @@ add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t *bucket_ptr) one. As the result of such assignment order, the second allocno has a better chance to get the best hard register. */ static int -bucket_allocno_compare_func (const void *v1p, const void *v2p) +bucket_allocno_compare_func (const void *v1p, const void *v2p, void * = NULL) { ira_allocno_t a1 = *(const ira_allocno_t *) v1p; ira_allocno_t a2 = *(const ira_allocno_t *) v2p; @@ -2296,8 +2296,7 @@ bucket_allocno_compare_func (const void *v1p, const void *v2p) /* Sort bucket *BUCKET_PTR and return the result through BUCKET_PTR. */ static void -sort_bucket (ira_allocno_t *bucket_ptr, - int (*compare_func) (const void *, const void *)) +sort_bucket (ira_allocno_t *bucket_ptr, sort_cmp_fn *compare_func) { ira_allocno_t a, head; int n; @@ -2308,7 +2307,7 @@ sort_bucket (ira_allocno_t *bucket_ptr, sorted_allocnos[n++] = a; if (n <= 1) return; - qsort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func); + gcc_sort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func, NULL); head = NULL; for (n--; n >= 0; n--) { @@ -2580,7 +2579,7 @@ allocno_spill_priority_compare (ira_allocno_t a1, ira_allocno_t a2) /* Used for sorting allocnos for spilling. */ static int -allocno_spill_sort_compare (const void *v1p, const void *v2p) +allocno_spill_sort_compare (const void *v1p, const void *v2p, void *) { ira_allocno_t p1 = *(const ira_allocno_t *) v1p; ira_allocno_t p2 = *(const ira_allocno_t *) v2p; diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h index 9ceb9ccc55b..0a62603fd33 100644 --- a/gcc/mem-stats.h +++ b/gcc/mem-stats.h @@ -188,7 +188,7 @@ public: /* Compare wrapper used by qsort method. */ static int - compare (const void *first, const void *second) + compare (const void *first, const void *second, void *) { typedef std::pair<mem_location *, mem_usage *> mem_pair_t; @@ -362,13 +362,12 @@ public: the number of elements in the list. If we want to process custom order, CMP comparator can be provided. */ mem_list_t *get_list (mem_alloc_origin origin, unsigned *length, - int (*cmp) (const void *first, - const void *second) = NULL); + sort_cmp_fn *cmp = NULL); /* Dump all tracked instances of type ORIGIN. If we want to process custom order, CMP comparator can be provided. */ void dump (mem_alloc_origin origin, - int (*cmp) (const void *first, const void *second) = NULL); + sort_cmp_fn *cmp = NULL); /* Reverse object map used for every object allocation mapping. */ reverse_object_map_t *m_reverse_object_map; @@ -594,8 +593,7 @@ template <class T> inline typename mem_alloc_description<T>::mem_list_t * mem_alloc_description<T>::get_list (mem_alloc_origin origin, unsigned *length, - int (*cmp) (const void *first, - const void *second)) + sort_cmp_fn *cmp) { /* vec data structure is not used because all vectors generate memory allocation info a it would create a cycle. */ @@ -608,7 +606,7 @@ mem_alloc_description<T>::get_list (mem_alloc_origin origin, unsigned *length, if ((*it).first->m_origin == origin) list[i++] = std::pair<mem_location*, T*> (*it); - qsort (list, i, element_size, cmp == NULL ? T::compare : cmp); + gcc_sort (list, i, element_size, cmp == NULL ? T::compare : cmp, NULL); *length = i; return list; @@ -638,8 +636,7 @@ mem_alloc_description<T>::get_sum (mem_alloc_origin origin) template <class T> inline void mem_alloc_description<T>::dump (mem_alloc_origin origin, - int (*cmp) (const void *first, - const void *second)) + sort_cmp_fn *cmp) { unsigned length; diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c index bb8016bb530..4d6e5f3a2c0 100644 --- a/gcc/sel-sched-ir.c +++ b/gcc/sel-sched-ir.c @@ -5982,7 +5982,7 @@ sel_create_new_region (void) /* If X has a smaller topological sort number than Y, returns -1; if greater, returns 1. */ static int -bb_top_order_comparator (const void *x, const void *y) +bb_top_order_comparator (const void *x, const void *y, void *) { basic_block bb1 = *(const basic_block *) x; basic_block bb2 = *(const basic_block *) y; diff --git a/gcc/sort.cc b/gcc/sort.cc index 3e9c032c462..efd0fe75478 100644 --- a/gcc/sort.cc +++ b/gcc/sort.cc @@ -45,13 +45,11 @@ along with GCC; see the file COPYING3. If not see #define noinline #endif -/* C-style qsort comparator function type. */ -typedef int cmp_fn (const void *, const void *); - /* Structure holding read-mostly (read-only in netsort) context. */ struct sort_ctx { - cmp_fn *cmp; // pointer to comparator + void *data; // comparator user data + sort_cmp_fn *cmp; // pointer to comparator char *out; // output buffer size_t n; // number of elements size_t size; // element size @@ -128,10 +126,10 @@ do { \ This is noinline to avoid code growth and confine invocation to a single call site, assisting indirect branch prediction. */ noinline static intptr_t -cmp1 (char *e0, char *e1, cmp_fn *cmp) +cmp1 (char *e0, char *e1, void *data, sort_cmp_fn *cmp) { intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; - return x & (cmp (e0, e1) >> 31); + return x & (cmp (e0, e1, data) >> 31); } /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. @@ -141,7 +139,7 @@ netsort (char *in, sort_ctx *c) { #define CMP(e0, e1) \ do { \ - intptr_t x = cmp1 (e1, e0, c->cmp); \ + intptr_t x = cmp1 (e1, e0, c->data, c->cmp); \ e0 = (char *)((intptr_t)e0 ^ x); \ e1 = (char *)((intptr_t)e1 ^ x); \ } while (0) @@ -194,7 +192,7 @@ mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) /* Merge sorted halves given by L, R to [OUT, END). */ #define MERGE_ELTSIZE(SIZE) \ do { \ - intptr_t mr = c->cmp (r, l) >> 31; \ + intptr_t mr = c->cmp (r, l, c->data) >> 31; \ intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ lr = (intptr_t)l ^ (lr & mr); \ out = (char *)memcpy (out, (char *)lr, SIZE); \ @@ -204,7 +202,7 @@ do { \ l += ~mr & SIZE; \ } while (r != end) - if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) + if (likely (c->cmp(r, l + (r - out) - c->size, c->data) < 0)) { char *end = out + n * c->size; if (sizeof (size_t) == 8 && likely (c->size == 8)) @@ -218,7 +216,7 @@ do { \ } void -gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) +gcc_sort (void *vbase, size_t n, size_t size, sort_cmp_fn *cmp, void *data) { if (n < 2) return; @@ -227,7 +225,7 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) if (stable) nlim = 3, size = ~size; char *base = (char *)vbase; - sort_ctx c = {cmp, base, n, size, nlim}; + sort_ctx c = {data, cmp, base, n, size, nlim}; long long scratch[32]; size_t bufsz = (n / 2) * size; void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); @@ -235,12 +233,19 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) if (buf != scratch) free (buf); #if CHECKING_P - qsort_chk (vbase, n, size, cmp); + gcc_sort_chk (vbase, n, size, cmp, data); #endif } void -gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp) +gcc_stablesort (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void *data) { - gcc_qsort (vbase, n, ~size, cmp); + gcc_sort (base, n, ~size, cmp, data); } + +#if __cplusplus < 201103L +int cmp2to3(const void *a, const void *b, void *c) +{ + return ((int (*)(const void *, const void *))c)(a, b); +} +#endif diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 81784866ad1..fcc9cd6fe04 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -486,7 +486,7 @@ static int bb_top_order_index_size; if greater, returns 1. */ static int -bb_top_order_cmp (const void *x, const void *y) +bb_top_order_cmp (const void *x, const void *y, void *) { basic_block bb1 = *(const basic_block *) x; basic_block bb2 = *(const basic_block *) y; @@ -2606,7 +2606,7 @@ version_for_distribution_p (vec<struct partition *> *partitions, /* Compare base offset of builtin mem* partitions P1 and P2. */ static int -offset_cmp (const void *vp1, const void *vp2) +offset_cmp (const void *vp1, const void *vp2, void *) { struct partition *p1 = *(struct partition *const *) vp1; struct partition *p2 = *(struct partition *const *) vp2; @@ -2665,7 +2665,7 @@ fuse_memset_builtins (vec<struct partition *> *partitions) /* Stable sort is required in order to avoid breaking dependence. */ gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i], - offset_cmp); + offset_cmp, NULL); /* Continue with next partition. */ i = j; } diff --git a/gcc/vec.c b/gcc/vec.c index cbd2db010f9..43ae799fcbd 100644 --- a/gcc/vec.c +++ b/gcc/vec.c @@ -192,21 +192,23 @@ dump_vec_loc_statistics (void) ATTRIBUTE_NORETURN ATTRIBUTE_COLD static void qsort_chk_error (const void *p1, const void *p2, const void *p3, - int (*cmp) (const void *, const void *)) + sort_cmp_fn *cmp, void *data) { if (!p3) { - int r1 = cmp (p1, p2), r2 = cmp (p2, p1); - error ("qsort comparator not anti-commutative: %d, %d", r1, r2); + int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data); + error ("qsort comparator not anti-symmetric: %d, %d", r1, r2); } else if (p1 == p2) { - int r = cmp (p1, p3); + int r = cmp (p1, p3, data); error ("qsort comparator non-negative on sorted output: %d", r); } else { - int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3); + int r1 = cmp (p1, p2, data); + int r2 = cmp (p2, p3, data); + int r3 = cmp (p1, p3, data); error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3); } internal_error ("qsort checking failed"); @@ -215,8 +217,7 @@ qsort_chk_error (const void *p1, const void *p2, const void *p3, /* Verify anti-symmetry and transitivity for comparator CMP on sorted array of N SIZE-sized elements pointed to by BASE. */ void -qsort_chk (void *base, size_t n, size_t size, - int (*cmp)(const void *, const void *)) +gcc_sort_chk (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void *data) { #if 0 #define LIM(n) (n) @@ -225,9 +226,9 @@ qsort_chk (void *base, size_t n, size_t size, #define LIM(n) ((n) <= 16 ? (n) : 12 + floor_log2 (n)) #endif #define ELT(i) ((const char *) base + (i) * size) -#define CMP(i, j) cmp (ELT (i), ELT (j)) -#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp) -#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp) +#define CMP(i, j) cmp (ELT (i), ELT (j), data) +#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data) +#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data) size_t i1, i2, i, j; /* This outer loop iterates over maximum spans [i1, i2) such that elements within each span compare equal to each other. */ diff --git a/gcc/vec.h b/gcc/vec.h index 2dbf3074da0..86885486433 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -592,7 +592,7 @@ public: void ordered_remove (unsigned); void unordered_remove (unsigned); void block_remove (unsigned, unsigned); - void qsort (int (*) (const void *, const void *)); + void sort (sort_cmp_fn *, void *); T *bsearch (const void *key, int (*compar)(const void *, const void *)); unsigned lower_bound (T, bool (*)(const T &, const T &)) const; bool contains (const T &search) const; @@ -1108,10 +1108,10 @@ vec<T, A, vl_embed>::block_remove (unsigned ix, unsigned len) template<typename T, typename A> inline void -vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *)) +vec<T, A, vl_embed>::sort (sort_cmp_fn *cmp, void *data) { if (length () > 1) - ::qsort (address (), length (), sizeof (T), cmp); + gcc_sort (address (), length (), sizeof (T), cmp, data); } @@ -1400,7 +1400,7 @@ public: void ordered_remove (unsigned); void unordered_remove (unsigned); void block_remove (unsigned, unsigned); - void qsort (int (*) (const void *, const void *)); + void sort (sort_cmp_fn *, void *); T *bsearch (const void *key, int (*compar)(const void *, const void *)); unsigned lower_bound (T, bool (*)(const T &, const T &)) const; bool contains (const T &search) const; @@ -1892,10 +1892,10 @@ vec<T, va_heap, vl_ptr>::block_remove (unsigned ix, unsigned len) template<typename T> inline void -vec<T, va_heap, vl_ptr>::qsort (int (*cmp) (const void *, const void *)) +vec<T, va_heap, vl_ptr>::sort (sort_cmp_fn *cmp, void *data) { if (m_vec) - m_vec->qsort (cmp); + m_vec->sort (cmp, data); }