On Thu, 1 Aug 2019, Richard Biener wrote:

> So - were you able to allocate some cycles for this?  I think
> we've settled with doing the implementation qsort_r-style while
> implementing the wrapping with fancy C++ templates.

Yes.  It works, but not quite as perfectly as I'd hoped.  I found two issues:

1. The fancy wrapper needs C++11; in C++98, it can't be applied to static
   comparators, so in that case we need option-1-like wrapping.
   I noticed thanks to the fact we build stage1 with -std=gnu++98.

2. Sometimes we fail to inline the comparator into the wrapper; I imagine it
   happens when inlining hits an internal limit (even though on its own such
   inlining is clearly beneficial).  In particular a couple comparators in
   IRA/LRA suffer from this.

Two questions for you:

1. You indicated a dislike for using templates to "duplicate" functionality in
   sort.cc to implement both classic qsort and qsort_r.  What are the main
   concerns, specifically?  I find it a reasonable approach.

2. In your patch, you're using a union to copy between a void * and a function
   pointer, saying C++ disallows a cast.  A simple C-style cast worked well
   enough for me (as used in the patch).  Am I missing something?

The patch is below, I'll add comments and resubmit properly if you want to see
it on trunk.  Thanks!

diff --git a/gcc/system.h b/gcc/system.h
index d04f8fd3360..2cd5475b32c 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1200,13 +1200,30 @@ helper_const_non_const_cast (const char *p)
 /* GCC qsort API-compatible functions: except in release-checking compilers,
    redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations
    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
-void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
-void gcc_stablesort (void *, size_t, size_t,
-                    int (*)(const void *, const void *));
+typedef int sort_cmp_fn(const void *, const void *, void *);
+void gcc_sort (void *, size_t, size_t, sort_cmp_fn *, void *);
+void gcc_sort_chk (void *, size_t, size_t, sort_cmp_fn *, void *);
+void gcc_stablesort (void *, size_t, size_t, sort_cmp_fn *, void *);
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+template<int cmp (const void *, const void *)>
+int cmp2to3 (const void *a, const void *b, void *)
+{
+  return cmp (a, b);
+}
+#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3<cmp>, NULL)
+#define qsort_1(cmp) sort (cmp2to3<cmp>, NULL)
+#else
+int cmp2to3 (const void *, const void *, void *);
+#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3, (void *)cmp)
+#define qsort_1(cmp) sort (cmp2to3, (void *)cmp)
+#endif
 #define PP_5th(a1, a2, a3, a4, a5, ...) a5
 #undef qsort
-#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) 
(__VA_ARGS__)
+#define qsort(...) PP_5th (__VA_ARGS__, qsort_4, 3, 2, qsort_1, 0) 
(__VA_ARGS__)
+#else
+#pragma GCC poison qsort
+#endif
 
 #define ONE_K 1024
 #define ONE_M (ONE_K * ONE_K)
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 0ac39140c6c..e1be383880a 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -2360,7 +2360,7 @@ reorder_basic_blocks_software_trace_cache (void)
 /* Order edges by execution frequency, higher first.  */
 
 static int
-edge_order (const void *ve1, const void *ve2)
+edge_order (const void *ve1, const void *ve2, void *)
 {
   edge e1 = *(const edge *) ve1;
   edge e2 = *(const edge *) ve2;
@@ -2423,7 +2423,7 @@ reorder_basic_blocks_simple (void)
      all edges are equally desirable.  */
 
   if (optimize_function_for_speed_p (cfun))
-    gcc_stablesort (edges, n, sizeof *edges, edge_order);
+    gcc_stablesort (edges, n, sizeof *edges, edge_order, NULL);
 
   /* Now decide which of those edges to make fallthrough edges.  We set
      BB_VISITED if a block already has a fallthrough successor assigned
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 4ad1f658708..e411eadc039 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -971,11 +971,11 @@ get_loop_body_in_dom_order (const class loop *loop)
 
 basic_block *
 get_loop_body_in_custom_order (const class loop *loop,
-                              int (*bb_comparator) (const void *, const void 
*))
+                              sort_cmp_fn *bb_comparator)
 {
   basic_block *bbs = get_loop_body (loop);
 
-  qsort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator);
+  gcc_sort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, NULL);
 
   return bbs;
 }
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7b..7b7fa5bc6f7 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -375,7 +375,7 @@ extern unsigned get_loop_body_with_size (const class loop 
*, basic_block *,
 extern basic_block *get_loop_body_in_dom_order (const class loop *);
 extern basic_block *get_loop_body_in_bfs_order (const class loop *);
 extern basic_block *get_loop_body_in_custom_order (const class loop *,
-                              int (*) (const void *, const void *));
+                                                  sort_cmp_fn *);
 
 extern vec<edge> get_loop_exit_edges (const class loop *);
 extern edge single_exit (const class loop *);
diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c
index 0968d9769fa..2bf0fdd5e27 100644
--- a/gcc/ggc-common.c
+++ b/gcc/ggc-common.c
@@ -925,7 +925,7 @@ public:
 
   /* Compare wrapper used by qsort method.  */
   static int
-  compare (const void *first, const void *second)
+  compare (const void *first, const void *second, void *)
   {
     const mem_pair_t f = *(const mem_pair_t *)first;
     const mem_pair_t s = *(const mem_pair_t *)second;
@@ -935,7 +935,7 @@ public:
 
   /* Compare rows in final GGC summary dump.  */
   static int
-  compare_final (const void *first, const void *second)
+  compare_final (const void *first, const void *second, void *)
   {
     typedef std::pair<mem_location *, ggc_usage *> mem_pair_t;
 
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 99236994d64..ad2d7c433ce 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -2251,7 +2251,7 @@ add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t 
*bucket_ptr)
    one.  As the result of such assignment order, the second allocno
    has a better chance to get the best hard register.  */
 static int
-bucket_allocno_compare_func (const void *v1p, const void *v2p)
+bucket_allocno_compare_func (const void *v1p, const void *v2p, void * = NULL)
 {
   ira_allocno_t a1 = *(const ira_allocno_t *) v1p;
   ira_allocno_t a2 = *(const ira_allocno_t *) v2p;
@@ -2296,8 +2296,7 @@ bucket_allocno_compare_func (const void *v1p, const void 
*v2p)
 /* Sort bucket *BUCKET_PTR and return the result through
    BUCKET_PTR.  */
 static void
-sort_bucket (ira_allocno_t *bucket_ptr,
-            int (*compare_func) (const void *, const void *))
+sort_bucket (ira_allocno_t *bucket_ptr, sort_cmp_fn *compare_func)
 {
   ira_allocno_t a, head;
   int n;
@@ -2308,7 +2307,7 @@ sort_bucket (ira_allocno_t *bucket_ptr,
     sorted_allocnos[n++] = a;
   if (n <= 1)
     return;
-  qsort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func);
+  gcc_sort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func, NULL);
   head = NULL;
   for (n--; n >= 0; n--)
     {
@@ -2580,7 +2579,7 @@ allocno_spill_priority_compare (ira_allocno_t a1, 
ira_allocno_t a2)
 
 /* Used for sorting allocnos for spilling.  */
 static int
-allocno_spill_sort_compare (const void *v1p, const void *v2p)
+allocno_spill_sort_compare (const void *v1p, const void *v2p, void *)
 {
   ira_allocno_t p1 = *(const ira_allocno_t *) v1p;
   ira_allocno_t p2 = *(const ira_allocno_t *) v2p;
diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h
index 9ceb9ccc55b..0a62603fd33 100644
--- a/gcc/mem-stats.h
+++ b/gcc/mem-stats.h
@@ -188,7 +188,7 @@ public:
 
   /* Compare wrapper used by qsort method.  */
   static int
-  compare (const void *first, const void *second)
+  compare (const void *first, const void *second, void *)
   {
     typedef std::pair<mem_location *, mem_usage *> mem_pair_t;
 
@@ -362,13 +362,12 @@ public:
      the number of elements in the list. If we want to process custom order,
      CMP comparator can be provided.  */
   mem_list_t *get_list (mem_alloc_origin origin, unsigned *length,
-                       int (*cmp) (const void *first,
-                                   const void *second) = NULL);
+                       sort_cmp_fn *cmp = NULL);
 
   /* Dump all tracked instances of type ORIGIN. If we want to process custom
      order, CMP comparator can be provided.  */
   void dump (mem_alloc_origin origin,
-            int (*cmp) (const void *first, const void *second) = NULL);
+            sort_cmp_fn *cmp = NULL);
 
   /* Reverse object map used for every object allocation mapping.  */
   reverse_object_map_t *m_reverse_object_map;
@@ -594,8 +593,7 @@ template <class T>
 inline
 typename mem_alloc_description<T>::mem_list_t *
 mem_alloc_description<T>::get_list (mem_alloc_origin origin, unsigned *length,
-                                   int (*cmp) (const void *first,
-                                               const void *second))
+                                   sort_cmp_fn *cmp)
 {
   /* vec data structure is not used because all vectors generate memory
      allocation info a it would create a cycle.  */
@@ -608,7 +606,7 @@ mem_alloc_description<T>::get_list (mem_alloc_origin 
origin, unsigned *length,
     if ((*it).first->m_origin == origin)
       list[i++] = std::pair<mem_location*, T*> (*it);
 
-  qsort (list, i, element_size, cmp == NULL ? T::compare : cmp);
+  gcc_sort (list, i, element_size, cmp == NULL ? T::compare : cmp, NULL);
   *length = i;
 
   return list;
@@ -638,8 +636,7 @@ mem_alloc_description<T>::get_sum (mem_alloc_origin origin)
 template <class T>
 inline void
 mem_alloc_description<T>::dump (mem_alloc_origin origin,
-                               int (*cmp) (const void *first,
-                                           const void *second))
+                               sort_cmp_fn *cmp)
 {
   unsigned length;
 
diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index bb8016bb530..4d6e5f3a2c0 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -5982,7 +5982,7 @@ sel_create_new_region (void)
 /* If X has a smaller topological sort number than Y, returns -1;
    if greater, returns 1.  */
 static int
-bb_top_order_comparator (const void *x, const void *y)
+bb_top_order_comparator (const void *x, const void *y, void *)
 {
   basic_block bb1 = *(const basic_block *) x;
   basic_block bb2 = *(const basic_block *) y;
diff --git a/gcc/sort.cc b/gcc/sort.cc
index 3e9c032c462..efd0fe75478 100644
--- a/gcc/sort.cc
+++ b/gcc/sort.cc
@@ -45,13 +45,11 @@ along with GCC; see the file COPYING3.  If not see
 #define noinline
 #endif
 
-/* C-style qsort comparator function type.  */
-typedef int cmp_fn (const void *, const void *);
-
 /* Structure holding read-mostly (read-only in netsort) context.  */
 struct sort_ctx
 {
-  cmp_fn *cmp; // pointer to comparator
+  void        *data; // comparator user data
+  sort_cmp_fn *cmp;  // pointer to comparator
   char   *out; // output buffer
   size_t n;    // number of elements
   size_t size; // element size
@@ -128,10 +126,10 @@ do {                                                     \
    This is noinline to avoid code growth and confine invocation
    to a single call site, assisting indirect branch prediction.  */
 noinline static intptr_t
-cmp1 (char *e0, char *e1, cmp_fn *cmp)
+cmp1 (char *e0, char *e1, void *data, sort_cmp_fn *cmp)
 {
   intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
-  return x & (cmp (e0, e1) >> 31);
+  return x & (cmp (e0, e1, data) >> 31);
 }
 
 /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
@@ -141,7 +139,7 @@ netsort (char *in, sort_ctx *c)
 {
 #define CMP(e0, e1)                   \
 do {                                  \
-  intptr_t x = cmp1 (e1, e0, c->cmp); \
+  intptr_t x = cmp1 (e1, e0, c->data, c->cmp); \
   e0 = (char *)((intptr_t)e0 ^ x);    \
   e1 = (char *)((intptr_t)e1 ^ x);    \
 } while (0)
@@ -194,7 +192,7 @@ mergesort (char *in, sort_ctx *c, size_t n, char *out, char 
*tmp)
   /* Merge sorted halves given by L, R to [OUT, END).  */
 #define MERGE_ELTSIZE(SIZE)                     \
 do {                                            \
-  intptr_t mr = c->cmp (r, l) >> 31;            \
+  intptr_t mr = c->cmp (r, l, c->data) >> 31;   \
   intptr_t lr = (intptr_t)l ^ (intptr_t)r;      \
   lr = (intptr_t)l ^ (lr & mr);                 \
   out = (char *)memcpy (out, (char *)lr, SIZE); \
@@ -204,7 +202,7 @@ do {                                            \
   l += ~mr & SIZE;                              \
 } while (r != end)
 
-  if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
+  if (likely (c->cmp(r, l + (r - out) - c->size, c->data) < 0))
     {
       char *end = out + n * c->size;
       if (sizeof (size_t) == 8 && likely (c->size == 8))
@@ -218,7 +216,7 @@ do {                                            \
 }
 
 void
-gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_sort (void *vbase, size_t n, size_t size, sort_cmp_fn *cmp, void *data)
 {
   if (n < 2)
     return;
@@ -227,7 +225,7 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
   if (stable)
     nlim = 3, size = ~size;
   char *base = (char *)vbase;
-  sort_ctx c = {cmp, base, n, size, nlim};
+  sort_ctx c = {data, cmp, base, n, size, nlim};
   long long scratch[32];
   size_t bufsz = (n / 2) * size;
   void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
@@ -235,12 +233,19 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn 
*cmp)
   if (buf != scratch)
     free (buf);
 #if CHECKING_P
-  qsort_chk (vbase, n, size, cmp);
+  gcc_sort_chk (vbase, n, size, cmp, data);
 #endif
 }
 
 void
-gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_stablesort (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void 
*data)
 {
-  gcc_qsort (vbase, n, ~size, cmp);
+  gcc_sort (base, n, ~size, cmp, data);
 }
+
+#if __cplusplus < 201103L
+int cmp2to3(const void *a, const void *b, void *c)
+{
+  return ((int (*)(const void *, const void *))c)(a, b);
+}
+#endif
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 81784866ad1..fcc9cd6fe04 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -486,7 +486,7 @@ static int bb_top_order_index_size;
    if greater, returns 1.  */
 
 static int
-bb_top_order_cmp (const void *x, const void *y)
+bb_top_order_cmp (const void *x, const void *y, void *)
 {
   basic_block bb1 = *(const basic_block *) x;
   basic_block bb2 = *(const basic_block *) y;
@@ -2606,7 +2606,7 @@ version_for_distribution_p (vec<struct partition *> 
*partitions,
 /* Compare base offset of builtin mem* partitions P1 and P2.  */
 
 static int
-offset_cmp (const void *vp1, const void *vp2)
+offset_cmp (const void *vp1, const void *vp2, void *)
 {
   struct partition *p1 = *(struct partition *const *) vp1;
   struct partition *p2 = *(struct partition *const *) vp2;
@@ -2665,7 +2665,7 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
 
       /* Stable sort is required in order to avoid breaking dependence.  */
       gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],
-                     offset_cmp);
+                     offset_cmp, NULL);
       /* Continue with next partition.  */
       i = j;
     }
diff --git a/gcc/vec.c b/gcc/vec.c
index cbd2db010f9..43ae799fcbd 100644
--- a/gcc/vec.c
+++ b/gcc/vec.c
@@ -192,21 +192,23 @@ dump_vec_loc_statistics (void)
 ATTRIBUTE_NORETURN ATTRIBUTE_COLD
 static void
 qsort_chk_error (const void *p1, const void *p2, const void *p3,
-                int (*cmp) (const void *, const void *))
+                sort_cmp_fn *cmp, void *data)
 {
   if (!p3)
     {
-      int r1 = cmp (p1, p2), r2 = cmp (p2, p1);
-      error ("qsort comparator not anti-commutative: %d, %d", r1, r2);
+      int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data);
+      error ("qsort comparator not anti-symmetric: %d, %d", r1, r2);
     }
   else if (p1 == p2)
     {
-      int r = cmp (p1, p3);
+      int r = cmp (p1, p3, data);
       error ("qsort comparator non-negative on sorted output: %d", r);
     }
   else
     {
-      int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3);
+      int r1 = cmp (p1, p2, data);
+      int r2 = cmp (p2, p3, data);
+      int r3 = cmp (p1, p3, data);
       error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3);
     }
   internal_error ("qsort checking failed");
@@ -215,8 +217,7 @@ qsort_chk_error (const void *p1, const void *p2, const void 
*p3,
 /* Verify anti-symmetry and transitivity for comparator CMP on sorted array
    of N SIZE-sized elements pointed to by BASE.  */
 void
-qsort_chk (void *base, size_t n, size_t size,
-          int (*cmp)(const void *, const void *))
+gcc_sort_chk (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void *data)
 {
 #if 0
 #define LIM(n) (n)
@@ -225,9 +226,9 @@ qsort_chk (void *base, size_t n, size_t size,
 #define LIM(n) ((n) <= 16 ? (n) : 12 + floor_log2 (n))
 #endif
 #define ELT(i) ((const char *) base + (i) * size)
-#define CMP(i, j) cmp (ELT (i), ELT (j))
-#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp)
-#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp)
+#define CMP(i, j) cmp (ELT (i), ELT (j), data)
+#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data)
+#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data)
   size_t i1, i2, i, j;
   /* This outer loop iterates over maximum spans [i1, i2) such that
      elements within each span compare equal to each other.  */
diff --git a/gcc/vec.h b/gcc/vec.h
index 2dbf3074da0..86885486433 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -592,7 +592,7 @@ public:
   void ordered_remove (unsigned);
   void unordered_remove (unsigned);
   void block_remove (unsigned, unsigned);
-  void qsort (int (*) (const void *, const void *));
+  void sort (sort_cmp_fn *, void *);
   T *bsearch (const void *key, int (*compar)(const void *, const void *));
   unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
   bool contains (const T &search) const;
@@ -1108,10 +1108,10 @@ vec<T, A, vl_embed>::block_remove (unsigned ix, 
unsigned len)
 
 template<typename T, typename A>
 inline void
-vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *))
+vec<T, A, vl_embed>::sort (sort_cmp_fn *cmp, void *data)
 {
   if (length () > 1)
-    ::qsort (address (), length (), sizeof (T), cmp);
+    gcc_sort (address (), length (), sizeof (T), cmp, data);
 }
 
 
@@ -1400,7 +1400,7 @@ public:
   void ordered_remove (unsigned);
   void unordered_remove (unsigned);
   void block_remove (unsigned, unsigned);
-  void qsort (int (*) (const void *, const void *));
+  void sort (sort_cmp_fn *, void *);
   T *bsearch (const void *key, int (*compar)(const void *, const void *));
   unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
   bool contains (const T &search) const;
@@ -1892,10 +1892,10 @@ vec<T, va_heap, vl_ptr>::block_remove (unsigned ix, 
unsigned len)
 
 template<typename T>
 inline void
-vec<T, va_heap, vl_ptr>::qsort (int (*cmp) (const void *, const void *))
+vec<T, va_heap, vl_ptr>::sort (sort_cmp_fn *cmp, void *data)
 {
   if (m_vec)
-    m_vec->qsort (cmp);
+    m_vec->sort (cmp, data);
 }
 
 

Reply via email to