On Mon, 22 Jul 2019, Alexander Monakov wrote:

> Hi!
> 
> On Mon, 22 Jul 2019, Richard Biener wrote:
> 
> > I've noticed that we have quite some instances using qsort
> > and passing down state to the comparator via a global var.
> > Now that we have our own qsort implementation I wonder what
> > the best course of action is to add a variant passing down
> > such state to the comparator?  Copying nearly the whole
> > implementation would be possible but also some marshalling
> > of three-argument comparator calls to two-argument functions
> > (and some ABIs can do without actual marshalling).  The
> > other option is templating everything (ugh).
> 
> I think there are three choices.
> 
> 1. Have gcc/sort.cc implement only qsort_r, use a "universal comparator
> adapter"
> 
> int cmp2to3(void *a, void *b, void *ctx)
> {
>   return ((int (*)(void *, void *))ctx)(a, b);
> }
> 
> to adapt existing qsort uses.
> 
> 2. Have gcc/sort.cc implement both qsort and qsort_r by duplicating code,
> either copying manually (ugh) or by turning netsort, mergesort, qsort_chk
> to templates.
> 
> 3. Have gcc/sort.cc implement only qsort_r, but have existing qsort callers
> pass their comparators to a fancy C++ "universal comparator adapter" so
> they can be inlined into the three-argument wrapper and thus
> speed/size penalties are tiny (only from always loading the context pointer
> that is ignored by legacy two-argument comparators):
> 
> void qsort_r(void *, size_t, size_t, int cmp(void *, void *, void *));
> 
> template<int cmp(void *, void *)>
> int cmp2to3(void *a, void *b, void *c)
> {
>   return cmp(a, b);
> }
> 
> #define qsort(base, sz, n, cmp) \
>   qsort_r(base, sz, n, cmp2to3<cmp>)
> 
> static int my_cmp(void *a, void *b)
> {
>   return 0;
> }
> 
> void test(void *base, size_t sz, size_t n)
> {
>   qsort(base, sz, n, my_cmp);
> }

OK, so like below.  A slight complication is that C++ doesn't allow
casting void * to int (*)(), which means we probably have to go
with the template wrapper.  I have to check whether we use gcc_qsort
from C code; the prototypes are currently in system.h.
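
For reference, the template-wrapper route would look roughly like this
with our const-qualified comparator signature (sketch only, names are
illustrative; the patch below goes the union way instead):

/* Hypothetical template adapter: one instantiation per two-argument
   comparator, so no object-to-function-pointer cast is needed and the
   comparator can be inlined into the wrapper.  */
template<int CMP (const void *, const void *)>
static int
cmp2to3_tmpl (const void *a, const void *b, void *)
{
  return CMP (a, b);
}

/* Usage sketch: gcc_qsort_r (base, n, size, cmp2to3_tmpl<my_cmp>, NULL);  */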

Of course the easiest approach is to simply rewrite all qsort calls to
use [gcc_]qsort_r, not provide qsort at all, and poison it.  I'm
leaning towards this "solution".
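
Roughly, that route would mean keeping only the three-argument entry
points and turning any remaining plain qsort call into a hard error; a
sketch of the system.h side (assuming the existing #pragma GCC poison
mechanism, details to be worked out):

void gcc_qsort_r (void *, size_t, size_t,
                  int (*)(const void *, const void *, void *), void *);
void gcc_stablesort_r (void *, size_t, size_t,
                       int (*)(const void *, const void *, void *), void *);
#pragma GCC poison qsort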

On targets with register passing conventions we can also
completely elide the cmp2to3 wrapper (ok, that's a hack).
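
That shortcut would look roughly like this (sketch only; calling a
two-argument comparator through a three-argument pointer is not valid
portable C++, it merely happens to work where the extra context
argument lands in a register the callee never reads):

/* Non-portable: reinterpret the legacy comparator as a three-argument
   one and pass a dummy context, skipping the wrapper entirely.  */
gcc_qsort_r (base, n, size,
             reinterpret_cast<int (*)(const void *, const void *, void *)> (my_cmp),
             NULL);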

Richard.

Index: gcc/system.h
===================================================================
--- gcc/system.h        (revision 273657)
+++ gcc/system.h        (working copy)
@@ -1200,10 +1200,15 @@ helper_const_non_const_cast (const char
 /* GCC qsort API-compatible functions: except in release-checking compilers,
    redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations
    corresponding to vec::qsort (cmp): they use C qsort internally anyway.  */
-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
+void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *,
+                                               void *), void *);
 void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
+void gcc_qsort_r (void *, size_t, size_t, int (*)(const void *, const void *,
+                                                 void *), void *);
 void gcc_stablesort (void *, size_t, size_t,
                     int (*)(const void *, const void *));
+void gcc_stablesort_r (void *, size_t, size_t,
+                      int (*)(const void *, const void *, void *), void *);
 #define PP_5th(a1, a2, a3, a4, a5, ...) a5
 #undef qsort
 #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__)
Index: gcc/sort.cc
===================================================================
--- gcc/sort.cc (revision 273657)
+++ gcc/sort.cc (working copy)
@@ -46,12 +46,13 @@ along with GCC; see the file COPYING3.
 #endif
 
 /* C-style qsort comparator function type.  */
-typedef int cmp_fn (const void *, const void *);
+typedef int cmp_fn (const void *, const void *, void *);
 
 /* Structure holding read-mostly (read-only in netsort) context.  */
 struct sort_ctx
 {
   cmp_fn *cmp; // pointer to comparator
+  void *cmp_d; // pointer to comparator data
   char   *out; // output buffer
   size_t n;    // number of elements
   size_t size; // element size
@@ -128,10 +129,10 @@ do {
    This is noinline to avoid code growth and confine invocation
    to a single call site, assisting indirect branch prediction.  */
 noinline static intptr_t
-cmp1 (char *e0, char *e1, cmp_fn *cmp)
+cmp1 (char *e0, char *e1, cmp_fn *cmp, void *cmp_d)
 {
   intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
-  return x & (cmp (e0, e1) >> 31);
+  return x & (cmp (e0, e1, cmp_d) >> 31);
 }
 
 /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
@@ -141,7 +142,7 @@ netsort (char *in, sort_ctx *c)
 {
 #define CMP(e0, e1)                   \
 do {                                  \
-  intptr_t x = cmp1 (e1, e0, c->cmp); \
+  intptr_t x = cmp1 (e1, e0, c->cmp, c->cmp_d); \
   e0 = (char *)((intptr_t)e0 ^ x);    \
   e1 = (char *)((intptr_t)e1 ^ x);    \
 } while (0)
@@ -194,7 +195,7 @@ mergesort (char *in, sort_ctx *c, size_t
   /* Merge sorted halves given by L, R to [OUT, END).  */
 #define MERGE_ELTSIZE(SIZE)                     \
 do {                                            \
-  intptr_t mr = c->cmp (r, l) >> 31;            \
+  intptr_t mr = c->cmp (r, l, c->cmp_d) >> 31;            \
   intptr_t lr = (intptr_t)l ^ (intptr_t)r;      \
   lr = (intptr_t)l ^ (lr & mr);                 \
   out = (char *)memcpy (out, (char *)lr, SIZE); \
@@ -204,7 +205,7 @@ do {
   l += ~mr & SIZE;                              \
 } while (r != end)
 
-  if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
+  if (likely (c->cmp(r, l + (r - out) - c->size, c->cmp_d) < 0))
     {
       char *end = out + n * c->size;
       if (sizeof (size_t) == 8 && likely (c->size == 8))
@@ -218,7 +219,7 @@ do {
 }
 
 void
-gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_qsort_r (void *vbase, size_t n, size_t size, cmp_fn *cmp, void *data)
 {
   if (n < 2)
     return;
@@ -227,7 +228,7 @@ gcc_qsort (void *vbase, size_t n, size_t
   if (stable)
     nlim = 3, size = ~size;
   char *base = (char *)vbase;
-  sort_ctx c = {cmp, base, n, size, nlim};
+  sort_ctx c = {cmp, data, base, n, size, nlim};
   long long scratch[32];
   size_t bufsz = (n / 2) * size;
   void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
@@ -235,12 +236,44 @@ gcc_qsort (void *vbase, size_t n, size_t
   if (buf != scratch)
     free (buf);
 #if CHECKING_P
-  qsort_chk (vbase, n, size, cmp);
+  qsort_chk (vbase, n, size, cmp, data);
 #endif
 }
 
 void
-gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_stablesort_r (void *vbase, size_t n, size_t size, cmp_fn *cmp, void *data)
+{
+  gcc_qsort_r (vbase, n, ~size, cmp, data);
+}
+
+static int
+cmp2to3 (const void *a, const void *b, void *cmp)
+{
+  /* C++ doesn't permit conversions between pointer-to data
+     and pointer-to function.  */
+  union {
+    void *data;
+    int (*cmp)(const void *, const void *);
+  } u;
+  u.data = cmp;
+  return u.cmp (a, b);
+}
+
+void
+gcc_qsort (void *vbase, size_t n, size_t size,
+          int (*cmp)(const void *, const void *))
+{
+  union {
+    void *data;
+    int (*cmp)(const void *, const void *);
+  } u;
+  u.cmp = cmp;
+  gcc_qsort_r (vbase, n, size, cmp2to3, u.data);
+}
+
+void
+gcc_stablesort (void *vbase, size_t n, size_t size,
+               int (*cmp)(const void *, const void *))
 {
   gcc_qsort (vbase, n, ~size, cmp);
 }
Index: gcc/vec.c
===================================================================
--- gcc/vec.c   (revision 273657)
+++ gcc/vec.c   (working copy)
@@ -192,21 +192,23 @@ dump_vec_loc_statistics (void)
 ATTRIBUTE_NORETURN ATTRIBUTE_COLD
 static void
 qsort_chk_error (const void *p1, const void *p2, const void *p3,
-                int (*cmp) (const void *, const void *))
+                int (*cmp) (const void *, const void *, void *), void *data)
 {
   if (!p3)
     {
-      int r1 = cmp (p1, p2), r2 = cmp (p2, p1);
+      int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data);
       error ("qsort comparator not anti-commutative: %d, %d", r1, r2);
     }
   else if (p1 == p2)
     {
-      int r = cmp (p1, p3);
+      int r = cmp (p1, p3, data);
       error ("qsort comparator non-negative on sorted output: %d", r);
     }
   else
     {
-      int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3);
+      int r1 = cmp (p1, p2, data);
+      int r2 = cmp (p2, p3, data);
+      int r3 = cmp (p1, p3, data);
       error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3);
     }
   internal_error ("qsort checking failed");
@@ -216,7 +218,7 @@ qsort_chk_error (const void *p1, const v
    of N SIZE-sized elements pointed to by BASE.  */
 void
 qsort_chk (void *base, size_t n, size_t size,
-          int (*cmp)(const void *, const void *))
+          int (*cmp)(const void *, const void *, void *), void *data)
 {
 #if 0
 #define LIM(n) (n)
@@ -225,9 +227,9 @@ qsort_chk (void *base, size_t n, size_t
 #define LIM(n) ((n) <= 16 ? (n) : 12 + floor_log2 (n))
 #endif
 #define ELT(i) ((const char *) base + (i) * size)
-#define CMP(i, j) cmp (ELT (i), ELT (j))
-#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp)
-#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp)
+#define CMP(i, j) cmp (ELT (i), ELT (j), data)
+#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data)
+#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data)
   size_t i1, i2, i, j;
   /* This outer loop iterates over maximum spans [i1, i2) such that
      elements within each span compare equal to each other.  */
Index: gcc/domwalk.c
===================================================================
--- gcc/domwalk.c       (revision 273657)
+++ gcc/domwalk.c       (working copy)
@@ -128,14 +128,12 @@ along with GCC; see the file COPYING3.
     which is currently an abstraction over walking tree statements.  Thus
     the dominator walker is currently only useful for trees.  */
 
-/* Reverse postorder index of each basic block.  */
-static int *bb_postorder;
-
 static int
-cmp_bb_postorder (const void *a, const void *b)
+cmp_bb_postorder (const void *a, const void *b, void *data)
 {
   basic_block bb1 = *(const basic_block *)(a);
   basic_block bb2 = *(const basic_block *)(b);
+  int *bb_postorder = (int *)data;
   /* Place higher completion number first (pop off lower number first).  */
   return bb_postorder[bb2->index] - bb_postorder[bb1->index];
 }
@@ -144,7 +142,7 @@ cmp_bb_postorder (const void *a, const v
    i.e. by descending number in BB_POSTORDER array.  */
 
 static void
-sort_bbs_postorder (basic_block *bbs, int n)
+sort_bbs_postorder (basic_block *bbs, int n, int *bb_postorder)
 {
   if (__builtin_expect (n == 2, true))
     {
@@ -166,7 +164,7 @@ sort_bbs_postorder (basic_block *bbs, in
       bbs[0] = bb0, bbs[1] = bb1, bbs[2] = bb2;
     }
   else
-    qsort (bbs, n, sizeof *bbs, cmp_bb_postorder);
+    gcc_qsort_r (bbs, n, sizeof *bbs, cmp_bb_postorder, bb_postorder);
 }
 
 /* Set EDGE_EXECUTABLE on every edge within FN's CFG.  */
@@ -294,7 +292,6 @@ dom_walker::walk (basic_block bb)
   basic_block *worklist = XNEWVEC (basic_block,
                                   n_basic_blocks_for_fn (cfun) * 2);
   int sp = 0;
-  bb_postorder = m_bb_to_rpo;
 
   while (true)
     {
@@ -339,7 +336,8 @@ dom_walker::walk (basic_block bb)
              if (sp - saved_sp > 1
                  && m_dom_direction == CDI_DOMINATORS
                  && m_bb_to_rpo)
-               sort_bbs_postorder (&worklist[saved_sp], sp - saved_sp);
+               sort_bbs_postorder (&worklist[saved_sp], sp - saved_sp,
+                                   m_bb_to_rpo);
            }
        }
       /* NULL is used to mark pop operations in the recursion stack.  */
@@ -360,6 +358,5 @@ dom_walker::walk (basic_block bb)
       else
        break;
     }
-  bb_postorder = NULL;
   free (worklist);
 }
