On Mon, 22 Jul 2019, Alexander Monakov wrote: > Hi! > > On Mon, 22 Jul 2019, Richard Biener wrote: > > > I've noticed that we have quite some instances using qsort > > and passing down state to the comparator via a global var. > > Now that we have our own qsort implementation I wonder what > > the best course of action is to add a variant passing down > > such state to the comparator? Copying nearly the whole > > implementation would be possible but also some marshalling > > of three-argument comparator calls to two-argument functions > > (and some ABIs can do without actual marshalling). The > > other option is templating everything (ugh). > > I think there are three choices. > > 1. Have gcc/sort.cc implement only qsort_r, use a "universal comparator > adapter" > > int cmp2to3(void *a, void *b, void *ctx) > { > return ((int (*)(void *, void *))ctx)(a, b); > } > > to adapt existing qsort uses. > > 2. Have gcc/sort.cc implement both qsort and qsort_r by duplicating code, > either copying manually (ugh) or by turning netsort, mergesort, qsort_chk > to templates. > > 3. Have gcc/sort.cc implement only qsort_r, but have existing qsort callers > pass their comparators to a fancy C++ "universal comparator adapter" so > they can be inlined into the three-argument wrapper and thus > speed/size penalties are tiny (only from always loading the context pointer > that is ignored by legacy two-argument comparators): > > void qsort_r(void *, size_t, size_t, int cmp(void *, void *, void *)); > > template<int cmp(void *, void *)> > int cmp2to3(void *a, void *b, void *c) > { > return cmp(a, b); > } > > #define qsort(base, sz, n, cmp) \ > qsort_r(base, sz, n, cmp2to3<cmp>) > > static int my_cmp(void *a, void *b) > { > return 0; > } > > void test(void *base, size_t sz, size_t n) > { > qsort(base, sz, n, my_cmp); > }
OK, so like below. Slight complication is that C++ doesn't allow the void * to int (*)() casting which means we probably have to go with the template wrapper. I have to check if we use gcc_qsort from C code, the prototypes are currently in system.h. Of course easiest is to simply rewrite all qsort calls to use [gcc_]qsort_r and not provide qsort and poison it. I'm leaning towards this "solution". On targets with register passing conventions we can also completely elide the cmp2to3 wrapper (ok, that's a hack). Richard. Index: gcc/system.h =================================================================== --- gcc/system.h (revision 273657) +++ gcc/system.h (working copy) @@ -1200,10 +1200,15 @@ helper_const_non_const_cast (const char /* GCC qsort API-compatible functions: except in release-checking compilers, redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations corresponding to vec::qsort (cmp): they use C qsort internally anyway. */ -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); +void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *, + void *), void *); void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *)); +void gcc_qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, + void *), void *); void gcc_stablesort (void *, size_t, size_t, int (*)(const void *, const void *)); +void gcc_stablesort_r (void *, size_t, size_t, + int (*)(const void *, const void *, void *), void *); #define PP_5th(a1, a2, a3, a4, a5, ...) a5 #undef qsort #define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__) Index: gcc/sort.cc =================================================================== --- gcc/sort.cc (revision 273657) +++ gcc/sort.cc (working copy) @@ -46,12 +46,13 @@ along with GCC; see the file COPYING3. #endif /* C-style qsort comparator function type. 
*/ -typedef int cmp_fn (const void *, const void *); +typedef int cmp_fn (const void *, const void *, void *); /* Structure holding read-mostly (read-only in netsort) context. */ struct sort_ctx { cmp_fn *cmp; // pointer to comparator + void *cmp_d; // pointer to comparator data char *out; // output buffer size_t n; // number of elements size_t size; // element size @@ -128,10 +129,10 @@ do { This is noinline to avoid code growth and confine invocation to a single call site, assisting indirect branch prediction. */ noinline static intptr_t -cmp1 (char *e0, char *e1, cmp_fn *cmp) +cmp1 (char *e0, char *e1, cmp_fn *cmp, void *cmp_d) { intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; - return x & (cmp (e0, e1) >> 31); + return x & (cmp (e0, e1, cmp_d) >> 31); } /* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. @@ -141,7 +142,7 @@ netsort (char *in, sort_ctx *c) { #define CMP(e0, e1) \ do { \ - intptr_t x = cmp1 (e1, e0, c->cmp); \ + intptr_t x = cmp1 (e1, e0, c->cmp, c->cmp_d); \ e0 = (char *)((intptr_t)e0 ^ x); \ e1 = (char *)((intptr_t)e1 ^ x); \ } while (0) @@ -194,7 +195,7 @@ mergesort (char *in, sort_ctx *c, size_t /* Merge sorted halves given by L, R to [OUT, END). 
*/ #define MERGE_ELTSIZE(SIZE) \ do { \ - intptr_t mr = c->cmp (r, l) >> 31; \ + intptr_t mr = c->cmp (r, l, c->cmp_d) >> 31; \ intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ lr = (intptr_t)l ^ (lr & mr); \ out = (char *)memcpy (out, (char *)lr, SIZE); \ @@ -204,7 +205,7 @@ do { l += ~mr & SIZE; \ } while (r != end) - if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) + if (likely (c->cmp(r, l + (r - out) - c->size, c->cmp_d) < 0)) { char *end = out + n * c->size; if (sizeof (size_t) == 8 && likely (c->size == 8)) @@ -218,7 +219,7 @@ do { } void -gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) +gcc_qsort_r (void *vbase, size_t n, size_t size, cmp_fn *cmp, void *data) { if (n < 2) return; @@ -227,7 +228,7 @@ gcc_qsort (void *vbase, size_t n, size_t if (stable) nlim = 3, size = ~size; char *base = (char *)vbase; - sort_ctx c = {cmp, base, n, size, nlim}; + sort_ctx c = {cmp, data, base, n, size, nlim}; long long scratch[32]; size_t bufsz = (n / 2) * size; void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); @@ -235,12 +236,44 @@ gcc_qsort (void *vbase, size_t n, size_t if (buf != scratch) free (buf); #if CHECKING_P - qsort_chk (vbase, n, size, cmp); + qsort_chk (vbase, n, size, cmp, data); #endif } void -gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp) +gcc_stablesort_r (void *vbase, size_t n, size_t size, cmp_fn *cmp, void *data) +{ + gcc_qsort_r (vbase, n, ~size, cmp, data); +} + +static int +cmp2to3 (const void *a, const void *b, void *cmp) +{ + /* C++ doesn't permit conversions between pointer-to data + and pointer-to function. 
*/ + union { + void *data; + int (*cmp)(const void *, const void *); + } u; + u.data = cmp; + return u.cmp (a, b); +} + +void +gcc_qsort (void *vbase, size_t n, size_t size, + int (*cmp)(const void *, const void *)) +{ + union { + void *data; + int (*cmp)(const void *, const void *); + } u; + u.cmp = cmp; + gcc_qsort_r (vbase, n, size, cmp2to3, u.data); +} + +void +gcc_stablesort (void *vbase, size_t n, size_t size, + int (*cmp)(const void *, const void *)) { gcc_qsort (vbase, n, ~size, cmp); } Index: gcc/vec.c =================================================================== --- gcc/vec.c (revision 273657) +++ gcc/vec.c (working copy) @@ -192,21 +192,23 @@ dump_vec_loc_statistics (void) ATTRIBUTE_NORETURN ATTRIBUTE_COLD static void qsort_chk_error (const void *p1, const void *p2, const void *p3, - int (*cmp) (const void *, const void *)) + int (*cmp) (const void *, const void *, void *), void *data) { if (!p3) { - int r1 = cmp (p1, p2), r2 = cmp (p2, p1); + int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data); error ("qsort comparator not anti-commutative: %d, %d", r1, r2); } else if (p1 == p2) { - int r = cmp (p1, p3); + int r = cmp (p1, p3, data); error ("qsort comparator non-negative on sorted output: %d", r); } else { - int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3); + int r1 = cmp (p1, p2, data); + int r2 = cmp (p2, p3, data); + int r3 = cmp (p1, p3, data); error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3); } internal_error ("qsort checking failed"); @@ -216,7 +218,7 @@ qsort_chk_error (const void *p1, const v of N SIZE-sized elements pointed to by BASE. */ void qsort_chk (void *base, size_t n, size_t size, - int (*cmp)(const void *, const void *)) + int (*cmp)(const void *, const void *, void *), void *data) { #if 0 #define LIM(n) (n) @@ -225,9 +227,9 @@ qsort_chk (void *base, size_t n, size_t #define LIM(n) ((n) <= 16 ? 
(n) : 12 + floor_log2 (n)) #endif #define ELT(i) ((const char *) base + (i) * size) -#define CMP(i, j) cmp (ELT (i), ELT (j)) -#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp) -#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp) +#define CMP(i, j) cmp (ELT (i), ELT (j), data) +#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data) +#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data) size_t i1, i2, i, j; /* This outer loop iterates over maximum spans [i1, i2) such that elements within each span compare equal to each other. */ Index: gcc/domwalk.c =================================================================== --- gcc/domwalk.c (revision 273657) +++ gcc/domwalk.c (working copy) @@ -128,14 +128,12 @@ along with GCC; see the file COPYING3. which is currently an abstraction over walking tree statements. Thus the dominator walker is currently only useful for trees. */ -/* Reverse postorder index of each basic block. */ -static int *bb_postorder; - static int -cmp_bb_postorder (const void *a, const void *b) +cmp_bb_postorder (const void *a, const void *b, void *data) { basic_block bb1 = *(const basic_block *)(a); basic_block bb2 = *(const basic_block *)(b); + int *bb_postorder = (int *)data; /* Place higher completion number first (pop off lower number first). */ return bb_postorder[bb2->index] - bb_postorder[bb1->index]; } @@ -144,7 +142,7 @@ cmp_bb_postorder (const void *a, const v i.e. by descending number in BB_POSTORDER array. */ static void -sort_bbs_postorder (basic_block *bbs, int n) +sort_bbs_postorder (basic_block *bbs, int n, int *bb_postorder) { if (__builtin_expect (n == 2, true)) { @@ -166,7 +164,7 @@ sort_bbs_postorder (basic_block *bbs, in bbs[0] = bb0, bbs[1] = bb1, bbs[2] = bb2; } else - qsort (bbs, n, sizeof *bbs, cmp_bb_postorder); + gcc_qsort_r (bbs, n, sizeof *bbs, cmp_bb_postorder, bb_postorder); } /* Set EDGE_EXECUTABLE on every edge within FN's CFG. 
*/ @@ -294,7 +292,6 @@ dom_walker::walk (basic_block bb) basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) * 2); int sp = 0; - bb_postorder = m_bb_to_rpo; while (true) { @@ -339,7 +336,8 @@ dom_walker::walk (basic_block bb) if (sp - saved_sp > 1 && m_dom_direction == CDI_DOMINATORS && m_bb_to_rpo) - sort_bbs_postorder (&worklist[saved_sp], sp - saved_sp); + sort_bbs_postorder (&worklist[saved_sp], sp - saved_sp, + m_bb_to_rpo); } } /* NULL is used to mark pop operations in the recursion stack. */ @@ -360,6 +358,5 @@ dom_walker::walk (basic_block bb) else break; } - bb_postorder = NULL; free (worklist); }