The 4.4BSD qsort has a heuristic for detecting whether the input
is partially sorted, in which case it switches to insertion sort.
This is provides a large speed up for partially sorted workloads,
which are common.

However, this resulted in quadratic behavior for certain inputs.
For more details on this, see:
http://calmerthanyouare.org/2014/06/11/algorithmic-complexity-attacks-and-libc-qsort.html

Removing this optimization in turn resulted in quadratic behavior
for other workloads, as seen by pjanzen@.  We can safely restore
the switch to insertion sort if we bail out at certain point.  In
this diff, if we perform nmemb swaps we stop the insertion sort and
restart using quicksort.  Since swap_cnt is not reset we won't
switch to insertion sort again.

This results in a 2x improvement in the BSD "killer input" compared
to the version without the heuristic.  The entire qsort regress run
time is reduced slightly.

It also restores the performance lost for the particular workload
that went quadratic when we removed the optimization.

 - todd

Index: lib/libc/stdlib/qsort.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/qsort.c,v
retrieving revision 1.18
diff -u -p -u -r1.18 qsort.c
--- lib/libc/stdlib/qsort.c     30 May 2017 14:54:09 -0000      1.18
+++ lib/libc/stdlib/qsort.c     6 Jun 2017 15:38:41 -0000
@@ -126,7 +126,7 @@ introsort(char *a, size_t n, size_t es, 
     int (*cmp)(const void *, const void *))
 {
        char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-       int cmp_result;
+       int cmp_result, swap_cnt;
        size_t r, s;
 
 loop:  if (n < 7) {
@@ -141,6 +141,8 @@ loop:       if (n < 7) {
                        return;
        }
        maxdepth--;
+       swap_cnt = 0;
+restart:
        pm = a + (n / 2) * es;
        if (n > 7) {
                pl = a;
@@ -159,6 +161,7 @@ loop:       if (n < 7) {
        for (;;) {
                while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) {
                        if (cmp_result == 0) {
+                               swap_cnt = 1;
                                swap(pa, pb);
                                pa += es;
                        }
@@ -166,6 +169,7 @@ loop:       if (n < 7) {
                }
                while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) {
                        if (cmp_result == 0) {
+                               swap_cnt = 1;
                                swap(pc, pd);
                                pd -= es;
                        }
@@ -173,11 +177,24 @@ loop:     if (n < 7) {
                }
                if (pb > pc)
                        break;
+               swap_cnt = 1;
                swap(pb, pc);
                pb += es;
                pc -= es;
        }
 
+       if (swap_cnt == 0) {    /* Switch to insertion sort */
+               for (pm = a + es; pm < a + n * es; pm += es) {
+                       for (pl = pm; pl > a && cmp(pl - es, pl) > 0;
+                            pl -= es) {
+                               /* Avoid quadratic behavior */
+                               if (++swap_cnt > n)
+                                       goto restart;
+                               swap(pl, pl - es);
+                       }
+               }
+               return;
+       }
        pn = a + n * es;
        r = min(pa - a, pb - pa);
        vecswap(a, pb - r, r);

Reply via email to