Christopher Seiwald writes: > The FreeBSD qsort implementation has a rather nasty degenerate case. > If you have data that partitions exactly about the median pivot, yet > which is unsorted in either partition, you get get treated to an insertion > sort. Example: > > 1 2 3 4 5 6 7 8 9 10 19 18 17 16 14 14 13 12 11 > > qsort picks 10 as the median, does no swaps on its first pass, and so > decides to switch to an insertion sort. The idea is that if no swaps > occur, the data is likely to be nearly already sorted and a good candidate > for insertion sort. This turns out to be a (very) bad idea if you have > some unsorted data buried in the middle of a (very) large dataset. > > The implementation claims to come from Bentley and McIlroy's paper, > "Engineering a Sort Function," and this is largely true, but the insertion > sort optimization(?) isn't in that paper. The GNU qsort.c only does an > insertion sort if it is below a certain threshhold in size, and so never > attempts such a sort on the whole dataset. (The GNU qsort does a number > of other things pooh-poohed in the Bentley paper.) > > It's a pretty straightforward change to bypass the insertion sort for > large subsets of the data. If no one has a strong love for qsort, I'll > educate myself on how to make and contribute this change.
How about the code following sig. ? And the other codes and information on: http://www.mars.dti.ne.jp/~a-wada/qsortlib.html -- Akira Wada <a-w...@mars.dti.ne.jp> --------------------------------------------------------------------------------- /* a-wada/qsortlib/ccn/qsorts.c Mar. 30, 1999 */ /* Copyright (c) 1999 Akira Wada <a-w...@mars.dti.ne.jp> */ /* Quick sort compatible with "qsort (); in <stdlib.h>" */ /* efficiency by reducing the times of key comparisons and */ /* word_mode_swapping for word_aligned object */ /* prevention from the degeneration caused by peculiar input */ /* This qsort is not stable, but could avoid the bad behavior by many */ /* equal sort-keys. For stability, add a unique-key to compare-function. */ /* If the object to be sorted is an array of pointers to structs with */ /* an unsigned integer key in ascending order, set the 3'rd argument */ /* to the key position in bytes and the 4'th to NULL, then */ /* radixsort would be applied. for example : */ /* kps = 16; qsort (pv, n, kps, (ftp) 0); */ #include <time.h> #include <stdlib.h> void radixspi (void *, size_t, size_t); void srandom (unsigned long); /* if not in <stdlib.h> */ unsigned long random (); /* if not in <stdlib.h> */ #define MDT 16 /* threshold for median of the 5 or the more, MDT >= 8 */ #define MDA 2 /* adjusting threshold slightly, MDT + MDA >= 4 */ #define MDM 2 /* multiplier for threshold, MDM >= 2 */ #define DGT 256 /* threshold for checking degeneration */ #define DGV 16 /* degeneration assumed if the smaller < DGL */ #define DGM 0 /* threshold for selecting no median */ #define DGS 2 /* threshold for samples distributed uniformly */ #define DGU 4 /* threshold for sampling at random */ #define DGL ((unsigned) psz / DGV) * rl #define defdgn int dgn = 0, nsp, itv #define ckdgnr(s, e) if (psz >= DGT && s + DGL > e) dgn++ #define no_med (psz > MDT && DGM < dgn && dgn <= DGS) #define symptm (psz > MDT && dgn > DGS) #define xcsmpl() do { \ if (dgn <= DGU) { /* samples distributed uniformly */\ nsp = 3; while (psz > thr) thr *= MDM, nsp += 2; \ itv = ((unsigned) psz / (nsp - 1)) * rl; k = l, n = r; \ for (thr = MDT; psz > thr; thr *= MDM) { \ i += rl, k += itv; swap (i, k); \ j -= rl, n -= itv; swap (n, j);}} \ else { /* samples at random */\ if (dgn == DGU + 1) dgn++, srandom (time (0)); \ for (thr = (unsigned) thr / MDM; psz > thr; thr *= MDM) { \ rdmsmpl (i, i, j); i += rl; \ rdmsmpl (j, i, j); j -= rl;} \ rdmsmpl (m, i, j);} \ i = l, j = r, thr = MDT;} while (0) #define rdmsmpl(x, s, e) if (x != (n = s + rl * ( \ random () % ((unsigned) (e - (s)) / rl + 1)))) swap (x, n) #define swpwrk int t, w, z = sizeof (int), alg = ((int) av | rl) & (z - 1) #define swap(i, j) do { \ w = rl; if (alg == 0) do \ w -= z, t = *((int *) (i + w)), \ *((int *) (i + w)) = *((int *) (j + w)), \ *((int *) (j + w)) = t; while (w > 0); \ else do \ --w, t = *(i + w), *(i + w) = *(j + w), *(j + w) = t; \ while (w > 0);} while (0) #define rotate(i, j, k) do { \ w = rl; if (alg == 0) do \ w -= z, t = *((int *) (i + w)), \ *((int *) (i + w)) = *((int *) (j + w)), \ *((int *) (j + w)) = *((int *) (k + w)), \ *((int *) (k + w)) = t; while (w > 0); \ else do \ --w, t = *(i + w), *(i + w) = *(j + w), \ *(j + w) = *(k + w), *(k + w) = t; while (w > 0);} while (0) #define findxc(minmax, x, s, e, m) do { \ n = x, k = s; do \ if ((*qcmp) minmax > 0) n = k; while ((k += rl) <= e); \ if (n == x) swap (m, x); else rotate (m, n, x);} while (0) #define MIN (n, k) #define MAX (k, n) #define cksrtd(l, r) if (psz > MDT) do { \ k = l; while (k < r && (*qcmp)(k, k + rl) <= 0) k += rl; \ if (k >= r) i = j; else if (k >= m) i = m;} while (0) #define stack ptrtyp s[stksz], *p = s #define stksz sizeof (size_t) * 8 * 2 #define push(x, y) *(p++) = x, *(p++) = y #define empty (s >= p) #define pop(y, x) y = *(--p), x = *(--p) #define defwrk ptrtyp k, n; defdgn; swpwrk; stack typedef int (*ftp) (const void *, const void *); typedef char * ptrtyp; void qsort (void *av, size_t rn, size_t rl, ftp qcmp) { ptrtyp l, r, m, i, j; int psz, thr, cst, c; defwrk; if (qcmp == (ftp) 0) radixspi (av, rn, rl); else /* call RADIX SORT */ if (rn > 1 && rl > 0) { l = (char *) av, r = l + (rn - 1) * rl; for ( ; ; ) { /* initialize for current partition */ m = l + ((unsigned) (psz = (r - l) / rl) / 2) * rl, i = l, j = r, thr = MDT, psz -= (MDA), cst = 1; if (!no_med) { if (symptm) xcsmpl (); for ( ; ; ) { /* bring median of samples to middle */ if ((*qcmp)(m, j) <= 0) { if (i < m && (*qcmp)(i, m) > 0) { findxc (MIN, i, j, r, m); cst = 0;}} else { if (i >= m || (*qcmp)(i, m) > 0) swap (i, j); else findxc (MAX, j, l, i, m); cst = 0;} i += rl, j -= rl; if (psz > thr) thr *= MDM; else break;}} if (cst != 0) cksrtd (l, r); if (i < j) { /* do partitioning by middle as pivot */ for (n = m; ; ) { /* gathering equal keys close to pivot */ while (i < m) { if ((c = (*qcmp)(i, m)) < 0) i += rl; else if (c > 0) break; else { while (i < (m -= rl) && (c = (*qcmp)(m, i)) == 0); if (i >= m) break; swap (i, m); if (c > 0) break; else i += rl;}} while (n < j) { if ((c = (*qcmp)(n, j)) < 0) j -= rl; else if (c > 0) break; else { while ((n += rl) < j && (c = (*qcmp)(j, n)) == 0); if (n >= j) break; swap (n, j); if (c > 0) break; else j -= rl;}} if (i >= m && n >= j) break; if (n == j) { if (m == n) { swap (i, j); j -= rl, n = m = i;} else if (i < (m -= rl)) { rotate (i, m, j); n = j -= rl;} else { swap (i, j); j -= rl; break;}} else if (i == m) { if (n == m) { swap (i, j); i += rl; m = n = j;} else if ((n += rl) < j) { rotate (j, n, i); m = i += rl;} else { swap (i, j); i += rl; break;}} else { swap (i, j); j -= rl, i += rl;}} /* select subpartition for next iteration */ if ((i -= rl) - l < r - (j += rl)) { ckdgnr (l, j); if (l >= i) l = j; else { push (j, r), r = i; continue;}} else { ckdgnr (i, r); if (j >= r) r = i; else { push (l, i), l = j; continue;}} if (l < r) continue;} /* pop up next partition from stack */ if (!empty) pop (r, l); else break;}}} #include "radixspi-ar.c" /* a-wada/qsortlib/ccn/qsorts.c end */ /*----------------------------------------------------------------------------** *source codes on : radix_ar.c (original for array of unsigned longs by Andre Reinald) http://www.cubic.org/~submissive/sourcerer/download/radix_ar.c 25-May-97 radixspi-ar.c (modified for array of pointers to structs with u_int key) http://www.mars.dti.ne.jp/~a-wada/qsortlib/ccn/radixspi-ar.c **----------------------------------------------------------------------------*/ To Unsubscribe: send mail to majord...@freebsd.org with "unsubscribe freebsd-hackers" in the body of the message