On Sunday 07 September 2008 08:28:11 Jim Meyering wrote:
> IMHO, changing sort -V to produce more intuitively-correct results
> is the way to go.
>
> With ls -v, I'm willing to make the change as well, *assuming*
> no one produces a counterargument.
>
> The question is what ordering function to use.
> The more generally-useful the better. i.e., I hope
> it can be compatible with both the rpm and deb version sorters.

I investigated the existing functions which sort version strings:
  strverscmp - from glibc (now with its equivalent in gnulib)
  rpmvercmp  - from rpm
  verrevcmp  - from dpkg
The main problem with all of these functions is how they deal with file suffixes, so I modified each of them to drop the suffixes. That makes 6 implementations altogether (3 original + 3 modified). Then I ran a few series of tests with all implementations. I think the modified version of verrevcmp (from dpkg) gives the best results - consider the attached results of an example - results.tar.bz2 So I propose a new function filevercmp (attachment filevercmp.c) as the predicate function for the new sort --version-sort. Also attached is the whole "testing framework" which I used to compare these functions - strverscmp.tar.bz2 - follow the steps in the README inside the archive. It was developed a bit quickly, but I hope it could be useful :-) Kamil
/* Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

#include "system.h"
#include "vercmp.h"

/* Character classification wrappers.  The argument is converted to
   unsigned char first: handing a negative plain char to the <ctype.h>
   functions is undefined behavior.  */
#define xisalnum(c) isalnum ((unsigned char) (c))
#define xisdigit(c) isdigit ((unsigned char) (c))
#define xisalpha(c) isalpha ((unsigned char) (c))
#define cisdigit(c) isdigit ((unsigned char) (c))
#define cisalpha(c) isalpha ((unsigned char) (c))

/* Match a file suffix defined by the RE (\.[A-Za-z][A-Za-z0-9]*)*$

   STR must be a NUL-terminated string.  Return a pointer to the first
   '.' of the longest run of suffix components that reaches the end of
   STR, or NULL if STR has no such suffix.  */
const char *
match_suffix (const char *str)
{
  const char *match = NULL;
  /* True when the previous character was the '.' that opens a
     component, i.e. the current character has to be a letter.  */
  bool need_alpha = false;

  for (; *str; str++)
    {
      if (need_alpha)
        {
          need_alpha = false;
          if (xisalpha (*str))
            continue;

          /* The component that was just opened is malformed, which
             discards the whole candidate suffix ...  */
          match = NULL;
          if (*str == '.')
            {
              /* ... but this '.' may itself open a valid suffix.  */
              match = str;
              need_alpha = true;
            }
        }
      else if (*str == '.')
        {
          need_alpha = true;
          if (!match)
            match = str;
        }
      else if (!xisalnum (*str))
        match = NULL;
    }

  /* A trailing '.' opens a component that never receives its letter,
     so it does not match the RE.  */
  return need_alpha ? NULL : match;
}

/* The comparison algorithm below was taken from dpkg (vercmp.c).  */

/* Return the sort weight of the byte C (ASCII is assumed):
   '~' sorts before everything, digits and NUL are neutral, letters
   sort by their code, and every other byte sorts after all letters.  */
static int
order (unsigned char c)
{
  if (c == '~')
    return -1;
  if (c == '\0' || cisdigit (c))
    return 0;
  if (cisalpha (c))
    return c;
  return c + 256;
}

/* Compare the first VAL_LEN bytes of VAL with the first REF_LEN bytes
   of REF as version strings.  Neither range may contain a NUL byte.
   Return a negative, zero or positive value when VAL sorts before,
   equal to or after REF.  Working on explicit lengths lets callers
   compare a prefix of a string without copying or modifying it.  */
static int
verrevcmp_n (const char *val, size_t val_len,
             const char *ref, size_t ref_len)
{
  size_t vi = 0;
  size_t ri = 0;

  while (vi < val_len || ri < ref_len)
    {
      int first_diff = 0;

      /* Compare the non-digit runs byte by byte using order ().  An
         exhausted string or a digit weighs 0 and any other byte does
         not, so the indices only advance while both still point at
         real non-digit bytes.  */
      while ((vi < val_len && !cisdigit (val[vi]))
             || (ri < ref_len && !cisdigit (ref[ri])))
        {
          int vc = vi < val_len ? order (val[vi]) : 0;
          int rc = ri < ref_len ? order (ref[ri]) : 0;
          if (vc != rc)
            return vc - rc;
          vi++;
          ri++;
        }

      /* Leading zeros do not change the numeric value.  */
      while (vi < val_len && val[vi] == '0')
        vi++;
      while (ri < ref_len && ref[ri] == '0')
        ri++;

      /* Walk both digit runs in step, remembering the first differing
         digit; it only decides if the runs have the same length.  */
      while (vi < val_len && ri < ref_len
             && cisdigit (val[vi]) && cisdigit (ref[ri]))
        {
          if (!first_diff)
            first_diff = val[vi] - ref[ri];
          vi++;
          ri++;
        }

      /* The longer digit run is the larger number.  */
      if (vi < val_len && cisdigit (val[vi]))
        return 1;
      if (ri < ref_len && cisdigit (ref[ri]))
        return -1;
      if (first_diff)
        return first_diff;
    }

  return 0;
}

/* Compare the NUL-terminated version strings VAL and REF.  Neither
   argument may be NULL.  Return a negative, zero or positive value
   when VAL sorts before, equal to or after REF.  */
int
verrevcmp (const char *val, const char *ref)
{
  return verrevcmp_n (val, strlen (val), ref, strlen (ref));
}

/* Compare the file names A and B as version strings.

   The file suffixes (see match_suffix) are ignored unless the names
   are identical without them.  Names that the version comparison
   considers equivalent but that differ byte-wise (e.g. "a01" and
   "a1") are ordered by strcmp, so that the result is 0 only for
   identical strings and sorting is deterministic.

   Neither argument may be NULL.  Return a negative, zero or positive
   value when A sorts before, equal to or after B.  */
int
filevercmp (const char *a, const char *b)
{
  const char *a_suffix;
  const char *b_suffix;
  size_t a_full;
  size_t b_full;
  size_t a_len;
  size_t b_len;
  int result;
  int simple_cmp = strcmp (a, b);

  /* Easy comparison to see if the names are identical.  */
  if (simple_cmp == 0)
    return 0;

  a_full = strlen (a);
  b_full = strlen (b);

  /* "Cut" the file suffixes by shortening the compared lengths; the
     strings themselves are never copied or modified.  */
  a_suffix = match_suffix (a);
  b_suffix = match_suffix (b);
  a_len = a_suffix ? (size_t) (a_suffix - a) : a_full;
  b_len = b_suffix ? (size_t) (b_suffix - b) : b_full;

  /* Restore the suffixes if the names are identical without them.  */
  if ((a_suffix || b_suffix)
      && a_len == b_len && memcmp (a, b, a_len) == 0)
    {
      a_len = a_full;
      b_len = b_full;
    }

  result = verrevcmp_n (a, a_len, b, b_len);
  return result != 0 ? result : simple_cmp;
}
results.tar.bz2
Description: application/tbz
strverscmp.tar.bz2
Description: application/tbz