On Sunday 07 September 2008 08:28:11 Jim Meyering wrote:
> IMHO, changing sort -V to produce more intuitively-correct results
> is the way to go.
>
> With ls -v, I'm willing to make the change as well, *assuming*
> no one produces a counterargument.
>
> The question is what ordering function to use.
> The more generally-useful the better.  i.e., I hope
> it can be compatible with both the rpm and deb version sorters.
I made an investigation of existing functions which sort version strings:
strverscmp - from glibc (now with its equivalent in gnulib)
rpmvercmp - from rpm
verrevcmp - from dpkg

The main problem with all of these functions is how they deal with file 
suffixes. So I modified them to drop the suffixes. That makes 6 
implementations in total (3 original + 3 modified).

Then I ran a few series of tests with all implementations. I think the 
modified version of verrevcmp (from dpkg) gives the best results - see the 
attached results of an example run - results.tar.bz2

So I propose a new function filevercmp (attachment filevercmp.c) as the 
predicate function for new sort --version-sort.

Also attached is the whole "testing framework" which I used to 
compare these functions - strverscmp.tar.bz2 - follow the steps in the README 
inside the archive. It was developed rather quickly, but I hope it will be 
useful :-)


Kamil
/* 
   Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#include <config.h>

#include <stdlib.h>
#include <string.h>

#include "system.h"
#include "vercmp.h"

/* Character classification wrappers.  The <ctype.h> functions are only
   defined for arguments representable as unsigned char (or EOF), while a
   plain char may be negative, so convert the argument before the call.  */
#define xisalnum(c) isalnum ((unsigned char) (c))
#define xisdigit(c) isdigit ((unsigned char) (c))
#define xisalpha(c) isalpha ((unsigned char) (c))

/*
 * Locate the file suffix of STR, where a suffix is a match of the
 * regular expression (\.[A-Za-z][A-Za-z0-9]*)*$
 *
 * STR is scanned once from left to right.  A '.' opens a candidate
 * suffix (only the first '.' of a run of components is remembered); the
 * candidate is discarded when the character right after a '.' is not a
 * letter, or when any later character is neither alphanumeric nor '.'.
 * Letters and digits are classified with isalpha/isalnum.
 *
 * Note: a '.' that is the last character of STR is never followed by a
 * character to test, so it is kept as part of the reported suffix
 * (e.g. "foo." reports ".").
 *
 * Return a pointer to the first character of the suffix, or NULL if
 * no suffix was found.
 */
const char *
match_suffix (const char *str)
{
  const char *match = NULL;
  bool read_alpha = false;      /* true right after a '.' was consumed */

  for (; *str; str++)
    {
      /* <ctype.h> functions need a value representable as unsigned char;
         a plain char may be negative for bytes >= 0x80.  */
      unsigned char c = (unsigned char) *str;

      if (read_alpha)
        {
          /* The character following a '.' must be a letter.  */
          read_alpha = false;
          if (!isalpha (c))
            match = NULL;
        }
      else if (c == '.')
        {
          read_alpha = true;
          if (!match)
            match = str;
        }
      else if (!isalnum (c))
        match = NULL;
    }

  return match;
}

/*
 * Compare the file names A and B as version strings.
 *
 * The file suffix of each name (as located by match_suffix) is cut off
 * and the remaining parts are compared with verrevcmp.  If those
 * remaining parts are identical, the complete names, suffixes included,
 * are compared instead.
 *
 * Return 0 if A and B are identical strings, otherwise the value
 * returned by verrevcmp.
 *
 * No stack space proportional to the input length is used: a heap
 * buffer is allocated only when truncated copies are really needed.  If
 * that allocation fails, the complete names are compared.
 */
int
filevercmp (const char *a, const char *b)
{
  const char *suffix_a;
  const char *suffix_b;
  size_t len_a;
  size_t len_b;
  char *buf;
  char *one;
  char *two;
  int result;

  /* easy comparison to see if versions are identical */
  if (strcmp (a, b) == 0)
    return 0;

  suffix_a = match_suffix (a);
  suffix_b = match_suffix (b);

  /* Nothing to cut: compare the names as they are.  */
  if (!suffix_a && !suffix_b)
    return verrevcmp (a, b);

  /* Lengths of the names with their suffixes cut off.  */
  len_a = suffix_a ? (size_t) (suffix_a - a) : strlen (a);
  len_b = suffix_b ? (size_t) (suffix_b - b) : strlen (b);

  /* Identical after the cut: the complete names decide.  */
  if (len_a == len_b && memcmp (a, b, len_a) == 0)
    return verrevcmp (a, b);

  /* One buffer holds both NUL-terminated truncated copies.  */
  buf = malloc (len_a + len_b + 2);
  if (!buf)
    return verrevcmp (a, b);    /* best effort when out of memory */

  one = buf;
  two = buf + len_a + 1;
  memcpy (one, a, len_a);
  one[len_a] = '\0';
  memcpy (two, b, len_b);
  two[len_b] = '\0';

  result = verrevcmp (one, two);
  free (buf);
  return result;
}

/* The <ctype.h> functions are only defined for arguments representable
   as unsigned char (or EOF); convert so that a negative plain char is
   never passed to them.  */
#define cisdigit(c) isdigit ((unsigned char) (c))
#define cisalpha(c) isalpha ((unsigned char) (c))

/* following code was taken from dpkg (vercmp.c) */

/* Sort weight of the character CH when it is compared outside of a run
   of digits (assumes ASCII):
     '~'                     -> -1, lower than anything else, even the
                                end of the string
     digits and the NUL byte -> 0
     letters                 -> their character code
     everything else         -> character code + 256, i.e. after all
                                letters
   CH is reduced to its unsigned char value first, so the weight of a
   byte >= 0x80 does not depend on whether plain char is signed.  */
static int
order (int ch)
{
  unsigned char c = (unsigned char) ch;

  if (c == '~')
    return -1;
  if (c == '\0' || isdigit (c))
    return 0;
  if (isalpha (c))
    return c;
  return c + 256;
}

/* Compare the version strings VAL and REF.

   The strings are processed as alternating non-digit and digit parts.
   Non-digit parts are compared character by character using order ().
   Digit parts are compared as numbers: leading zeros are skipped, a
   longer run of digits wins, and for runs of equal length the first
   differing digit decides.

   Return a negative value if VAL sorts before REF, a positive value if
   it sorts after REF, and 0 if no difference was found (which can
   happen for strings that are not byte-identical, e.g. "a01" and
   "a1").  */
int
verrevcmp (const char *val, const char *ref)
{
  while (*val || *ref)
    {
      int first_diff = 0;

      /* Non-digit part: runs until both strings are at a digit or at
         their end.  */
      while ((*val && !cisdigit (*val)) || (*ref && !cisdigit (*ref)))
        {
          int vc = order (*val);
          int rc = order (*ref);

          if (vc != rc)
            return vc - rc;
          val++;
          ref++;
        }

      /* Digit part: leading zeros carry no value.  */
      while (*val == '0')
        val++;
      while (*ref == '0')
        ref++;

      /* Walk both numbers in step, remembering the first digit that
         differs; it only matters if the numbers have equal length.  */
      while (cisdigit (*val) && cisdigit (*ref))
        {
          if (!first_diff)
            first_diff = *val - *ref;
          val++;
          ref++;
        }

      /* Digits left over on one side mean that number is larger.  */
      if (cisdigit (*val))
        return 1;
      if (cisdigit (*ref))
        return -1;
      if (first_diff)
        return first_diff;
    }

  return 0;
}

Attachment: results.tar.bz2
Description: application/tbz

Attachment: strverscmp.tar.bz2
Description: application/tbz

Reply via email to