On Wed, Nov 20, 2002 at 02:27:53PM +1100, Tim Robbins wrote:
> On Wed, Nov 20, 2002 at 04:38:38AM +0300, Andrey A. Chernov wrote:
> 
> > On Tue, Nov 19, 2002 at 14:52:02 +0200, Ruslan Ermilov wrote:
> > > It seems that this patch has never been committed.  This is a critical
> > > bug that should be fixed before 5.0-RELEASE is out.
> > 
> > I agree. There is no locale yet and I have never seen that patch.
> 
> This patch seems to work, I used the logic from regcomp.c in libc.
> Long lines make it ugly, but it was like that when I got here ;)

> Index: src/usr.bin/awk/Makefile
> ===================================================================
> RCS file: /x/freebsd/src/usr.bin/awk/Makefile,v
> retrieving revision 1.9
> diff -u -r1.9 Makefile
> --- src/usr.bin/awk/Makefile  10 May 2002 20:36:21 -0000      1.9
> +++ src/usr.bin/awk/Makefile  20 Nov 2002 03:13:50 -0000
> @@ -6,7 +6,7 @@
>  PROG=        nawk
>  SRCS=        awkgram.y b.c lex.c lib.c main.c parse.c proctab.c run.c tran.c ytab.h
>  
> -CFLAGS+= -I. -I${AWKSRC}
> +CFLAGS+= -I. -I${AWKSRC} -I${.CURDIR}/../../lib/libc/locale
>  
Ouch.

>  DPADD=       ${LIBM}
>  LDADD=       -lm
> Index: src/contrib/one-true-awk/b.c
> ===================================================================
> RCS file: /x/freebsd/src/contrib/one-true-awk/b.c,v
> retrieving revision 1.1.1.2
> diff -u -r1.1.1.2 b.c
> --- src/contrib/one-true-awk/b.c      19 Feb 2002 09:35:24 -0000      1.1.1.2
> +++ src/contrib/one-true-awk/b.c      20 Nov 2002 03:16:10 -0000
> @@ -32,6 +32,7 @@
>  #include <stdlib.h>
>  #include "awk.h"
>  #include "ytab.h"
> +#include "collate.h"
>  
>  #define      HAT     (NCHARS-2)      /* matches ^ in regular expr */
>                               /* NCHARS is 2**n */
> @@ -284,7 +285,7 @@
>  
>  char *cclenter(char *argp)   /* add a character class */
>  {
> -     int i, c, c2;
> +     int i, j, c, c2;
>       uschar *p = (uschar *) argp;
>       uschar *op, *bp;
>       static uschar *buf = 0;
> @@ -308,12 +309,24 @@
>                                       i--;
>                                       continue;
>                               }
> -                             while (c < c2) {
> -                                     if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
> -                                             FATAL("out of space for character class [%.10s...] 2", p);
> -                                     *bp++ = ++c;
> -                                     i++;
> -                             }
> +                             if (__collate_load_error) {
> +                                     while (c < c2) {
> +                                             if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
> +                                                     FATAL("out of space for character class [%.10s...] 2", p);
> +                                             *bp++ = ++c;
> +                                             i++;
> +                                     }
> +                             } else {
> +                                     for (j = CHAR_MIN; j <= CHAR_MAX; j++) {
> +                                             if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
> +                                                     FATAL("out of space for character class [%.10s...] 2", p);
> +                                             if (__collate_range_cmp(c, j) <= 0
> +                                                 && __collate_range_cmp(j, c2) <= 0) {
> +                                                     *bp++ = j;
> +                                                     i++;
> +                                             }
> +                                     }
> +                                }
>                               continue;
>                       }
>               }

There are a number of problems here:

1.  The "empty range" check preceding this block should be made
    locale-aware too.

2.  CHAR_MAX evaluates to 127 here.

Here's my version of the above fix plus [[:class:]] fixes Andrey mentioned.
I gave it only light testing.

The collate_range_cmp() was stolen from the old awk(1).


Cheers,
-- 
Ruslan Ermilov          Sysadmin and DBA,
[EMAIL PROTECTED]           Sunbay Software AG,
[EMAIL PROTECTED]          FreeBSD committer,
+380.652.512.251        Simferopol, Ukraine

http://www.FreeBSD.org  The Power To Serve
http://www.oracle.com   Enabling The Information Age
Index: b.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/b.c,v
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.2 b.c
--- b.c 19 Feb 2002 09:35:24 -0000      1.1.1.2
+++ b.c 20 Nov 2002 12:51:10 -0000
@@ -282,9 +282,25 @@ int quoted(char **pp)      /* pick up next th
        return c;
 }
 
+static int collate_range_cmp (a, b)
+       int a, b;
+{
+       int r;
+       static char s[2][2];
+
+       if ((unsigned char)a == (unsigned char)b)
+               return 0;
+       s[0][0] = a;
+       s[1][0] = b;
+       if ((r = strcoll(s[0], s[1])) == 0)
+               r = (unsigned char)a - (unsigned char)b;
+       return r;
+}
+
 char *cclenter(char *argp)     /* add a character class */
 {
        int i, c, c2;
+       int j;
        uschar *p = (uschar *) argp;
        uschar *op, *bp;
        static uschar *buf = 0;
@@ -303,15 +319,18 @@ char *cclenter(char *argp)        /* add a char
                                c2 = *p++;
                                if (c2 == '\\')
                                        c2 = quoted((char **) &p);
-                               if (c > c2) {   /* empty; ignore */
+                               if (collate_range_cmp(c, c2) > 0) {     /* empty; ignore */
                                        bp--;
                                        i--;
                                        continue;
                                }
-                               while (c < c2) {
+                               for (j = 0; j < NCHARS; j++) {
+                                       if ((collate_range_cmp(c, j) > 0) ||
+                                           collate_range_cmp(j, c2) > 0)
+                                               continue;
                                        if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
                                                FATAL("out of space for character class [%.10s...] 2", p);
-                                       *bp++ = ++c;
+                                       *bp++ = j;
                                        i++;
                                }
                                continue;
@@ -696,20 +715,20 @@ Node *unary(Node *np)
 struct charclass {
        const char *cc_name;
        int cc_namelen;
-       const char *cc_expand;
+       int (*cc_func)(int);
 } charclasses[] = {
-       { "alnum",      5,      "0-9A-Za-z" },
-       { "alpha",      5,      "A-Za-z" },
-       { "blank",      5,      " \t" },
-       { "cntrl",      5,      "\000-\037\177" },
-       { "digit",      5,      "0-9" },
-       { "graph",      5,      "\041-\176" },
-       { "lower",      5,      "a-z" },
-       { "print",      5,      " \041-\176" },
-       { "punct",      5,      "\041-\057\072-\100\133-\140\173-\176" },
-       { "space",      5,      " \f\n\r\t\v" },
-       { "upper",      5,      "A-Z" },
-       { "xdigit",     6,      "0-9A-Fa-f" },
+       { "alnum",      5,      isalnum },
+       { "alpha",      5,      isalpha },
+       { "blank",      5,      isblank },
+       { "cntrl",      5,      iscntrl },
+       { "digit",      5,      isdigit },
+       { "graph",      5,      isgraph },
+       { "lower",      5,      islower },
+       { "print",      5,      isprint },
+       { "punct",      5,      ispunct },
+       { "space",      5,      isspace },
+       { "upper",      5,      isupper },
+       { "xdigit",     6,      isxdigit },
        { NULL,         0,      NULL },
 };
 
@@ -722,7 +741,7 @@ int relex(void)             /* lexical analyzer for
        static int bufsz = 100;
        uschar *bp;
        struct charclass *cc;
-       const uschar *p;
+       int i;
 
        switch (c = *prestr++) {
        case '|': return OR;
@@ -771,8 +790,14 @@ int relex(void)            /* lexical analyzer for
                                if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
                                    prestr[2 + cc->cc_namelen] == ']') {
                                        prestr += cc->cc_namelen + 3;
-                                       for (p = (const uschar *) cc->cc_expand; *p; p++)
-                                               *bp++ = *p;
+                                       for (i = 0; i < NCHARS; i++) {
+                                               if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
+                                                   FATAL("out of space for reg expr %.10s...", lastre);
+                                               if (cc->cc_func(i)) {
+                                                       *bp++ = i;
+                                                       n++;
+                                               }
+                                       }
                                } else
                                        *bp++ = c;
                        } else if (c == '\0') {
Index: main.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/main.c,v
retrieving revision 1.1.1.3
diff -u -p -r1.1.1.3 main.c
--- main.c      16 Mar 2002 16:50:56 -0000      1.1.1.3
+++ main.c      20 Nov 2002 12:51:10 -0000
@@ -27,6 +27,7 @@ char  *version = "version 20020210";
 #define DEBUG
 #include <stdio.h>
 #include <ctype.h>
+#include <locale.h>
 #include <stdlib.h>
 #include <string.h>
 #include <signal.h>
@@ -54,6 +55,7 @@ int main(int argc, char *argv[])
 {
        char *fs = NULL;
 
+       setlocale(LC_ALL, "");
        cmdname = argv[0];
        if (argc == 1) {
                fprintf(stderr, "Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [files]\n", cmdname);
Index: run.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/run.c,v
retrieving revision 1.1.1.2
diff -u -p -r1.1.1.2 run.c
--- run.c       19 Feb 2002 09:35:25 -0000      1.1.1.2
+++ run.c       20 Nov 2002 12:51:10 -0000
@@ -1504,11 +1504,11 @@ Cell *bltin(Node **a, int n)    /* builtin 
                if (t == FTOUPPER) {
                        for (p = buf; *p; p++)
                                if (islower((uschar) *p))
-                                       *p = toupper(*p);
+                                       *p = toupper((uschar)*p);
                } else {
                        for (p = buf; *p; p++)
                                if (isupper((uschar) *p))
-                                       *p = tolower(*p);
+                                       *p = tolower((uschar)*p);
                }
                tempfree(x);
                x = gettemp();

Attachment: msg46995/pgp00000.pgp
Description: PGP signature

Reply via email to