Author: ache
Date: Sat Mar  6 19:21:57 2010
New Revision: 204803
URL: http://svn.freebsd.org/changeset/base/204803

Log:
  1) Rewrite input processing so that it does not exit with an error on the
  first EILSEQ found in the input data, but falls back to a "binary equal"
  check instead.
  
  POSIX says: "The input file shall be a text file", nothing more,
  so a text file with an illegal sequence is valid input.
  BTW, GNU sort does not fail on EILSEQ either.
  
  2) Speed up input processing a bit in complex cases like skipping fields,
  skipping chars or ignoring case.
  
  3) Enforce the implied LINE_MAX limit (from POSIX definition of "text file"
  and POSIX uniq(1) description).

Modified:
  head/usr.bin/uniq/uniq.c

Modified: head/usr.bin/uniq/uniq.c
==============================================================================
--- head/usr.bin/uniq/uniq.c    Sat Mar  6 17:31:09 2010        (r204802)
+++ head/usr.bin/uniq/uniq.c    Sat Mar  6 19:21:57 2010        (r204803)
@@ -60,31 +60,25 @@ static const char rcsid[] =
 #include <wchar.h>
 #include <wctype.h>
 
-#define        INITLINELEN     (LINE_MAX + 1)
-#define        MAXLINELEN      ((SIZE_MAX / sizeof(wchar_t)) / 2)
-
-int cflag, dflag, uflag;
+int cflag, dflag, uflag, iflag;
 int numchars, numfields, repeats;
 
 FILE   *file(const char *, const char *);
-wchar_t        *getline(wchar_t *, size_t *, FILE *);
-void    show(FILE *, wchar_t *);
+wchar_t        *convert(wchar_t *, const char *);
+char   *getlinemax(char *, FILE *);
+void    show(FILE *, const char *);
 wchar_t        *skip(wchar_t *);
 void    obsolete(char *[]);
 static void     usage(void);
-int      wcsicoll(wchar_t *, wchar_t *);
 
 int
 main (int argc, char *argv[])
 {
-       wchar_t *t1, *t2;
+       wchar_t *tprev, *tthis, *wprev, *wthis, *wp;
        FILE *ifp, *ofp;
-       int ch, b1;
-       size_t prevbuflen, thisbuflen;
-       wchar_t *prevline, *thisline;
-       char *p;
+       int ch, comp;
+       char *prevline, *thisline, *p;
        const char *ifn;
-       int iflag = 0, comp;
 
        (void) setlocale(LC_ALL, "");
 
@@ -139,48 +133,47 @@ main (int argc, char *argv[])
        if (argc > 1)
                ofp = file(argv[1], "w");
 
-       prevbuflen = INITLINELEN;
-       thisbuflen = INITLINELEN;
-       prevline = malloc(prevbuflen * sizeof(*prevline));
-       thisline = malloc(thisbuflen * sizeof(*thisline));
-       if (prevline == NULL || thisline == NULL)
+       prevline = malloc(LINE_MAX);
+       thisline = malloc(LINE_MAX);
+       wprev = malloc(LINE_MAX * sizeof(*wprev));
+       wthis = malloc(LINE_MAX * sizeof(*wthis));
+       if (prevline == NULL || thisline == NULL ||
+           wprev == NULL || wthis == NULL)
                err(1, "malloc");
 
-       if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) {
+       if ((prevline = getlinemax(prevline, ifp)) == NULL) {
                if (ferror(ifp))
                        err(1, "%s", ifn);
                exit(0);
        }
+       tprev = convert(wprev, prevline);
+
        if (!cflag && uflag && dflag)
                show(ofp, prevline);
 
-       while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) {
-               /* If requested get the chosen fields + character offsets. */
-               if (numfields || numchars) {
-                       t1 = skip(thisline);
-                       t2 = skip(prevline);
-               } else {
-                       t1 = thisline;
-                       t2 = prevline;
-               }
+       while ((thisline = getlinemax(thisline, ifp)) != NULL) {
+               tthis = convert(wthis, thisline);
 
-               /* If different, print; set previous to new value. */
-               if (iflag)
-                       comp = wcsicoll(t1, t2);
+               if (tthis == NULL && tprev == NULL)
+                       comp = strcmp(thisline, prevline);
+               else if (tthis == NULL || tprev == NULL)
+                       comp = 1;
                else
-                       comp = wcscoll(t1, t2);
+                       comp = wcscoll(tthis, tprev);
 
                if (comp) {
+                       /* If different, print; set previous to new value. */
                        if (cflag || !dflag || !uflag)
                                show(ofp, prevline);
-                       t1 = prevline;
-                       b1 = prevbuflen;
+                       p = prevline;
+                       wp = wprev;
                        prevline = thisline;
-                       prevbuflen = thisbuflen;
+                       wprev = wthis;
+                       tprev = tthis;
                        if (!cflag && uflag && dflag)
                                show(ofp, prevline);
-                       thisline = t1;
-                       thisbuflen = b1;
+                       thisline = p;
+                       wthis = wp;
                        repeats = 0;
                } else
                        ++repeats;
@@ -192,44 +185,61 @@ main (int argc, char *argv[])
        exit(0);
 }
 
-wchar_t *
-getline(wchar_t *buf, size_t *buflen, FILE *fp)
+char *
+getlinemax(char *buf, FILE *fp)
 {
        size_t bufpos;
-       wint_t ch;
+       int ch;
 
        bufpos = 0;
-       while ((ch = getwc(fp)) != WEOF && ch != '\n') {
-               if (bufpos + 1 >= *buflen) {
-                       *buflen = *buflen * 2;
-                       if (*buflen > MAXLINELEN)
-                               errx(1,
-                                   "Maximum line buffer length (%zu) exceeded",
-                                   MAXLINELEN);
-                       buf = reallocf(buf, *buflen * sizeof(*buf));
-                       if (buf == NULL)
-                               err(1, "reallocf");
-               }
+       while ((ch = getc(fp)) != EOF && ch != '\n') {
                buf[bufpos++] = ch;
+               if (bufpos >= LINE_MAX)
+                       errx(1, "Maximum line length (%zu) exceeded",
+                            LINE_MAX);
        }
        buf[bufpos] = '\0';
 
        return (bufpos != 0 || ch == '\n' ? buf : NULL);
 }
 
+wchar_t *
+convert(wchar_t *buf, const char *str)
+{
+       size_t n;
+       wchar_t *p, *ret;
+
+       if ((n = mbstowcs(buf, str, LINE_MAX)) == LINE_MAX)
+               errx(1, "Maximum line length (%zu) exceeded", LINE_MAX);
+       else if (n != (size_t)-1) {
+               /* If requested get the chosen fields + character offsets. */
+               if (numfields || numchars)
+                       ret = skip(buf);
+               else
+                       ret = buf;
+               if (iflag) {
+                       for (p = ret; *p != L'\0'; p++)
+                               *p = towlower(*p);
+               }
+       } else
+               ret = NULL;
+
+       return (ret);
+}
+
 /*
  * show --
  *     Output a line depending on the flags and number of repetitions
  *     of the line.
  */
 void
-show(FILE *ofp, wchar_t *str)
+show(FILE *ofp, const char *str)
 {
 
        if (cflag)
-               (void)fprintf(ofp, "%4d %ls\n", repeats + 1, str);
+               (void)fprintf(ofp, "%4d %s\n", repeats + 1, str);
        if ((dflag && repeats) || (uflag && !repeats))
-               (void)fprintf(ofp, "%ls\n", str);
+               (void)fprintf(ofp, "%s\n", str);
 }
 
 wchar_t *
@@ -237,13 +247,14 @@ skip(wchar_t *str)
 {
        int nchars, nfields;
 
-       for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
+       for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
                while (iswblank(*str))
                        str++;
-               while (*str != '\0' && !iswblank(*str))
+               while (*str != L'\0' && !iswblank(*str))
                        str++;
        }
-       for (nchars = numchars; nchars-- && *str; ++str);
+       for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
+               ;
        return(str);
 }
 
@@ -293,52 +304,3 @@ usage(void)
 "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
        exit(1);
 }
-
-static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
-static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
-
-int
-wcsicoll(wchar_t *s1, wchar_t *s2)
-{
-       wchar_t *p;
-       size_t l1, l2;
-       size_t new_l1_buflen, new_l2_buflen;
-
-       l1 = wcslen(s1) + 1;
-       l2 = wcslen(s2) + 1;
-       new_l1_buflen = wcsicoll_l1_buflen;
-       new_l2_buflen = wcsicoll_l2_buflen;
-       while (new_l1_buflen < l1) {
-               if (new_l1_buflen == 0)
-                       new_l1_buflen = INITLINELEN;
-               else
-                       new_l1_buflen *= 2;
-       }
-       while (new_l2_buflen < l2) {
-               if (new_l2_buflen == 0)
-                       new_l2_buflen = INITLINELEN;
-               else
-                       new_l2_buflen *= 2;
-       }
-       if (new_l1_buflen > wcsicoll_l1_buflen) {
-               wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
-               if (wcsicoll_l1_buf == NULL)
-                       err(1, "reallocf");
-               wcsicoll_l1_buflen = new_l1_buflen;
-       }
-       if (new_l2_buflen > wcsicoll_l2_buflen) {
-               wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
-               if (wcsicoll_l2_buf == NULL)
-                       err(1, "reallocf");
-               wcsicoll_l2_buflen = new_l2_buflen;
-       }
-
-       for (p = wcsicoll_l1_buf; *s1; s1++)
-               *p++ = towlower(*s1);
-       *p = '\0';
-       for (p = wcsicoll_l2_buf; *s2; s2++)
-               *p++ = towlower(*s2);
-       *p = '\0';
-
-       return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
-}
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to