commit e8e3689fb359ba1c2d8b137abcf239782a6cd8d0
Author: FRIGN <[email protected]>
Date:   Sun Mar 22 21:43:59 2015 +0100

    Audit col(1), add UTF-8-support
    
    Nothing special here, only renaming of variables and adding the
    Rune-utility-functions.
    Also, I refactored the manpage.

diff --git a/README b/README
index 090c841..e5d9b5e 100644
--- a/README
+++ b/README
@@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =*| cksum           yes                          none
 =*| cmp             yes                          none
 #*| cols            non-posix                    none
-    col             yes                          none
+#*| col             non-posix                    none
 =*| comm            yes                          none
 =*| cp              yes                          none (-i)
 =*| cron            non-posix                    none
diff --git a/col.1 b/col.1
index b9d1c4d..939d354 100644
--- a/col.1
+++ b/col.1
@@ -1,4 +1,4 @@
-.Dd March 6, 2014
+.Dd March 22, 2015
 .Dt COL 1
 .Os sbase
 .Sh NAME
@@ -9,20 +9,15 @@
 .Op Fl bfpx
 .Op Fl l Ar num
 .Sh DESCRIPTION
-The
 .Nm
-utility filters all the reverse (and half reverse) line feeds,
-as they are produced by
+filters all reverse (and half reverse) line feeds,
+as produced by
 .Xr nroff 1
-with .2C of
+with .2C,
 .Xr ms 6
-or by
+or
 .Xr tbl 1 .
-.Nm
-also replaces spaces by tabs when it is possible.
-The control sequences managed by
-.Nm
-are:
+The recognized control sequences are:
 .Bl -tag -width Ds
 .It ESC-7
 Reverse line-feed
@@ -42,25 +37,24 @@ Carriage return
 New line
 .El
 .Pp
-All the other control codes and escape sequences are removed.
+All other control codes and escape sequences are removed.
 .Nm
-transforms all the spaces into tabulators.
+converts all spaces to tabs.
 .Sh OPTIONS
 .Bl -tag -width Ds
 .It Fl p
-Print unknown escape sequences to the output.
+Print unknown escape sequences.
 .It Fl b
-Do not print backspaces in output,
-and print only the last overstriked character in the output.
+Do not print backspaces and instead only print the last
+character written to each column position.
 .It Fl f
 Allow forward half line feeds in the output.
 .It Fl x
-Do not convert spaces in tabulators.
+Do not convert spaces to tabs.
 .It Fl l Ar num
-Increment to
+Buffer
 .Ar num
-the number of lines buffered for
-.Nm
+lines in memory.
 .El
 .Sh SEE ALSO
 .Xr nroff 1 ,
@@ -68,13 +62,9 @@ the number of lines buffered for
 .Xr ms 6
 .Sh BUGS
 .Nm
-only process text with a maximum of 256 lines with 800 bytes per line,
-although the number of lines can be modified with the
-.Fl l
-option.
-When the number of lines is bigger,
-the buffer is flushed to the output,
-so new reverse line feeds can not operate in the flushed  lines.
-This implementation ignores SI and SO selection character sets,
-because it is supposed to work only with UTF-8 strings,
-although the UTF-8 support is missed.
+only buffers up to 256 lines with up to 800 characters per line
+unless the number of lines has been changed with the
+.Fl l
+flag.
+When the input has more lines than that, the buffer is flushed and
+reverse line feeds cannot operate on the flushed lines.
diff --git a/col.c b/col.c
index 8b86240..46c3332 100644
--- a/col.c
+++ b/col.c
@@ -1,47 +1,47 @@
 /* See LICENSE file for copyright and license details. */
+#include <limits.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 #define NLINES 256
 #define NCOLS 800
 
-static char **buff;
+static Rune **buf;
 
-static int obackspace, onotabs, ohalfline, oescape;
-static unsigned nline, ncol, nchar, nspaces, maxline, bs;
-static size_t pagsize = NLINES;
+static int    backspace, notabs, halfline, escape;
+static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
 
 static void
 flush(void)
 {
-       int c;
-       unsigned i, j;
+       Rune c;
+       size_t i, j;
 
        for (i = 0; i < maxline; ++i) {
-               for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
-                       putchar(c);
+               for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
+                       efputrune(&c, stdout, "<stdout>");
                putchar('\n');
        }
        bs = nchar = nline = ncol = 0;
 }
 
 static void
-forward(unsigned n)
+forward(size_t n)
 {
-       unsigned lim;
+       size_t lim;
 
-       for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
-               switch (buff[nline][nchar]) {
+       for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
+               switch (buf[nline][nchar]) {
                case '\b':
                        --ncol;
                        break;
                case '\0':
-                       buff[nline][nchar] = ' ';
+                       buf[nline][nchar] = ' ';
                        /* FALLTHROUGH */
                default:
                        ++ncol;
@@ -53,31 +53,30 @@ forward(unsigned n)
 static void
 linefeed(int up, int rcarriage)
 {
-       unsigned oncol = ncol;
+       size_t oncol = ncol;
 
        nspaces = 0;
        if (up > 0) {
-               if (nline == pagsize-1) {
+               if (nline == pagesize - 1) {
                        flush();
                }  else {
                        if (++nline > maxline)
                                maxline = nline;
                }
-       } else {
-               if (nline > 0)
-                       --nline;
+       } else if (nline > 0) {
+               --nline;
        }
        bs = 0;
        if (rcarriage) {
                forward(oncol);
-                nchar = ncol = 0;
+               nchar = ncol = 0;
        }
 }
 
 static void
-newchar(int c)
+newchar(Rune c)
 {
-       char *cp;
+       Rune *cp;
 
        forward(nspaces);
        nspaces = 0;
@@ -90,7 +89,7 @@ newchar(int c)
                nchar = ncol = 0;
                break;
        case '\t':
-               forward(8 - ncol%8);
+               forward(8 - ncol % 8);
                break;
        case '\b':
                if (ncol > 0)
@@ -100,20 +99,18 @@ newchar(int c)
                bs = 1;
                break;
        default:
-               cp = &buff[nline][nchar];
-               if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
-                       if (nchar != NCOLS-3) {
-                               memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
-                               cp[1] = '\b';
-                               nchar += 2;
-                       }
+               cp = &buf[nline][nchar];
+               if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
+                       memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
+                       cp[1] = '\b';
+                       nchar += 2;
                }
-               if (nchar != NCOLS-1) {
-                       for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
+               if (nchar != NCOLS - 1) {
+                       for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
                                if (*cp == '\0')
                                        *cp = ' ';
                        }
-                       buff[nline][nchar++] = c;
+                       buf[nline][nchar++] = c;
                        ++ncol;
                }
                bs = 0;
@@ -123,50 +120,52 @@ newchar(int c)
 static void
 col(void)
 {
-       int c;
+       Rune r;
+       int ret;
 
-       while ((c = getchar()) != EOF) {
-               switch (c) {
+       while (efgetrune(&r, stdin, "<stdin>")) {
+               switch (r) {
                case '\x1b':
-                       switch (c = getchar()) {
+                       ret = efgetrune(&r, stdin, "<stdin>");
+                       switch (r) {
                        case '8': /* reverse half-line-feed */
                        case '7': /* reverse line-feed */
                                linefeed(-1, 0);
                                continue;
                        case '9':  /* forward half-line-feed */
-                               if (ohalfline)
+                               if (halfline)
                                        break;
                                linefeed(1, 0);
                                continue;
                        }
-                       if (!oescape)
+                       if (!escape)
                                continue;
                        newchar('\x1b');
-                       if (c != EOF)
-                               newchar(c);
+                       if (ret)
+                               newchar(r);
                        break;
                case '\v':
                        linefeed(-1, 0);
                        break;
                case ' ':
-                       if (!onotabs) {
+                       if (!notabs) {
                                if (++nspaces != 8)
                                        continue;
-                               c = '\t';
+                               r = '\t';
                                nspaces = 0;
                        }
                        /* FALLTHROUGH */
                case '\r':
                case '\b':
                case '\t':
-                       newchar(c);
+                       newchar(r);
                        break;
                case '\n':
                        linefeed(1, 1);
                        break;
                default:
-                       if (!iscntrl(c))
-                               newchar(c);
+                       if (!iscntrlrune(r))
+                               newchar(r);
                        break;
                }
        }
@@ -175,17 +174,17 @@ col(void)
 static void
 allocbuf(void)
 {
-       char **bp;
+       Rune **bp;
 
-       buff = ereallocarray(NULL, pagsize, sizeof(*buff));
-       for (bp = buff; bp < &buff[pagsize]; ++bp)
-               *bp = emalloc(NCOLS);
+       buf = ereallocarray(NULL, pagesize, sizeof(*buf));
+       for (bp = buf; bp < buf + pagesize; ++bp)
+               *bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
 }
 
 static void
 usage(void)
 {
-       enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
+       enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
 }
 
 int
@@ -193,35 +192,30 @@ main(int argc, char *argv[])
 {
        ARGBEGIN {
        case 'b':
-               obackspace = 1;
+               backspace = 1;
                break;
        case 'f':
-               ohalfline = 1;
+               halfline = 1;
                break;
        case 'l':
-               pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
+               pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
                break;
        case 'p':
-               oescape = 1;
+               escape = 1;
                break;
        case 'x':
-               onotabs = 1;
+               notabs = 1;
                break;
        default:
                usage();
        } ARGEND;
 
-       if (argc > 0)
+       if (argc)
                usage();
 
        allocbuf();
        col();
        flush();
 
-       if (ferror(stdin))
-               enprintf(1, "error reading input");
-       if (ferror(stdout))
-               enprintf(2, "error writing output");
-
        return 0;
 }

Reply via email to