Will now need libutf.

diff -r 8cf300476909 Makefile
--- a/Makefile  Sat Jun 09 18:53:39 2012 +0100
+++ b/Makefile  Tue Jul 31 23:06:28 2012 -0500
@@ -27,6 +27,7 @@
        cksum.c    \
        cmp.c      \
        cp.c       \
+       cut.c      \
        date.c     \
        dirname.c  \
        echo.c     \
diff -r 8cf300476909 config.mk
--- a/config.mk Sat Jun 09 18:53:39 2012 +0100
+++ b/config.mk Tue Jul 31 23:06:28 2012 -0500
@@ -10,7 +10,7 @@
 LD = $(CC)
 CPPFLAGS = -D_POSIX_C_SOURCE=200112L
 CFLAGS   = -g -ansi -Wall -pedantic $(CPPFLAGS)
-LDFLAGS  = -g
+LDFLAGS  = -g -lutf

 #CC = tcc
 #LD = $(CC)
diff -r 8cf300476909 cut.1
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.1     Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,51 @@
+.TH CUT 1
+.SH NAME
+cut \- select columns of file
+.SH SYNOPSIS
+.B cut -f
+.I ranges
+[
+.B -d
+.I delimiter
+]
+[
+.I file ...
+]
+.br
+.B cut -c
+.I ranges
+[
+.I file ...
+]
+.br
+.B cut -b
+.I ranges
+[
+.I file ...
+]
+.br
+.SH OPERATION
+Cut reads from the given files, or from stdin if no files are given, and for each line selects
+.TP
+.B columns,
+with -f flag
+.TP
+.B characters,
+with -c flag
+.TP
+.B bytes,
+with -b flag
+.LP
+within given comma- or space-delimited ranges.
+.LP
+.br
+Each range is either a single decimal number, or of this form:
+.br
+.I x
+-
+.I y
+.br
+where x and y are decimal numbers, or empty.
+Empty x means first, and empty y means last column/character/byte on the line.
+.LP
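+If the -d option is given, the first character of its argument is the delimiter; otherwise the delimiter is tab.
+With -f, if the -s option is given, lines that do not contain the delimiter are not printed.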
diff -r 8cf300476909 cut.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.c     Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,192 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <utf.h>
+#include "text.h"
+
+/*
+ * One selection range, counted from 1 and inclusive at both ends.
+ * max == 0 means the range is open-ended (runs to the end of the line).
+ * An entry with min == 0 marks the end of an array of ranges.
+ */
+typedef struct {
+       int min, max;
+} range;
+
+/*
+ * Return 1 if position n falls inside r, otherwise 0.
+ * A max of 0 means r has no upper bound.
+ */
+int inRange (range r, unsigned int n) {
+       if (n < r.min) return 0;
+       if (0 == r.max) return 1;
+       return n <= r.max;
+}
+
+/*
+ * Write rune r to stream f using the byte sequence produced by runetochar.
+ * Returns the byte count reported by runetochar; the result of fwrite is
+ * not checked.
+ */
+int fputrune (Rune r, FILE *f) {
+       char buf[UTFmax];
+       int len = runetochar (buf, &r);
+
+       fwrite (buf, 1, len, f);
+       return len;
+}
+
+/*
+ * Print to stdout the fields of line x selected by the range list rs,
+ * where fields are separated by rune d.
+ *
+ * d:  field delimiter
+ * s:  if nonzero, a line containing no delimiter prints nothing;
+ *     if zero, such a line is printed unchanged
+ * rs: ranges, terminated by an entry whose min is 0
+ * x:  the line, without its newline; it is modified temporarily while
+ *     scanning and restored before returning, so it must be writable
+ *
+ * Ranges are handled in list order, so the line is rescanned once per
+ * range. The delimiter is written between selected fields only, never
+ * after the last one. No newline is written.
+ */
+void cutLineF (Rune d, unsigned int s, range *rs, char *x) {
+       int ii, n;
+       int printed = 0; /* nonzero once a field has been written */
+       if (!utfrune (x, d)) {
+               if (!s) fputs (x, stdout);
+               return;
+       }
+       for (ii = 0; rs[ii].min; ii++) {
+               char *y;
+               y = x;
+               for (n = 1; y; n++) {
+                       char *z;
+                       char ch = 0;
+                       z = utfrune (y, d);
+                       /* cut the string at the delimiter so y is one field */
+                       if (z) {
+                               ch = *z;
+                               *z = 0;
+                       }
+                       if (inRange (rs[ii], n)) {
+                               if (printed) fputrune (d, stdout);
+                               fputs (y, stdout);
+                               printed = 1;
+                       }
+                       /* undo the cut and step past the delimiter */
+                       if (z) {
+                               *z = ch;
+                               z += runelen (d);
+                       }
+                       y = z;
+               }
+       }
+}
+
+/*
+ * Print to stdout the characters of line x whose position, counted from 1,
+ * falls inside each range of rs (terminated by an entry whose min is 0).
+ * Ranges are handled in list order, so the line is rescanned once per
+ * range. No newline is written.
+ */
+void cutLineC (range *rs, char *x) {
+       range *cur;
+       for (cur = rs; cur->min; cur++) {
+               char *p = x;
+               int pos = 1;
+               while (*p) {
+                       Rune decoded;
+                       /* chartorune reports how many bytes this character spans */
+                       int len = chartorune (&decoded, p);
+                       if (inRange (*cur, pos)) fwrite (p, 1, len, stdout);
+                       p += len;
+                       pos++;
+               }
+       }
+}
+
+/*
+ * Print to stdout the bytes of line x whose position, counted from 1,
+ * falls inside each range of rs (terminated by an entry whose min is 0).
+ * Ranges are handled in list order. No newline is written.
+ *
+ * The scan always stops at the string terminator, so a range that starts
+ * or ends beyond the end of the line selects only the bytes that exist.
+ */
+void cutLineB (range *rs, char *x) {
+       int ii, n;
+       for (ii = 0; rs[ii].min; ii++) {
+               for (n = 0; x[n]; n++) {
+                       /* bounded range: nothing further can match past max */
+                       if (rs[ii].max && n >= rs[ii].max) break;
+                       if (inRange (rs[ii], n + 1)) fputc (x[n], stdout);
+               }
+       }
+}
+
+/*
+ * Read stdin line by line with afgets and print the selected part of each
+ * line, followed by a newline.
+ *
+ * mode: 'f' (fields), 'c' (characters) or 'b' (bytes); any other value
+ *       prints nothing
+ * d:    field delimiter, used in 'f' mode only
+ * s:    in 'f' mode, nonzero suppresses lines that contain no delimiter
+ * rs:   ranges, terminated by an entry whose min is 0
+ */
+void go (int mode, Rune d, unsigned int s, range *rs) {
+       char *x;
+       size_t size = 0;
+       x = 0;
+
+       while (afgets (&x, &size, stdin)) {
+               int ii;
+               /* must delete newline here, and redo later;
+                  otherwise, unknown whether it was included in cut */
+               for (ii = 0; x[ii]; ii++) if (x[ii] == '\n') x[ii] = 0;
+               switch (mode) {
+               case 'f':
+                       if (!utfrune (x, d)) {
+                               /* a suppressed line emits nothing, not even a newline */
+                               if (!s) {
+                                       fputs (x, stdout);
+                                       fputc ('\n', stdout);
+                               }
+                       }
+                       else {
+                               cutLineF (d, s, rs, x);
+                               fputc ('\n', stdout);
+                       }
+                       break;
+               case 'c': cutLineC (rs, x); fputc ('\n', stdout); break;
+               case 'b': cutLineB (rs, x); fputc ('\n', stdout); break;
+               }
+       }
+       /* NOTE(review): assumes afgets stores a heap-allocated buffer in x that
+          the caller owns (x starts as 0 and is passed by address together
+          with its size) -- confirm against text.h */
+       free (x);
+}
+
+/* Return nonzero if c may separate two ranges in a range list. */
+static int isRangeSep (int c) {
+       switch (c) {
+       case ',':
+       case ' ':
+       case '\f':
+       case '\v':
+       case '\t':
+       case '\r':
+       case '\n':
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Parse a decimal position at *pp into *out and advance *pp past it.
+ * Returns 1 on success; returns 0 if *pp does not start with a digit or
+ * the value is 0 or does not fit in an int.
+ */
+static int parseIndex (char **pp, int *out) {
+       unsigned long v;
+       if (**pp < '0' || **pp > '9') return 0;
+       v = strtoul (*pp, pp, 10);
+       *out = (int) v;
+       /* the round trip fails when v was too large for an int */
+       return *out >= 1 && (unsigned long) *out == v;
+}
+
+/*
+ * Parse the range list in p ("x", "x-y", "x-", "-y" or "-", separated by
+ * commas or whitespace) into rs and terminate rs with an entry whose min
+ * is 0. max is set to 0 for an open-ended range.
+ * Returns the number of ranges stored, or 0 if the list is empty or
+ * malformed. rs needs room for one entry more than the ranges in p.
+ */
+static int parseRanges (char *p, range *rs) {
+       int count = 0;
+       while (isRangeSep (*p)) p++;
+       while (*p) {
+               if (*p == '-') rs[count].min = 1;
+               else if (!parseIndex (&p, &rs[count].min)) return 0;
+
+               if (*p != '-') rs[count].max = rs[count].min;
+               else {
+                       p++;
+                       if (*p >= '0' && *p <= '9') {
+                               if (!parseIndex (&p, &rs[count].max)) return 0;
+                       }
+                       else rs[count].max = 0;
+               }
+
+               /* a range must be followed by a separator or the end */
+               if (*p && !isRangeSep (*p)) return 0;
+               count++;
+               while (isRangeSep (*p)) p++;
+       }
+       rs[count].min = 0;
+       return count;
+}
+
+/*
+ * cut: select fields (-f), characters (-c) or bytes (-b) of each input line.
+ *
+ * Options are read until the first argument that does not start with '-'.
+ * -b, -c and -f take the range list from the following argument, -d takes
+ * the delimiter from the following argument, and -s sets the suppress
+ * flag. The remaining arguments are files, each reopened as stdin in
+ * turn; with no files, stdin itself is processed.
+ *
+ * Returns 0 on success and 1 after printing a message to stderr on a
+ * usage error, allocation failure or a file that cannot be opened.
+ */
+int main (int argc, char *argu[]) {
+       int mode = 0;
+       Rune d = '\t';
+       unsigned int s = 0;
+       range *rs = 0;
+       int ii;
+
+       /* parse options */
+       for (ii = 1; ii < argc; ii++) {
+               int jj;
+               if (argu[ii][0] != '-') break;
+               for (jj = 1; argu[ii][jj]; jj++) switch (argu[ii][jj]) {
+               case 'b':
+               case 'c':
+               case 'f':
+                       mode = argu[ii][jj];
+
+                       if (++ii >= argc) {
+                               fputs ("No range argument\n", stderr);
+                               return 1;
+                       }
+
+                       /* a repeated mode option replaces the earlier list */
+                       free (rs);
+                       /* every range takes at least one character, so this
+                          leaves room for the terminating entry as well */
+                       rs = malloc (sizeof (range) * (utflen (argu[ii]) + 1));
+                       if (!rs) {
+                               fputs ("Failed to allocate memory\n", stderr);
+                               return 1;
+                       }
+
+                       if (!parseRanges (argu[ii], rs)) {
+                               fprintf (stderr, "Malformed ranges\n");
+                               return 1;
+                       }
+                       goto nextArgument;
+               case 'd':
+                       if (++ii >= argc) {
+                               fputs ("No delimiter argument\n", stderr);
+                               return 1;
+                       }
+                       /* an empty argument would make the delimiter the NUL
+                          rune, which cannot be told apart from end of line */
+                       if (!argu[ii][0]) {
+                               fputs ("Empty delimiter\n", stderr);
+                               return 1;
+                       }
+                       chartorune (&d, argu[ii]);
+                       goto nextArgument;
+               case 's':
+                       s = 1;
+                       break;
+               }
+nextArgument:  ;
+       }
+
+       if (!mode) {
+               fprintf (stderr, "No mode given\n");
+               return 1;
+       }
+
+       if (ii < argc) {
+               for (; ii < argc; ii++) {
+                       if (!freopen (argu[ii], "r", stdin)) {
+                               fprintf (stderr, "Failed to open file %s\n", argu[ii]);
+                               return 1;
+                       }
+                       go (mode, d, s, rs);
+               }
+       }
+       else go (mode, d, s, rs);
+
+       return 0;
+}

Reply via email to