Will now need libutf.

diff -r 8cf300476909 Makefile
--- a/Makefile	Sat Jun 09 18:53:39 2012 +0100
+++ b/Makefile	Tue Jul 31 23:06:28 2012 -0500
@@ -27,6 +27,7 @@
 	cksum.c \
 	cmp.c \
 	cp.c \
+	cut.c \
 	date.c \
 	dirname.c \
 	echo.c \
diff -r 8cf300476909 config.mk
--- a/config.mk	Sat Jun 09 18:53:39 2012 +0100
+++ b/config.mk	Tue Jul 31 23:06:28 2012 -0500
@@ -10,7 +10,7 @@
 LD = $(CC)
 CPPFLAGS = -D_POSIX_C_SOURCE=200112L
 CFLAGS = -g -ansi -Wall -pedantic $(CPPFLAGS)
-LDFLAGS = -g
+LDFLAGS = -g -lutf
 
 #CC = tcc
 #LD = $(CC)
diff -r 8cf300476909 cut.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.1	Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,56 @@
+.TH CUT 1
+.SH NAME
+cut \- select columns of file
+.SH SYNOPSIS
+.B cut -f
+.I ranges
+[
+.B -d
+.I delimiter
+]
+[
+.B -s
+]
+[
+.I file ...
+]
+.br
+.B cut -c
+.I ranges
+[
+.I file ...
+]
+.br
+.B cut -b
+.I ranges
+[
+.I file ...
+]
+.br
+.SH OPERATION
+Cut reads from given files, or stdin if no files given, and for each line selects
+.TP
+.B columns,
+with -f flag
+.TP
+.B characters,
+with -c flag
+.TP
+.B bytes,
+with -b flag
+.LP
+within given comma- or space-delimited ranges.
+.LP
+.br
+Each range is either a single decimal number, or of this form:
+.br
+.I x
+-
+.I y
+.br
+where x and y are decimal numbers, or empty.
+Empty x means first, and empty y means last column/character/byte on the line.
+.LP
+If -d option given, then the first character of its argument is the delimiter; otherwise it is tab.
+.LP
+If -s option given, then lines containing no delimiter are not printed; otherwise they are printed unchanged.
diff -r 8cf300476909 cut.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.c	Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,239 @@
+/* cut - print selected fields, characters or bytes of each input line */
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <utf.h>
+#include "text.h"
+
+/* One selection, 1-based and inclusive; max == 0 means "through end of line".
+   A list of ranges ends with an entry whose min is 0. */
+typedef struct {
+	int min, max;
+} range;
+
+/* Return 1 if the 1-based position n lies inside r, 0 otherwise. */
+int inRange (range r, unsigned int n) {
+	if (n < (unsigned int) r.min) return 0;
+	return 0 == r.max || n <= (unsigned int) r.max;
+}
+
+/* Write rune r to f in UTF-8; return the number of bytes in its encoding. */
+int fputrune (Rune r, FILE *f) {
+	char x[UTFmax];
+	int n;
+	n = runetochar (x, &r);
+	fwrite (x, 1, n, f);
+	return n;
+}
+
+/* Print the fields of line x (delimited by rune d) selected by rs, joined by d.
+   A line without any delimiter is printed whole unless s is set.
+   Fields come out grouped by range, in the order the ranges were given. */
+void cutLineF (Rune d, unsigned int s, range *rs, char *x) {
+	int ii, first;
+	unsigned int n;
+	if (!utfrune (x, d)) {
+		if (!s) fputs (x, stdout);
+		return;
+	}
+	first = 1;
+	for (ii = 0; rs[ii].min; ii++) {
+		char *y;
+		y = x;
+		for (n = 1; y; n++) {
+			char *z;
+			z = utfrune (y, d);
+			if (inRange (rs[ii], n)) {
+				/* delimiter goes between fields only, never after the last */
+				if (!first) fputrune (d, stdout);
+				first = 0;
+				if (z) fwrite (y, 1, (size_t) (z - y), stdout);
+				else fputs (y, stdout);
+			}
+			y = z ? z + runelen (d) : NULL;
+		}
+	}
+}
+
+/* Print the characters (runes) of line x selected by rs. */
+void cutLineC (range *rs, char *x) {
+	Rune r;
+	int ii;
+	unsigned int n;
+	for (ii = 0; rs[ii].min; ii++) {
+		char *y;
+		y = x;
+		for (n = 1; *y; n++) {
+			int l = chartorune (&r, y);
+			if (inRange (rs[ii], n)) fwrite (y, 1, l, stdout);
+			y += l;
+		}
+	}
+}
+
+/* Print the bytes of line x selected by rs. */
+void cutLineB (range *rs, char *x) {
+	size_t len, lo, hi;
+	int ii;
+	len = strlen (x);
+	for (ii = 0; rs[ii].min; ii++) {
+		/* clamp to the line so a range past its end never reads beyond x */
+		lo = (size_t) rs[ii].min - 1;
+		hi = rs[ii].max && (size_t) rs[ii].max < len ? (size_t) rs[ii].max : len;
+		if (lo < hi) fwrite (x + lo, 1, hi - lo, stdout);
+	}
+}
+
+/* Cut every line read from stdin according to mode ('f', 'c' or 'b'). */
+void go (int mode, Rune d, unsigned int s, range *rs) {
+	char *x;
+	size_t size = 0;
+	x = 0;
+
+	/* NOTE(review): x is presumably (re)allocated by afgets and never released here;
+	   confirm its ownership rules in text.h before adding a free */
+	while (afgets (&x, &size, stdin)) {
+		char *nl;
+		/* must delete newline here, and redo later;
+		   otherwise, unknown whether it was included in cut */
+		nl = strchr (x, '\n');
+		if (nl) *nl = 0;
+		switch (mode) {
+		case 'f':
+			/* with -s, a line lacking the delimiter yields no output at all */
+			if (s && !utfrune (x, d)) break;
+			cutLineF (d, s, rs, x);
+			fputc ('\n', stdout);
+			break;
+		case 'c': cutLineC (rs, x); fputc ('\n', stdout); break;
+		case 'b': cutLineB (rs, x); fputc ('\n', stdout); break;
+		}
+	}
+}
+
+/* Parse an unsigned decimal number at *p into *out and advance *p past it.
+   Return 1 on success, 0 if *p does not start with a digit, -1 if out of range. */
+static int parseNumber (char **p, int *out) {
+	unsigned long v;
+	char *end;
+	if (!isdigit ((unsigned char) **p)) return 0;
+	errno = 0;
+	v = strtoul (*p, &end, 10);
+	if (errno || v > (unsigned long) INT_MAX) return -1;
+	*out = (int) v;
+	*p = end;
+	return 1;
+}
+
+/* Parse a comma- or whitespace-separated list such as "1,3-5,-2,7-" into a newly
+   allocated array ended by an entry with min == 0. On failure print a message
+   to stderr and return NULL. */
+static range *parseRanges (char *spec) {
+	range *rs;
+	char *p;
+	int n, got;
+
+	/* each range takes at least one byte, so strlen + 1 entries always suffice */
+	rs = malloc (sizeof (range) * (strlen (spec) + 1));
+	if (!rs) {
+		fputs ("Failed to allocate memory\n", stderr);
+		return NULL;
+	}
+
+	n = 0;
+	p = spec;
+	for (;;) {
+		while (*p == ',' || isspace ((unsigned char) *p)) p++;
+		if (!*p) break;
+		rs[n].min = 1;	/* an omitted start means first */
+		got = parseNumber (&p, &rs[n].min);
+		/* min == 0 is the list terminator, so an explicit 0 must be rejected */
+		if (got < 0 || rs[n].min < 1) goto malformed;
+		if (*p == '-') {
+			p++;
+			rs[n].max = 0;	/* an omitted end means last */
+			got = parseNumber (&p, &rs[n].max);
+			if (got < 0 || (got && rs[n].max < rs[n].min)) goto malformed;
+		}
+		else if (got) rs[n].max = rs[n].min;
+		else goto malformed;
+		if (*p && *p != ',' && !isspace ((unsigned char) *p)) goto malformed;
+		n++;
+	}
+	if (!n) goto malformed;
+	rs[n].min = 0;
+	return rs;
+
+malformed:
+	fputs ("Malformed ranges\n", stderr);
+	free (rs);
+	return NULL;
+}
+
+/* usage: cut -b|-c|-f ranges [-d delimiter] [-s] [file ...] */
+int main (int argc, char *argu[]) {
+	int mode = 0;
+	Rune d = '\t';
+	unsigned int s = 0;
+	range *rs = 0;
+	int ii;
+
+	/* parse options */
+	for (ii = 1; ii < argc; ii++) {
+		int jj;
+		if (argu[ii][0] != '-') break;
+		for (jj = 1; argu[ii][jj]; jj++) switch (argu[ii][jj]) {
+		case 'b':
+		case 'c':
+		case 'f':
+			mode = argu[ii][jj];
+
+			if (++ii >= argc) {
+				fputs ("No range argument\n", stderr);
+				return 1;
+			}
+
+			free (rs);	/* a repeated mode option replaces the earlier list */
+			rs = parseRanges (argu[ii]);
+			if (!rs) return 1;
+			goto nextArgument;
+		case 'd':
+			/* need a non-empty argument to take the delimiter character from */
+			if (++ii >= argc || !argu[ii][0]) {
+				fputs ("No delimiter argument\n", stderr);
+				return 1;
+			}
+			chartorune (&d, argu[ii]);
+			goto nextArgument;
+		case 's':
+			s = 1;
+			break;
+		default:
+			/* unrecognised option letters are ignored */
+			break;
+		}
+nextArgument: ;
+	}
+
+	if (!mode) {
+		fprintf (stderr, "No mode given\n");
+		return 1;
+	}
+
+	if (ii < argc) {
+		for (; ii < argc; ii++) {
+			if (!freopen (argu[ii], "r", stdin)) {
+				fprintf (stderr, "Failed to open file %s\n", argu[ii]);
+				return 1;
+			}
+			go (mode, d, s, rs);
+		}
+	}
+	else go (mode, d, s, rs);
+
+	free (rs);
+	return 0;
+}