Use awk.

On Wed, Aug 1, 2012 at 6:07 AM, Strake <strake...@gmail.com> wrote:
> Will now need libutf.
>
> diff -r 8cf300476909 Makefile
> --- a/Makefile  Sat Jun 09 18:53:39 2012 +0100
> +++ b/Makefile  Tue Jul 31 23:06:28 2012 -0500
> @@ -27,6 +27,7 @@
>         cksum.c    \
>         cmp.c      \
>         cp.c       \
> +       cut.c      \
>         date.c     \
>         dirname.c  \
>         echo.c     \
> diff -r 8cf300476909 config.mk
> --- a/config.mk Sat Jun 09 18:53:39 2012 +0100
> +++ b/config.mk Tue Jul 31 23:06:28 2012 -0500
> @@ -10,7 +10,7 @@
>  LD = $(CC)
>  CPPFLAGS = -D_POSIX_C_SOURCE=200112L
>  CFLAGS   = -g -ansi -Wall -pedantic $(CPPFLAGS)
> -LDFLAGS  = -g
> +LDFLAGS  = -g -lutf
>
>  #CC = tcc
>  #LD = $(CC)
> diff -r 8cf300476909 cut.1
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/cut.1     Tue Jul 31 23:06:28 2012 -0500
> @@ -0,0 +1,51 @@
> +.TH CUT 1
> +.SH NAME
> +cut \- select columns of file
> +.SH SYNOPSIS
> +.B cut -f
> +.I ranges
> +[
> +.B -d
> +.I delimiter
> +]
> +[
> +.I file ...
> +]
> +.br
> +.B cut -c
> +.I ranges
> +[
> +.I file ...
> +]
> +.br
> +.B cut -b
> +.I ranges
> +[
> +.I file ...
> +]
> +.br
> +.SH OPERATION
> +Cut reads from given files, or stdin if no files given, and for each line selects
> +.TP
> +.B columns,
> +with -f flag
> +.TP
> +.B characters,
> +with -c flag
> +.TP
> +.B bytes,
> +with -b flag
> +.LP
> +within given comma- or space-delimited ranges.
> +.LP
> +.br
> +Each range is either a single decimal number, or of this form:
> +.br
> +.I x
> +-
> +.I y
> +.br
> +where x and y are decimal numbers, or empty.
> +Empty x means first, and empty y means last column/character/byte on the line.
> +.LP
> +If -d option given, then the first character of its argument is the delimiter; otherwise it is tab.
> diff -r 8cf300476909 cut.c
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/cut.c     Tue Jul 31 23:06:28 2012 -0500
> @@ -0,0 +1,192 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <utf.h>
> +#include "text.h"
> +
> +typedef struct {
> +       int min, max;
> +} range;
> +
> +int inRange (range r, unsigned int n) {
> +       if ((0 == r.max || n <= r.max) && n >= r.min) return 1;
> +       else return 0;
> +}
> +
> +int fputrune (Rune r, FILE *f) {
> +       char x[UTFmax];
> +       int n;
> +       n = runetochar (x, &r);
> +       fwrite (x, 1, n, f);
> +       return n;
> +}
> +
> +void cutLineF (Rune d, unsigned int s, range *rs, char *x) {
> +       int ii, n;
> +       if (!utfrune (x, d)) {
> +               if (!s) fputs (x, stdout);
> +               return;
> +       }
> +       /* kludge; be warned */
> +       for (ii = 0; rs[ii].min; ii++) {
> +               char *y;
> +               y = x;
> +               for (n = 1; y; n++) {
> +                       char *z;
> +                       char ch;
> +                       z = utfrune (y, d);
> +                       if (z) {
> +                               ch = *z;
> +                               *z = 0;
> +                       }
> +                       if (inRange (rs[ii], n)) {
> +                               fputs (y, stdout);
> +                               fputrune (d, stdout);
> +                       }
> +                       if (z) {
> +                               *z = ch;
> +                               z += runelen (d);
> +                       }
> +                       y = z;
> +               }
> +       }
> +}
> +
> +void cutLineC (range *rs, char *x) {
> +       Rune _r;
> +       int ii, n;
> +       for (ii = 0; rs[ii].min; ii++) {
> +               char *y;
> +               y = x;
> +               for (n = 1; *y; n++) {
> +                       int l = chartorune (&_r, y);
> +                       if (inRange (rs[ii], n)) fwrite (y, 1, l, stdout);
> +                       y += l;
> +               }
> +       }
> +}
> +
> +void cutLineB (range *rs, char *x) {
> +       int ii, n;
> +       for (ii = 0; rs[ii].min; ii++) {
> +               for (n = rs[ii].min - 1; rs[ii].max ? n < rs[ii].max : x[n]; n++) {
> +                       fputc (x[n], stdout);
> +               }
> +       }
> +}
> +
> +void go (int mode, Rune d, unsigned int s, range *rs) {
> +       char *x;
> +       size_t size = 0;
> +       x = 0;
> +
> +       while (afgets (&x, &size, stdin)) {
> +               int ii;
> +               /* must delete newline here, and redo later;
> +                  otherwise, unknown whether it was included in cut */
> +               for (ii = 0; x[ii]; ii++) if (x[ii] == '\n') x[ii] = 0;
> +               switch (mode) {
> +               case 'f':
> +                       if (!utfrune (x, d)) {
> +                               if (!s) {
> +                                       fputs (x, stdout);
> +                                       fputc ('\n', stdout);
> +                               }
> +                       }
> +                       else {
> +                               cutLineF (d, s, rs, x);
> +                               fputc ('\n', stdout);
> +                       }
> +                       break;
> +               case 'c': cutLineC (rs, x); fputc ('\n', stdout); break;
> +               case 'b': cutLineB (rs, x); fputc ('\n', stdout); break;
> +               }
> +       }
> +}
> +
> +int main (int argc, char *argu[]) {
> +       int mode = 0;
> +       Rune d = '\t';
> +       unsigned int s = 0;
> +       range *rs = 0;
> +       int ii;
> +
> +       /* parse options */
> +       for (ii = 1; ii < argc; ii++) {
> +               int jj;
> +               if (argu[ii][0] != '-') break;
> +               for (jj = 1; argu[ii][jj]; jj++) switch (argu[ii][jj]) {
> +               case 'b':
> +               case 'c':
> +               case 'f':
> +                       mode = argu[ii][jj];
> +
> +                       if (++ii >= argc) {
> +                               fputs ("No range argument\n", stderr);
> +                               return 1;
> +                       }
> +
> +                       rs = malloc (sizeof (range) * (utflen (argu[ii]) + 1));
> +                       if (!rs) {
> +                               fputs ("Failed to allocate memory\n", stderr);
> +                               return 1;
> +                       }
> +
> +                       /* ensure space delimitation for strtoul */
> +                       for (jj = 0; argu[ii][jj]; jj++) if (argu[ii][jj] == ',') argu[ii][jj] = ' ';
> +
> +                       /* parse ranges */
> +                       /* max = 0 to denote last */
> +                       {
> +                               char *p;
> +                               p = argu[ii];
> +                               jj = 0;
> +                               while (*p) {
> +                                       rs[jj].min = *p == '-' ? 1 : strtoul (p, &p, 10);
> +                                       rs[jj].max = *p == '-' ? strtoul (++p, &p, 10) : rs[jj].min;
> +                                       switch (*p) {
> +                                       case '\0':
> +                                               rs[++jj].min = 0;
> +                                               break;
> +                                       case ' ':
> +                                       case '\f':
> +                                       case '\v':
> +                                       case '\t':
> +                                       case '\r':
> +                                       case '\n':
> +                                               jj++;
> +                                               break;
> +                                       default:
> +                                               fprintf (stderr, "Malformed ranges\n");
> +                                               return 1;
> +                                       }
> +                               }
> +                       }
> +                       goto nextArgument;
> +               case 'd':
> +                       chartorune (&d, argu[++ii]);
> +                       goto nextArgument;
> +               case 's':
> +                       s = 1;
> +                       break;
> +               }
> +nextArgument:  ;
> +       }
> +
> +       if (!mode) {
> +               fprintf (stderr, "No mode given\n");
> +               return 1;
> +       }
> +
> +       if (ii < argc) {
> +               for (; ii < argc; ii++) {
> +                       if (!freopen (argu[ii], "r", stdin)) {
> +                               fprintf (stderr, "Failed to open file %s\n", argu[ii]);
> +                               return 1;
> +                       }
> +                       go (mode, d, s, rs);
> +               }
> +       }
> +       else go (mode, d, s, rs);
> +
> +       return 0;
> +}
>

Reply via email to