This commit adds a simple implementation of the POSIX
standard paste(1) command, and its man page.
TODO and Makefile have been updated accordingly.
---
 Makefile |   1 +
 TODO     |   2 -
 paste.1  | 122 +++++++++++++++++++++++++++++++++
 paste.c  | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 354 insertions(+), 2 deletions(-)
 create mode 100644 paste.1
 create mode 100644 paste.c

diff --git a/Makefile b/Makefile
index c3c36fe..4e1f3e6 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,7 @@ SRC = \
        mv.c       \
        nl.c       \
        nohup.c    \
+       paste.c    \
        pwd.c      \
        rm.c       \
        sleep.c    \
diff --git a/TODO b/TODO
index 20be4b8..d6b9e0b 100644
--- a/TODO
+++ b/TODO
@@ -28,8 +28,6 @@ md5sum [-c] [file...]
 
 nice [-n N] [command]
 
-paste [-s] [-d list] [file...]
-
 printenv [variable...]
 
 printf [format] [data...]
diff --git a/paste.1 b/paste.1
new file mode 100644
index 0000000..983b41f
--- /dev/null
+++ b/paste.1
@@ -0,0 +1,122 @@
+.TH PASTE 1 paste-VERSION "Apr 2013"
+.SH NAME
+paste \- merge corresponding or subsequent lines of files
+.SH "SYNOPSIS"
+.PP
+.B paste
+[
+.B \-s
+]
+[
+.B \-d
+.I list
+]
+.I file...
+.SH DESCRIPTION
+The
+.B paste
+utility concatenates the corresponding lines of the given input files,
+and writes the resulting lines to standard output. The default operation
+of
+.B paste
+concatenates the corresponding lines of the input files.
+The newline of every line except the line from the last input file is
+replaced with a tab.
+If an end-of-file condition is detected on one or more input files, 
+but not all input files,
+.B paste
+behaves as though empty lines were read from the files on which
+end-of-file was detected, unless the
+.B \-s
+option is specified.
+.SH OPTIONS
+.TP
+.B \-d list
+unless a backslash character appears in
+.I list
+each character is an element specifying a delimiter.
+If a backslash character appears, that and one or more characters 
+following it are an element specifying a delimiter.
+These elements specify one or more characters to use, 
+instead of the default tab, to replace the newline of the input 
+lines. The elements in
+.I list
+are used circularly; that is, when the 
+.I list
+is exhausted the first element from the list is reused.
+When the
+.B \-s 
+option is specified, the last newline in a file is not modified.
+The delimiter is reset to the first element of list after each file
+operand is processed.
+If a backslash character appears in list, it and the character following
+it represent one of the following delimiters:
+.RS
+.TP
+.I \en
+newline character
+.TP
+.I \et
+tab character
+.TP
+.I \e\e
+backslash character
+.TP
+.I \e0
+empty string (not a null character)
+.TP
+If any other character follows the backslash, the results are unspecified.
+.RE
+.TP
+.B \-s
+concatenate all of the lines of each separate input file in command line 
+order. The newline of every line except the last line in each input file
+is replaced with a tab, unless otherwise specified by the 
+.B \-d
+option.
+.PP
+If '\-' is specified for one or more input files, the standard input is
+used; standard input is read one line at a time, circularly for each
+instance of '\-'.
+.SH EXIT VALUES
+The
+.B paste
+utility exits 0 on successful completion, and >0 if an error
+occurs.
+.SH ENVIRONMENT VARIABLES
+The following environment variables affect the execution:
+.TP
+.B LANG
+provide a default value for the internationalization variables
+that are unset or null.
+.TP
+.B LC_ALL
+if set to a non-empty string value, override the values of all the
+other internationalization variables.
+.TP
+.B LC_CTYPE
+determine the locale for the interpretation of sequences of bytes
+of text data as characters (for example, single-byte as opposed to
+multi-byte characters in arguments and input files).
+.TP
+.B LC_MESSAGES
+determine the locale that should be used to affect the format and
+contents of diagnostic messages written to standard error.
+.SH CONFORMING TO
+The
+.B paste
+utility is IEEE Std 1003.2 (POSIX.2) compatible.
+.SH EXAMPLES
+.TP
+.I "ls | paste - - - -"
+.PP
+Write out a directory in four columns.
+.TP
+.I "paste -s -d '\et\en' file"
+.PP
+Combine pairs of lines from a file into single lines.
+.SH AUTHOR
+Written by Lorenzo Cogotti.
+.SH SEE ALSO
+.BR cut (1),
+.BR lam (1)
diff --git a/paste.c b/paste.c
new file mode 100644
index 0000000..0d0bcdb
--- /dev/null
+++ b/paste.c
@@ -0,0 +1,231 @@
+/* See LICENSE file for copyright and license details. */
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include "util.h"
+
+typedef struct { /* one input operand */
+       FILE *fp;         /* stream to read: stdin or an fopen()ed file */
+       const char *name; /* operand string, used in diagnostics */
+} Fdescr;
+
+static void eusage(void); /* report the usage synopsis via eprintf() */
+static size_t unescape(wchar_t *); /* decode -d escapes in place, return count */
+static wint_t in(Fdescr *); /* read one wide character, WEOF at end */
+static void out(wchar_t); /* write one wide character to stdout */
+static void sequential(Fdescr *, int, const wchar_t *, size_t); /* -s mode */
+static void parallel(Fdescr *, int, const wchar_t *, size_t); /* default mode */
+
+/* Entry point: parse -s/-d, decode the delimiter list, open every operand
+ * ("-" selects stdin), merge them and return 0; errors go to eprintf(). */
+int
+main(int argc, char **argv) {
+       const char *adelim = NULL;
+       bool seq = false;
+       wchar_t *delim;
+       size_t len;
+       Fdescr *dsc;
+       int i, c;
+
+       setlocale(LC_CTYPE, "");
+
+       while((c = getopt(argc, argv, "sd:")) != -1)
+               switch(c) {
+               case 's':
+                       seq = true;
+                       break;
+               case 'd':
+                       adelim = optarg;
+                       break;
+               default: /* includes '?' for an unknown option */
+                       eusage();
+               }
+
+       argc -= optind;
+       argv += optind;
+       if(argc == 0)
+               eusage();
+
+       /* populate delimiters */
+       if(!adelim)
+               adelim = "\t";
+
+       len = mbstowcs(NULL, adelim, 0);
+       if(len == (size_t)-1)
+               eprintf("invalid delimiter\n");
+
+       delim = malloc((len + 1) * sizeof(*delim));
+       if(!delim)
+               eprintf("out of memory\n");
+
+       mbstowcs(delim, adelim, len + 1); /* + 1: store the terminator too */
+       len = unescape(delim);
+       if(len == 0)
+               eprintf("no delimiters specified\n");
+
+       /* populate file list */
+       dsc = malloc(argc * sizeof(*dsc));
+       if(!dsc)
+               eprintf("out of memory\n");
+
+       for(i = 0; i < argc; i++) {
+               const char *name = argv[i];
+
+               if(strcmp(name, "-") == 0)
+                       dsc[i].fp = stdin;
+               else
+                       dsc[i].fp = fopen(name, "r");
+
+               if(!dsc[i].fp)
+                       eprintf("can't open '%s':", name);
+
+               dsc[i].name = name;
+       }
+
+       if(seq)
+               sequential(dsc, argc, delim, len);
+       else
+               parallel(dsc, argc, delim, len);
+
+       for(i = 0; i < argc; i++) {
+               if(dsc[i].fp != stdin)
+                       (void)fclose(dsc[i].fp);
+       }
+
+       free(delim);
+       free(dsc);
+       return 0;
+}
+
+static void
+eusage(void) { /* print the synopsis; eprintf() presumably exits (confirm) */
+       eprintf("usage: paste [-s][-d list] file...\n");
+}
+
+/*
+ * Decode the backslash escapes of a -d list in place.
+ * \n, \t and \\ become newline, tab and backslash; \0 becomes a NUL
+ * element.  Every other character is an element by itself.
+ * Decoding stops at the first unrecognised or unfinished escape.
+ * Returns the number of elements now stored at the front of delim;
+ * that prefix is not NUL-terminated by this function.
+ */
+static size_t
+unescape(wchar_t *delim) {
+       const wchar_t *src = delim;
+       size_t n = 0;
+
+       while(*src != L'\0') {
+               wchar_t wc = *src++;
+
+               if(wc == L'\\') {
+                       wc = *src++;
+                       if(wc == L'n')
+                               wc = L'\n';
+                       else if(wc == L't')
+                               wc = L'\t';
+                       else if(wc == L'0')
+                               wc = L'\0';
+                       else if(wc != L'\\')
+                               return n; /* POSIX: unspecified results */
+               }
+               delim[n++] = wc;
+       }
+
+       return n;
+}
+
+static wint_t
+in(Fdescr *f) {
+       wint_t c = fgetwc(f->fp);
+       
+       if(c == WEOF && ferror(f->fp))
+               eprintf("'%s' read error:", f->name);
+       
+       return c;
+}
+
+static void
+out(wchar_t c) { /* write c to stdout, reporting failure via eprintf() */
+       (void)fputwc(c, stdout);
+       if(ferror(stdout) != 0)
+               eprintf("write error:");
+}
+
+/* -s mode: join all lines of each of the len files in dsc into one line,
+ * cycling through the cnt elements of delim anew for every file. */
+static void
+sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+       int n;
+
+       for(n = 0; n < len; n++) {
+               Fdescr *f = &dsc[n];
+               size_t next = 0;
+               wint_t cur, prev;
+
+               for(prev = WEOF; (cur = in(f)) != WEOF; prev = cur) {
+                       /* a newline is replaced only once more input follows */
+                       if(prev == '\n') {
+                               if(delim[next] != '\0')
+                                       out(delim[next]);
+                               next = (next + 1) % cnt;
+                       }
+                       if(cur != '\n')
+                               out((wchar_t)cur);
+               }
+
+               /* the file's final newline is passed through unchanged */
+               if(prev == '\n')
+                       out(L'\n');
+       }
+}
+
+/*
+ * Default mode: every output line is assembled from the next line of
+ * each file in dsc[0..len-1], taken in order.
+ * The newline of file i is replaced by delim[i % cnt]; a NUL element
+ * stands for the empty string and emits nothing.  The newline of the
+ * last file is kept and ends the output line.
+ * A file at end-of-file contributes empty fields, and a final line
+ * without a newline is treated as complete.
+ * Returns after a pass in which every file was already at
+ * end-of-file; such a pass produces no output.
+ */
+static void
+parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+       int i, j;
+       int started; /* has any file produced data for the current line? */
+
+       do {
+               started = 0;
+               for(i = 0; i < len; i++) {
+                       wint_t c = in(&dsc[i]);
+
+                       /* no data so far: this line may not exist at all */
+                       if(c == WEOF && !started)
+                               continue;
+
+                       /* first data of the line: emit the delimiters owed
+                        * for the exhausted files preceding this one */
+                       for(j = 0; !started && j < i; j++) {
+                               if(delim[j % cnt] != '\0')
+                                       out(delim[j % cnt]);
+                       }
+                       started = 1;
+
+                       /* copy the field itself */
+                       for(; c != WEOF && c != '\n'; c = in(&dsc[i]))
+                               out((wchar_t)c);
+
+                       /* terminate the field */
+                       if(i == len - 1)
+                               out(L'\n');
+                       else if(delim[i % cnt] != '\0')
+                               out(delim[i % cnt]);
+               }
+       } while(started);
+}
-- 
1.8.2


Reply via email to