Hi

This is a braindead and incomplete implementation of tr that only
works for one-byte encodings. Do you think it makes sense to use this
implementation as some kind of stopgap-measure until we have a more
robust version of tr?

If you would rather not take this version, what approach would
you take for the character set mapping when using UTF-8? A hashmap-
or B-tree-based solution, or something else entirely?


Cheers,

Silvan

-- >8 --- (use git am -c)
The approach taken works only for one-byte encodings and is rather slow.
---
 Makefile |   1 +
 tr.c     | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tr.c

diff --git a/Makefile b/Makefile
index 2a72a1c..b78ad2c 100644
--- a/Makefile
+++ b/Makefile
@@ -81,6 +81,7 @@ SRC = \
        tee.c      \
        test.c     \
        touch.c    \
+       tr.c       \
        true.c     \
        tty.c      \
        uname.c    \
diff --git a/tr.c b/tr.c
new file mode 100644
index 0000000..0053a52
--- /dev/null
+++ b/tr.c
@@ -0,0 +1,117 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+static void
+usage(void) /* print the command synopsis through eprintf() */
+{
+       eprintf("usage: tr set1 [set2]\n"); /* NOTE(review): main() carries on as if this call never returns; presumably eprintf() exits -- confirm in util.h */
+}
+
+void
+handle_escapes(char *s) /* overwrite the escape letter at *s, in place, with the byte it denotes */
+{
+    switch(*s) {
+       case 'n': /* letter n becomes a line feed (0x0A) */
+               *s = '\x0A';
+               break;
+       case 't': /* letter t becomes a horizontal tab (0x09) */
+               *s = '\x09';
+               break;
+       case '\\': /* a backslash is stored as 0x5c, the ASCII backslash */
+               *s = '\x5c';
+               break;
+    } /* no default case: any other character is left untouched */
+}
+
+void
+parse_mapping(char *set1, char *set2, char *mappings) /* store in mappings[c] a replacement for each character c of set1; set2 may be NULL */
+{
+       char *s;
+       size_t n = 0; /* index into set2 of the value stored for the current set1 character */
+       size_t lset2; /* length of set2 as reported by strnlen (at most 255), or 0 when set2 is NULL */
+
+       if(set2) {
+           lset2 = strnlen(set2, 255);
+       } else {
+           set2 = (char*) &set1[0]; /* no set2 given: alias set1, so the stored values are read from set1 itself */
+           lset2 = 0;
+       }
+
+       for(s = set1; *s; s++) { /* NOTE(review): if set1 ends in a backslash, the ++s below lands on the terminator and this s++ then steps past it */
+           if(*s == '\\') {
+               handle_escapes(++s); /* move onto the escape letter and rewrite it inside set1 */
+               ++n; /* NOTE(review): this also advances the set2 index; set2 is never passed to handle_escapes() in this function */
+           }
+
+           mappings[(int) *s] = set2[n]; /* NOTE(review): the index is negative whenever *s holds a negative char value -- confirm char signedness on the target */
+           if(n < (lset2 - 1)) /* lset2 is a size_t, so for lset2 == 0 the subtraction wraps around and n always advances */
+               n++;
+       }
+}
+
+void
+map_to_null(const char *mappings, char *in) /* write to stdout only those characters of in whose mappings entry is zero */
+{
+       const char *s;
+
+       for(s = in; *s; s++) { /* walks in up to, but not including, its first NUL byte */
+           if(!mappings[(int) *s]) /* zero entry: echo the character; non-zero entry: output nothing for it */
+               putchar((int) *s);
+       }
+}
+
+void
+map_to_set(const char *mappings, char *in) /* write in to stdout, substituting each character that has a non-zero mappings entry */
+{
+       const char *s;
+
+       for(s = in; *s; s++) { /* walks in up to, but not including, its first NUL byte */
+           if(!mappings[(int) *s]) { /* zero entry: the character is written unchanged */
+               putchar((int) *s);
+           } else { /* non-zero entry: the stored replacement is written instead */
+               putchar((int) mappings[(int) *s]);
+           }
+       }
+}
+
+int
+main(int argc, char *argv[]) /* tr set1 [set2]: copy stdin to stdout through the mapping table built from the arguments */
+{
+       char mappings[255]; /* indexed by character value; NOTE(review): valid indices are 0..254, so character value 255 has no slot */
+       char *buf = NULL; /* buffer and its size, both handed to afgets() by address */
+       size_t size = 0;
+       void (*mapfunc) (const char*, char*); /* set to map_to_set or map_to_null below */
+
+       memset(mappings, 0, 255); /* a zero entry is what map_to_set/map_to_null treat as "no mapping" */
+
+       ARGBEGIN { /* macro defined outside this file; presumably walks the option flags -- the only case here routes to usage() */
+       default:
+               usage();
+       } ARGEND;
+
+       if(!argc) /* no operand left for set1; NOTE(review): argv[0] is used as set1 below, so ARGBEGIN presumably shifts argc/argv -- confirm */
+           usage();
+
+       if(argc >= 2) { /* two operands: translate set1 characters using set2 */
+           parse_mapping(argv[0], argv[1], mappings);
+           mapfunc = map_to_set;
+       } else { /* one operand: characters that receive a table entry are omitted from the output */
+           parse_mapping(argv[0], NULL, mappings);
+           mapfunc = map_to_null;
+       }
+
+       while(afgets(&buf, &size, stdin)) /* afgets() is declared outside this file; presumably it reads one line per call -- confirm in text.h */
+               mapfunc(mappings, buf);
+
+       if (ferror(stdin)) { /* tell a read error apart from a plain end of input */
+           eprintf("<stdin>: read error:");
+           return EXIT_FAILURE;
+           }
+
+       return EXIT_SUCCESS;
+}
-- 
1.8.4.2


Reply via email to