The beginning of an implementation of diff.

It only does the bare essentials, and it is not
usable implemention of diff yet.

Signed-off-by: Mattias Andrée <maand...@kth.se>
---
 LICENSE  |   1 +
 Makefile |   1 +
 diff.c   | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+)
 create mode 100644 diff.c

diff --git a/LICENSE b/LICENSE
index cb5a797..2a26979 100644
--- a/LICENSE
+++ b/LICENSE
@@ -59,3 +59,4 @@ Authors/contributors include:
 © 2015 Quentin Rameau <qu...@quinq.eu.org>
 © 2015 Dionysis Grigoropoulos <i...@erethon.com>
 © 2015 Wolfgang Corcoran-Mathe <first.lord.of.t...@gmail.com>
+© 2016 Mattias Andrée <maand...@kth.se>
diff --git a/Makefile b/Makefile
index 1c09cac..74e071e 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@ BIN =\
        cron\
        cut\
        date\
+       diff\
        dirname\
        du\
        echo\
diff --git a/diff.c b/diff.c
new file mode 100644
index 0000000..21f9849
--- /dev/null
+++ b/diff.c
@@ -0,0 +1,207 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "util.h"
+
+#define END_OF_PATH  127
+
+#define printf(...)  do  {if (printf(__VA_ARGS__) < 0) perror("printf"), 
exit(EXIT_FAILURE); } while(0)
+
+struct file_data {
+       char **lines;
+       size_t line_count;
+       int lf_terminated;
+};
+
+struct output {
+       char colour;
+       char* text;
+};
+
+static struct output *red = 0;
+static struct output *green = 0;
+
+static struct file_data *
+load_lines(const char *pathname)
+{
+       int fd;
+       char *buffer;
+       char *p;
+       char *end;
+       size_t ptr, size, n;
+       ssize_t m;
+       struct file_data* rc;
+
+       fd = open(pathname, O_RDONLY);
+       if (fd == -1)
+               perror(pathname), exit(EXIT_FAILURE);
+
+       ptr = 0;
+       buffer = emalloc((size = 8096) + 1);
+       for (;;) {
+               if (ptr == size)
+                       buffer = erealloc(buffer, (size <<= 1) + 1);
+               m = read(fd, buffer + ptr, size - ptr);
+               if (m < 0)
+                       perror(pathname), exit(EXIT_FAILURE);
+               if (m == 0)
+                       break;
+               ptr += (size_t)m;
+       }
+       buffer[ptr] = 0;
+
+       close(fd);
+
+       for (n = 1, p = buffer;; n += 1) {
+               char *lf = strchr(p, '\n');
+               if (!lf)
+                       break;
+               p = lf + 1;
+       }
+       if (strchr(p, '\0') != buffer + ptr)
+               enprintf(1, "%s: file is binary\n", pathname);
+
+       rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + 
1));
+       buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *);
+       memmove(buffer, rc, ptr);
+       rc->lines = (char **)((char *)rc + sizeof(*rc));
+       rc->lf_terminated = ptr && buffer[ptr - 1] == '\n';
+       rc->line_count = n -= rc->lf_terminated;
+       buffer[ptr - rc->lf_terminated] = 0;
+
+       rc->lines[n] = 0;
+       for (ptr = 0, p = buffer; p; p = end) {
+               end = strchr(p, '\n');
+               if (end)
+                 *end++ = 0;
+               rc->lines[ptr++] = p;
+       }
+
+       return rc;
+}
+
+static char *
+diff2(char **a, char **b, size_t an, size_t bn)
+{
+       size_t **matrix = emalloc((an + 1) * sizeof(size_t *) + (an + 1) * (bn 
+ 1) * sizeof(size_t));
+       char   **map    = emalloc((an + 1) * sizeof(char   *) + (an + 1) * (bn 
+ 1) * sizeof(char));
+       size_t ai, bi, ri = 0;
+       char *rc;
+
+       for (ai = 0; ai <= an; ai++) {
+               matrix[ai] = (size_t *)(matrix + an + 1) + ai * (bn + 1);
+               map   [ai] = (char   *)(map    + an + 1) + ai * (bn + 1);
+               matrix[ai][0] = ai;
+               map   [ai][0] = 1;
+       }
+       for (bi = 0; bi <= bn; bi++) {
+               matrix[0][bi] = bi;
+               map   [0][bi] = 2;
+       }
+       map[0][0] = 0;
+
+       a--, b--;
+
+       for (ai = 1; ai <= an; ai++) {
+               for (bi = 1; bi <= bn; bi++) {
+                       size_t d = matrix[ai - 1][bi - 1];
+                       size_t u = matrix[ai - 1][bi    ];
+                       size_t l = matrix[ai    ][bi - 1];
+                       size_t lu = 1 + (l < u ? l : u);
+                       int ch = strcmp(a[ai], b[bi]); /* remember !! if using 
d += ch */
+                       matrix[ai][bi] = (lu < d || ch) ? lu : d;
+                       map   [ai][bi] = (lu < d || ch) ? 1 + (l < u) : 0;
+               }
+       }
+
+       rc = emalloc(an + bn + 2);
+       rc[ri++] = END_OF_PATH;
+       for (ai = an, bi = bn; ai + bi; ri++) {
+               rc[ri] = map[ai][bi];
+               ai -= rc[ri] != 2;
+               bi -= rc[ri] != 1;
+       }
+
+       free(matrix);
+       free(map);
+       return rc + ri;
+}
+
+static void
+flush_output(struct output *output, size_t n)
+{
+       if (!n)
+               return;
+
+       if (output->colour == 0) {
+               printf("%s%s\n", " ", output->text);
+       } else {
+               size_t colour_n[] = { [1] = 0, [2] = 0 }, i;
+               static size_t last_size = 0;
+
+               if (last_size < n) {
+                       red   = erealloc(red,   n * sizeof(*red));
+                       green = erealloc(green, n * sizeof(*green));
+                       last_size = n;
+               }
+
+               for (i = 0; i < n; i++)
+                       (output[i].colour == 1 ? red : 
green)[colour_n[(int)(output[i].colour)]++] = output[i];
+
+               for (i = 0, n = colour_n[1]; i < n; i++)
+                       printf("\033[31m-%s\033[m\n", red[i].text);
+               for (i = 0, n = colour_n[2]; i < n; i++)
+                       printf("\033[32m+%s\033[m\n", green[i].text);
+       }
+}
+
+int
+main(int argc, char *argv[])
+{
+       struct file_data *old;
+       struct file_data *new;
+       char *path;
+       char trace;
+       struct output *output;
+       size_t n, ai, bi;
+       char **a;
+       char **b;
+       int ret = 0;
+
+       argv0 = argv[0];
+
+       old = load_lines(argv[1]);
+       new = load_lines(argv[2]);
+
+       a = old->lines, b = new->lines;
+       path = diff2(a, b, old->line_count, new->line_count);
+       
+       output = emalloc((old->line_count + new->line_count) * sizeof(*output));
+       for (n = ai = bi = 0; (trace = *--path) != END_OF_PATH;) {
+               if (trace) {
+                       output[n++] = (struct output){ trace, trace == 1 ? 
a[ai] : b[bi] };
+               } else {
+                       flush_output(output, n), n = 0;
+                       output[n++] = (struct output){ trace, a[ai] };
+                       flush_output(output, n), n = 0;
+               }
+               ai += trace != 2;
+               bi += trace != 1;
+       }
+       flush_output(output, n);
+
+       if (fshut(stdout, "<stdout>"))
+               ret = 2;
+
+       free(red);
+       free(green);
+       free(old);
+       free(new);
+       free(path);
+       free(output);
+       return ret;
+}
-- 
2.7.0


Reply via email to