On Sat, Jan 30, 2016 at 4:13 PM, Mattias Andrée <[email protected]> wrote: > New command. Should be POSIX-compliant. > > Extensions to POSIX: > > 1) In directories, sockets are not compared. > POSIX specifies that special devices and FIFO:s > shall never be compared, and that for other types > than these and regular files and directories, it > is implementation-specified. > > 2) Output is coloured when stdout is a tty. > This was added to make it easier to spot errors. > Perhaps this should be removed, but I left it in > just in case. > > There is a comment in the code referring to a post on > the mailing list, for a diff algorithm that could be > used to improve time and space complexity. However, > this algorithm does not produce a minimal list of > necessary changes, which POSIX specifies that it should > do. In GNU diff, the output is not minimal, even for > short files, unless -d (--minimal) is specified. Some > UNIX-like systems have bdiff that is able to compare > files too big for diff; I assume they produce minimal > output with diff, and use minimal complexity with bdiff. 
> --- > LICENSE | 1 + > Makefile | 1 + > diff.c | 873 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 875 insertions(+) > create mode 100644 diff.c > > diff --git a/LICENSE b/LICENSE > index cb5a797..2a26979 100644 > --- a/LICENSE > +++ b/LICENSE > @@ -59,3 +59,4 @@ Authors/contributors include: > © 2015 Quentin Rameau <[email protected]> > © 2015 Dionysis Grigoropoulos <[email protected]> > © 2015 Wolfgang Corcoran-Mathe <[email protected]> > +© 2016 Mattias Andrée <[email protected]> > diff --git a/Makefile b/Makefile > index 1c09cac..74e071e 100644 > --- a/Makefile > +++ b/Makefile > @@ -89,6 +89,7 @@ BIN =\ > cron\ > cut\ > date\ > + diff\ > dirname\ > du\ > echo\ > diff --git a/diff.c b/diff.c > new file mode 100644 > index 0000000..3c99ae8 > --- /dev/null > +++ b/diff.c > @@ -0,0 +1,873 @@ > +/* See LICENSE file for copyright and license details. */ > +#include <stdio.h> > +#include <fcntl.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <stdint.h> > +#include <ctype.h> > +#include <time.h> > +#include <errno.h> > +#include <libgen.h> > +#include <dirent.h> > +#include <sys/stat.h> > + > +#include "arg.h" > +#include "util.h" > + > +/* > + * Lines that only appear in file-1 are marked 1. > + * Lines that only appear in file-2 are marked 2. > + * Lines that appear in both files are marked 0. > + */ > + > +#define END_OF_PATH 127 > +#define NO_LF_MARK "\n\033[7m\\ No newline at end of file\033[27m" > + > +#undef EXIT_FAILURE > +#define EXIT_FAILURE 2 > + > +#define emalloc(...) enmalloc(EXIT_FAILURE, __VA_ARGS__) > +#define erealloc(...) enrealloc(EXIT_FAILURE, __VA_ARGS__) > +#define eprintf(...) enprintf(EXIT_FAILURE, __VA_ARGS__) > +#define eperror(...) (perror(__VA_ARGS__), exit(EXIT_FAILURE)) > + > +#define CLASSIFY(f) (!(f) ? "directory" : (f)->is_empty ? "regular empty > file" : "regular file") > +#define BOLD(...) use_colour ? "\033[1m" : "", __VA_ARGS__, use_colour ? 
> "\033[m" : "" > + > +struct file_data { > + char **lines; > + size_t line_count; /* used as length of `lines[0]` if `is_binary` */ > + int lf_terminated; > + int is_binary; > + int is_empty; > + struct stat attr; > + const char *path; > +}; > + > +struct trace { > + char f; > + int ch; > + size_t d; > + size_t a_len; > + size_t b_len; > +}; > + > +struct chunk { > + size_t ai; > + size_t bi; > + int have_a; > + int have_b; > + struct trace *chunk; > +}; > + > +static int bflag = 0; > +static int cflag = 0; > +static int eflag = 0; > +static int fflag = 0; > +static int uflag = 0; > +static int rflag = 0; > +static int use_colour = 0; > +static size_t n_context = 0; > + > +static void > +usage(void) > +{ > + eprintf("usage: %s [-c | -C n | -e | -f | -u | -U n] [-br] file1 > file2\n", argv0); > +} > + > +static struct file_data * > +load_lines(const char *pathname) > +{ > + int fd, bin = 0; > + char *buffer; > + char *p; > + char *end; > + size_t ptr, size, n; > + ssize_t m; > + struct file_data* rc; > + struct stat attr; > + > + p = strrchr(pathname, '/'); > + if (p && !p[1]) > + return 0; > + > + fd = open(strcmp(pathname, "-") ? pathname : "/dev/stdin", O_RDONLY); > + if (fd == -1) { > + if (errno == EISDIR) > + return 0; > + eperror(pathname); > + } > + > + fstat(fd, &attr); > + if (S_ISDIR(attr.st_mode)) > + return 0; > + > + ptr = 0; > + size = attr.st_blksize ? 
attr.st_blksize : 8096; > + buffer = emalloc(size + 1); > + for (;;) { > + if (ptr == size) > + buffer = erealloc(buffer, (size <<= 1) + 1); > + m = read(fd, buffer + ptr, size - ptr); > + if (m < 0) > + eperror(pathname); > + if (m == 0) > + break; > + ptr += (size_t)m; > + } > + buffer[ptr] = 0; > + > + for (n = 1, p = buffer;; n += 1) { > + char *lf = strchr(p, '\n'); > + if (!lf) > + break; > + p = lf + 1; > + } > + bin = (strchr(p, '\0') != buffer + ptr); > + > + rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + > 1 + sizeof(NO_LF_MARK))); > + buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *); > + memmove(buffer, rc, ptr); > + rc->lines = (char **)((char *)rc + sizeof(*rc)); > + rc->lf_terminated = ptr && buffer[ptr - 1] == '\n'; > + rc->line_count = bin ? ptr : (n -= rc->lf_terminated); > + buffer[ptr - rc->lf_terminated] = 0; > + rc->attr = attr; > + rc->path = pathname; > + rc->is_binary = bin; > + rc->is_empty = (ptr == 0); > + > + close(fd); > + > + rc->lines[bin ? n : 1] = 0; > + if (bin) { > + rc->lines[0] = buffer; > + } else { > + for (ptr = 0, p = buffer; p; p = end) { > + end = strchr(p, '\n'); > + if (end) > + *end++ = 0; > + rc->lines[ptr++] = p; > + } > + } > + > + return rc; > +} > + > +static char * > +rstrip(char *text, char *removed) > +{ > + char *end = strchr(text, '\0'); > + while ((end != text) && isspace(end[-1])) > + end--; > + *removed = *end; > + *end = '\0'; > + return end; > +} > + > +static int > +strcmp_rstrip_a(char *a, char *b) > +{ > + static char *last_a = NULL; > + static char *a_p = NULL; > + static char a_pc = 0; > + if (a != last_a) { > + if (last_a) > + *a_p = a_pc; > + if (a) > + a_p = rstrip(last_a = a, &a_pc); > + } > + return a ? strcmp(a, b) : 0; > +} > + > +/* TODO use <20160128154757.GA20170@debian> when `an` is too large. 
*/ > +static char * > +diff2_(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char *)) > +{ > +#define matrix (*matrix) > +#define map (*map) > + char map[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1])); > + size_t matrix[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1])); > + char *rc; > + size_t ai, bi, ri = 0, mi = 0; > + > + memset(map[0], 2, bn + 1); > + > + a--, b--; > + for (ai = 1; ai <= an; ai++) { > + size_t *last = matrix[mi]; > + size_t *this = matrix[mi ^= 1]; > + map[ai][0] = 1; > + for (bi = 1; bi <= bn; bi++) { > + if (!cmp(a[ai], b[bi])) { > + this[bi] = last[bi - 1] + 1; > + map[ai][bi] = 0; > + } else { > + size_t u = last[bi]; > + size_t l = this[bi - 1]; > + this[bi] = l >= u ? l : u; > + map[ai][bi] = 1 + (l >= u); > + } > + } > + } > +#undef matrix > + free(matrix); > + > + rc = emalloc(an + bn + 1); > + rc[ri++] = END_OF_PATH; > + for (ai = an, bi = bn; ai + bi; ri++) { > + rc[ri] = map[ai][bi]; > + ai -= rc[ri] != 2; > + bi -= rc[ri] != 1; > + } > +#undef map > + free(map); > + > + return rc + ri; > +} > + > +static struct trace * > +enhance_trace(char *path) > +{ > + char *p = path; > + size_t len, a_len = 0, b_len = 0, i = 0, d = 0, a = 0, b = 0, j = 0; > + int have_d = 0, ch = 0; > + struct trace *rc; > + > + while (*--p != END_OF_PATH); > + len = (size_t)(path - p); > + rc = ecalloc(len, sizeof(*rc)); > + > + /* Find distance from edits, and mark exchanges. (left-to-right) */ > + for (--len; i < len; i++) { > + rc[i].f = *--path; > + if (rc[i].f) { > + d = 0, have_d = 1; > + ch |= ch ? ch : (3 - rc[i].f); > + if (rc[i].f == ch) > + rc[i].ch = 1; > + } else { > + ch = 0; > + rc[i].d = (have_d ? ++d : SIZE_MAX); > + } > + } > + rc[i].f = END_OF_PATH; > + > + /* Find distance from edits, mark exchanges, and get chunk lengths. 
> (right-to-left) */ > + for (i = len, d = 0, ch = have_d = 0; i-- > 0;) { > + rc[i].a_len = a_len += (rc[i].f != 2); > + rc[i].b_len = b_len += (rc[i].f != 1); > + if (rc[i].f) { > + d = 0, have_d = 1; > + ch |= ch ? ch : (3 - rc[i].f); > + if (rc[i].f == ch) > + rc[i].ch = 1; > + } else { > + ch = 0; > + if (have_d && (d + 1) < rc[i].d) > + rc[i].d = ++d; > + if (rc[i].d > n_context) > + a_len = b_len = 0; > + } > + } > + > + /* Put removals before additions. */ > + for (i = 0; i < len; i++) { > + if (rc[i].f == 0) { > + while (a--) > + rc[j++].f = 1; > + while (b--) > + rc[j++].f = 2; > + j = i + 1, a = b = 0; > + } else if (rc[i].f == 1) { > + a++; > + } else { > + b++; > + } > + } > + while (a--) > + rc[j++].f = 1; > + while (b--) > + rc[j++].f = 2; > + > + free(p); > + return rc; > +} > + > +static struct trace * > +diff2(char **a, char **b, size_t an, size_t bn, int do_rstrip) > +{ > + size_t skip_start = 0, skip_end = 0; > + char *rc; > + int (*cmp)(char *, char *) = (int (*)(char *, char *))strcmp; > + int transpose = bn < an; > + > + if (do_rstrip) { > + char **lines; > + char _c; > + for (lines = !transpose ? b : a; *lines; lines++) > + rstrip(*lines, &_c); > + cmp = strcmp_rstrip_a; > + } > + > + /* Reduce problem set, by skiping identical head. */ > + for (skip_start = 0;; skip_start++) { > + char *a_elem = a[skip_start]; > + char *b_elem = b[skip_start]; > + if (!a_elem || !b_elem || cmp(a_elem, b_elem)) > + break; > + } > + a += skip_start, an -= skip_start; > + b += skip_start, bn -= skip_start; > + /* Reduce problem set, by skiping identical tail. */ > + for (skip_end = 0; an && bn; an--, bn--, skip_end++) > + if (cmp(a[an - 1], b[bn - 1])) > + break; > + > + rc = !transpose ? diff2_(a, b, an, bn, cmp) : diff2_(b, a, bn, an, > cmp); > + if (transpose) { > + char *path; > + char trace; > + for (path = rc; (trace = *--path) != END_OF_PATH;) > + if (trace) > + *path = 3 - trace; > + } > + > + /* Add skipped part to the path. 
*/ > + if (skip_start || skip_end) { > + char *path = rc; > + size_t path_len; > + while (*--path != END_OF_PATH); > + path_len = (size_t)(rc - path); > + path = erealloc(path, skip_end + path_len + skip_start); > + if (skip_end) { > + memmove(path + skip_end + 1, path + 1, path_len - 1); > + memset(path + 1, 0, skip_end); > + } > + memset(path + skip_end + path_len, 0, skip_start); > + rc = path + skip_end + path_len + skip_start; > + } > + > + return enhance_trace(rc); > +} > + > +static char * > +get_time_string(const struct stat *attr) > +{ > + static char buf[sizeof("0000-00-00 00:00:00.000000000 +0000")]; > + struct tm *tm; > + > + tm = localtime(&(attr->st_mtime)); > + if (tm == NULL) > + eperror("localtime"); > + > +#ifdef st_mtime > + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S.000000000 %z", tm); > + sprintf(buf + (sizeof("0000-00-00 00:00:00.") - 1), "%09lu", > attr->st_mtim.tv_nsec); > + buf[sizeof("0000-00-00 00:00:00.") - 1 + 9] = ' '; > +#else > + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", tm); > +#endif > + return buf; > +} > + > +static int > +get_diff_chunks(struct trace *path, size_t an, size_t bn, struct chunk > **head, struct chunk **tail) > +{ > +#define head (*head) > +#define tail (*tail) > + struct trace trace; > + size_t ai, bi; > + int ret = 0, suppressed = 1, have_a = 0, have_b = 0; > + > + head = ecalloc(an + bn + 1, sizeof(*head)); > + tail = head++; > + > + for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) { > + if (trace.d > n_context) { > + suppressed = 1; > + if (head->chunk) { > + head->have_a = have_a; > + head->have_b = have_b; > + head++; > + } > + have_a = have_b = 0; > + goto next; > + } > + if (suppressed) { > + head->ai = ai; > + head->bi = bi; > + head->chunk = path - 1; > + } > + have_a |= trace.f == 1; > + have_b |= trace.f == 2; > + suppressed = 0; > + next: > + ret |= trace.f != 0; > + ai += trace.f != 2; > + bi += trace.f != 1; > + } > + if (head->chunk) { > + head->have_a = have_a; > + head->have_b = 
have_b; > + head++; > + } > + > + return ret; > +#undef head > +#undef tail > +} > + > +#define OUTPUT_BEGIN\ > + struct trace *path;\ > + size_t ai, bi;\ > + int ret = 0, have_a = 0, have_b = 0;\ > + struct trace *chunk;\ > + struct trace *chunk_old;\ > + struct chunk *head;\ > + struct chunk *tail;\ > + char **a = old->lines;\ > + char **b = new->lines\ > + > +#define OUTPUT_HEAD(A, B)\ > + printf("%s"A" %s\t%s%s\n", BOLD(old->path, > get_time_string(&(old->attr))));\ > + printf("%s"B" %s\t%s%s\n", BOLD(new->path, > get_time_string(&(new->attr)))) > + > +#define OUTPUT_QUEUE\ > + path = diff2(a, b, old->line_count, new->line_count, bflag);\ > + ret = get_diff_chunks(path, old->line_count, new->line_count, &head, > &tail);\ > + (void) chunk_old;\ > + for (head = tail;;) {\ > + head++;\ > + ai = head->ai;\ > + bi = head->bi;\ > + have_a = head->have_a;\ > + have_b = head->have_b;\ > + chunk = head->chunk;\ > + if (!chunk)\ > + break > + > +#define OUTPUT_STACK\ > + path = diff2(a, b, old->line_count, new->line_count, bflag);\ > + ret = get_diff_chunks(path, old->line_count, new->line_count, &head, > &tail);\ > + (void) chunk_old;\ > + for (;;) {\ > + head--;\ > + ai = head->ai;\ > + bi = head->bi;\ > + have_a = head->have_a;\ > + have_b = head->have_b;\ > + chunk = head->chunk;\ > + if (!chunk)\ > + break > + > +#define OUTPUT_END\ > + }\ > + free(tail);\ > + free(path);\ > + return ret > + > +static int > +output_unified(struct file_data *old, struct file_data *new) > +{ > + struct trace *path; > + struct trace *path_; > + struct trace trace; > + size_t ai, bi; > + char **a; > + char **b; > + int ret = 0; > + int suppressed = 1; > + > + path = diff2(old->lines, new->lines, old->line_count, > new->line_count, bflag); > + path_ = path; > + > + OUTPUT_HEAD("---", "+++"); > + > + a = old->lines, b = new->lines; > + for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) { > + char f = trace.f; > + if (trace.d > n_context) { > + suppressed = 1; > + goto next; > + } > + 
if (suppressed) { > + suppressed = 0; > + printf("%s@@ -%zu", use_colour ? "\033[36m" : "", ai > + 1 - !trace.a_len); > + if (trace.a_len != 1) > + printf(",%zu", trace.a_len); > + printf(" +%zu", bi + 1 - !trace.b_len); > + if (trace.b_len != 1) > + printf(",%zu", trace.b_len); > + printf(" @@%s\n", > + use_colour ? "\033[m" : ""); > + } > + if (f == 0) > + printf(" %s\n", a[ai]); > + else if (use_colour) > + printf("\033[3%im%c%s\033[m\n", f, " -+"[(int)f], f > == 1 ? a[ai] : b[bi]); > + else > + printf("%c%s\n", " -+"[(int)f], f == 1 ? a[ai] : > b[bi]); > + next: > + ret |= f != 0; > + ai += f != 2; > + bi += f != 1; > + } > + > + free(path_); > + return ret; > +} > + > +static int > +output_copied(struct file_data *old, struct file_data *new) > +{ > + OUTPUT_BEGIN; > + OUTPUT_HEAD("***", "---"); > + OUTPUT_QUEUE; > +#define PRINT_PART(L, C, S, A)\ > + printf("%s"A" %zu", use_colour ? "\033[1;3"#C"m" : "", L##i + 1 - > (!have_##L));\ > + if (chunk->L##_len > 1)\ > + printf(",%zu", L##i + chunk->L##_len);\ > + printf(" "A"%s\n", use_colour ? "\033[m" : "");\ > + for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; > chunk++) {\ > + if (chunk->f == 0)\ > + printf(" %s\n", L[L##i]);\ > + else if (chunk->f == (3 - C));\ > + else if (use_colour)\ > + printf("\033[3%im%c %s\033[m\n", chunk->ch ? 3 : C, > S"!"[chunk->ch], L[L##i]);\ > + else\ > + printf("%c %s\n", S"!"[chunk->ch], L[L##i]);\ > + L##i += chunk->f != (3 - C);\ > + } > + > + printf("%s\n", use_colour ? 
"\033[36m***************\033[m" : > "***************"); > + chunk_old = chunk; > + PRINT_PART(a, 1, "-", "***"); > + chunk = chunk_old; > + PRINT_PART(b, 2, "+", "---"); > +#undef PRINT_PART > + OUTPUT_END; > +} > + > +static int > +output_default(struct file_data *old, struct file_data *new) > +{ > + OUTPUT_BEGIN; > + OUTPUT_QUEUE; > +#define PRINT_PART(L, C, S)\ > + for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; > chunk++) {\ > + if (chunk->f == 0)\ > + printf(" %s\n", L[L##i]);\ > + else if (chunk->f == (3 - C));\ > + else if (use_colour)\ > + printf("\033[3"#C"m"S" %s\033[m\n", L[L##i]);\ > + else\ > + printf(S" %s\n", L[L##i]);\ > + L##i += chunk->f != (3 - C);\ > + } > + > + printf("%s%zu", use_colour ? "\033[36m" : "", ai + 1 - (!have_a)); > + if (chunk->a_len > 1) > + printf(",%zu", ai + chunk->a_len); > + printf("%c", " dac"[have_a + 2 * have_b]); > + printf("%zu", bi + 1 - (!have_b)); > + if (chunk->b_len > 1) > + printf(",%zu", bi + chunk->b_len); > + printf("%s\n", use_colour ? "\033[m" : ""); > + > + chunk_old = chunk; > + PRINT_PART(a, 1, "<"); > + if (have_a && have_b) > + printf("%s\n", use_colour ? "\033[36m---\033[m" : "---"); > + chunk = chunk_old; > + PRINT_PART(b, 2, ">"); > +#undef PRINT_PART > + OUTPUT_END; > +} > + > +static int > +output_ed(struct file_data *old, struct file_data *new) > +{ > + OUTPUT_BEGIN; > + OUTPUT_STACK; > + if (!have_b) { > + printf("%zud\n", ai + 1); > + } else { > + int have_dot = 0; > + printf("%zu", ai + 1 - (!have_a)); > + if (chunk->a_len > 1) > + printf(",%zu", ai + chunk->a_len); > + printf("%c\n", "ac"[chunk->ch]); > + for (; chunk->f != END_OF_PATH && chunk->d <= n_context; > chunk++) { > + if (chunk->f == 1); > + else if (use_colour) > + printf("\033[3%im%s%s\033[m\n", chunk->ch ? 3 > : 2, > + b[bi][0] == '.' ? "." : "", b[bi]); > + else > + printf("%s%s\n", > + b[bi][0] == '.' ? "." 
: "", b[bi]); > + have_dot = (chunk->f == 2 && b[bi][0] == '.'); > + if (have_dot) > + printf(".\ns/.//\na\n"); > + bi += chunk->f != 1; > + } > + if (!have_dot) > + printf(".\n"); > + } > + OUTPUT_END; > +} > + > +static int > +output_ed_alternative(struct file_data *old, struct file_data *new) > +{ > + OUTPUT_BEGIN; > + OUTPUT_QUEUE; > + if (!have_b) { > + printf("d%zu\n", ai + 1); > + } else { > + printf("%c%zu", "ac"[chunk->ch], ai + 1 - (!have_a)); > + if (chunk->a_len > 1) > + printf(" %zu", ai + chunk->a_len); > + printf("\n"); > + for (; chunk->f != END_OF_PATH && chunk->d <= n_context; > chunk++) { > + if (chunk->f == 1); > + else if (use_colour) > + printf("\033[3%im%s\033[m\n", chunk->ch ? 3 : > 2, b[bi]); > + else > + printf("%s\n", b[bi]); > + bi += chunk->f != 1; > + } > + printf(".\n"); > + } > + OUTPUT_END; > +} > + > +static int > +do_binaries_differ(struct file_data *old, struct file_data *new) > +{ > +#define TURN_INTO_BINARY(f)\ > + if (!f->is_binary) {\ > + char **lines = f->lines;\ > + size_t len = 0, part_len;\ > + for (; *lines; lines++) {\ > + len += 1 + (part_len = strlen(*lines));\ > + (*lines)[part_len] = '\n';\ > + }\ > + f->line_count = len - !f->lf_terminated;\ > + } > + > + TURN_INTO_BINARY(old); > + TURN_INTO_BINARY(new); > + > + if (old->line_count != new->line_count) > + return 1; > + > + return memcmp(old->lines[0], new->lines[0], old->line_count); > +} > + > +static int > +compare_files(struct file_data *old, struct file_data *new) > +{ > + int ret; > + > + if (old->is_binary || new->is_binary) { > + if (do_binaries_differ(old, new)) { > + printf("Binary files %s and %s differ\n", old->path, > new->path); > + ret = 2; > + } > + return ret; > + } > + > + if (!(eflag || fflag)) { > + if (!old->lf_terminated) > + strcpy(strchr(old->lines[old->line_count - 1], '\0'), > NO_LF_MARK); > + if (!new->lf_terminated) > + strcpy(strchr(new->lines[new->line_count - 1], '\0'), > NO_LF_MARK); > + } > + > + ret = (uflag ? 
output_unified : > + cflag ? output_copied : > + eflag ? output_ed : > + fflag ? output_ed_alternative : > + output_default)(old, new); > + > + if (eflag || fflag) { > + if (!old->lf_terminated) > + fprintf(stderr, "%s: %s: No newline at end of > file\n\n", argv0, old->path); > + if (!new->lf_terminated) > + fprintf(stderr, "%s: %s: No newline at end of > file\n\n", argv0, new->path); > + ret = (!old->lf_terminated || !new->lf_terminated) ? 2 : ret; > + } > + > + return ret; > +} > + > +static int > +compare_directories(const char *old, const char *new, const char *diff_line) > +{ > +#define GET_FILENAME(buf, i)\ > + (buf = emalloc(strlen(paths[i]) + strlen(file->d_name) + 2),\ > + stpcpy(stpcpy(stpcpy(buf, paths[i]), "/"), file->d_name)) > + > + int ret = 0, r, i = 0, j = 1; > + DIR *dir; > + const char *paths[2] = { old, new }; > + struct dirent *file; > + struct file_data *a; > + struct file_data *b; > + char *b_path; > + char *a_path; > + struct stat a_attr; > + struct stat b_attr; > + > +again: > + dir = opendir(paths[i]); > + if (!dir) > + eperror(paths[i]); > + while ((errno = 0, file = readdir(dir))) { > + if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, "..")) > + continue; > + GET_FILENAME(b_path, j); > + if (access(b_path, F_OK)) { > + printf("%sOnly i %s: %s%s\n", BOLD(paths[i], > file->d_name)); > + ret = ret > 1 ? ret : 1; > + goto next; > + } else if (i == 1) { > + goto next; > + } > + GET_FILENAME(a_path, i); > + > + if (stat(a_path, &a_attr)) > + eperror(a_path); > + if (stat(b_path, &b_attr)) > + eperror(a_path); > + > + if (a_attr.st_dev == b_attr.st_dev && a_attr.st_ino == > b_attr.st_ino) > + goto skip; > + /* POSIX specifies that if a and b refer to the same special > device, > + * there should be no comparision. This seems unnecessary > since it > + * also specifies that special devices and FIFO:s shall not > be compared. > + * We extend this to not compare sockets either. 
POSIX says > that it > + * is implementation-specified for other types than special > files, > + * FIFO:s, regular files and directories. */ > +#define IS_INCOMMENSURABLE(mode) (S_ISCHR(mode) || S_ISBLK(mode) || > S_ISFIFO(mode) || S_ISSOCK(mode)) > + if (IS_INCOMMENSURABLE(a_attr.st_mode) || > IS_INCOMMENSURABLE(b_attr.st_mode)) > + goto skip; > + > + a = load_lines(a_path); > + b = load_lines(b_path); > + > + if (!a ^ !b) { > + printf("%sFile %s is a %s while file %s is a %s%s\n", > + BOLD(a_path, CLASSIFY(a), b_path, > CLASSIFY(b))); > + ret = ret > 1 ? ret : 1; > + } else if (!a && !b && !rflag) { > + printf("%sCommon subdirectories: %s and %s%s\n", > BOLD(a_path, b_path)); > + ret = ret > 1 ? ret : 1; > + } else if (!a && !b) { > + r = compare_directories(a_path, b_path, diff_line); > + ret = ret > r ? ret : r; > + } else { > + printf("%s%s %s %s%s\n", BOLD(diff_line, a_path, > b_path)); > + r = compare_files(a, b); > + ret = ret > r ? ret : r; > + } > + > + free(a); > + free(b); > + skip: > + free(a_path); > + next: > + free(b_path); > + } > + if (errno) > + eperror("readdir"); > + closedir(dir); > + > + > + if (i) > + return ret; > + i = 1, j = 0; > + goto again; > +} > + > +int > +main(int argc, char *argv[]) > +{ > + struct file_data *old; > + struct file_data *new; > + char *old_proper = 0; > + char *new_proper = 0; > + int ret; > + char *diff_line = 0; > + char *p; > + > + /* Construct the 'diff OPTIONS FILE-1 FILE-2' line used diff:ing > directories. 
*/ > + if (argc > 2) { > + size_t len = 0; > + int i; > + p = strrchr(argv[0], '/'); > + if (p) > + argv[0] = p + 1; > + for (i = 0; i < argc - 2; i++) > + len += strlen(argv[i]) + 1; > + p = diff_line = emalloc(len + 1); > + for (i = 0; i < argc - 2; i++) > + p = stpcpy(stpcpy(p, argv[i]), " "); > + p[-1] = 0; > + } > + > + ARGBEGIN { > + case 'b': bflag++; break; > + case 'c': cflag++; n_context = 3; break; > + case 'C': cflag++; n_context = atol(EARGF(usage())); break; > + case 'e': eflag++; break; > + case 'f': fflag++; break; > + case 'u': uflag++; n_context = 3; break; > + case 'U': uflag++; n_context = atol(EARGF(usage())); break; > + case 'r': rflag++; break; > + default: > + usage(); > + } ARGEND; > + /* Use of `atol` is intentional, '-U -1' and '-C -1' shall display > the entire file. */ > + > + if (argc != 2 || (bflag | rflag) > 1 || cflag + eflag + fflag + uflag > > 1) > + usage(); > + > + use_colour = isatty(STDOUT_FILENO); > + > +redo: > + old = load_lines(old_proper ? old_proper : argv[0]); > + new = load_lines(new_proper ? new_proper : argv[1]); > + > + if ((old_proper || new_proper) && (!old || !new)) { > + printf("%sFile %s is a %s while file %s is a %s%s\n", > + BOLD(old_proper ? old_proper : argv[0], CLASSIFY(old), > + new_proper ? 
new_proper : argv[1], > CLASSIFY(new))); > + ret = 1; > + } else if (!old && new) { > + old_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2); > + stpcpy(stpcpy(stpcpy(old_proper, argv[0]), "/"), > basename(argv[1])); > + goto redo; > + } else if (old && !new) { > + old_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2); > + stpcpy(stpcpy(stpcpy(old_proper, argv[0]), "/"), > basename(argv[1])); > + goto redo; > + } else if (!old && !new) { > + ret = compare_directories(argv[0], argv[1], diff_line); > + } else { > + ret = compare_files(old, new); > + } > + > +done: > + if (fshut(stdout, "<stdout>")) > + ret = EXIT_FAILURE; > + > + free(old); > + free(new); > + free(old_proper); > + free(new_proper); > + free(diff_line); > + return ret; > +} > -- > 2.7.0 > >
Some notes from me:
- I would avoid using variable-length arrays (VLAs); use a sane subset of C99.
- Use snprintf, instead of sprintf and the check.
- Use strlcpy or snprintf instead of strcpy.
- Avoid using the "inline" #defines.
- Disable colour output by default; maybe just add a flag to explicitly enable it.
- We should also allow using stdin for one of the file inputs (so the file can't be "reread"), e.g.: diff file.c -

Kind regards,
Hiltjo
