The branch main has been updated by pstef:

URL: https://cgit.FreeBSD.org/src/commit/?id=3cbf98e2bee91db9ed9118ff557e02cdd449f49a

commit 3cbf98e2bee91db9ed9118ff557e02cdd449f49a
Author:     Piotr Pawel Stefaniak <ps...@freebsd.org>
AuthorDate: 2021-08-22 19:57:13 +0000
Commit:     Piotr Pawel Stefaniak <ps...@freebsd.org>
CommitDate: 2021-08-23 14:09:05 +0000

    diff: read whole files to determine if they are ASCII text
    
    Before this change, only the first BUFSIZ bytes were checked.
    
    Reviewed by:    bapt (previous version)
    Differential Revision:  https://reviews.freebsd.org/D31639
---
 usr.bin/diff/diffreg.c          | 59 +++++++++++++++++++++++++----------------
 usr.bin/diff/tests/diff_test.sh | 14 ++++++++++
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c
index 113b7b621256..e728441c2cb2 100644
--- a/usr.bin/diff/diffreg.c
+++ b/usr.bin/diff/diffreg.c
@@ -180,6 +180,8 @@ struct context_vec {
        int     d;              /* end line in new file */
 };
 
+enum readhash { RH_BINARY, RH_OK, RH_EOF };
+
 #define MIN_PAD                1
 static FILE    *opentemp(const char *);
 static void     output(char *, FILE *, char *, FILE *, int);
@@ -188,7 +190,7 @@ static void  range(int, int, const char *);
 static void     uni_range(int, int);
 static void     dump_context_vec(FILE *, FILE *, int);
 static void     dump_unified_vec(FILE *, FILE *, int);
-static void     prepare(int, FILE *, size_t, int);
+static bool     prepare(int, FILE *, size_t, int);
 static void     prune(void);
 static void     equiv(struct line *, int, struct line *, int, int *);
 static void     unravel(int);
@@ -206,7 +208,7 @@ static int   search(int *, int, int);
 static int      skipline(FILE *);
 static int      isqrt(int);
 static int      stone(int *, int, int *, int *, int);
-static int      readhash(FILE *, int);
+static enum readhash readhash(FILE *, int, unsigned *);
 static int      files_differ(FILE *, FILE *, int);
 static char    *match_function(const long *, int, FILE *);
 static char    *preadline(int, size_t, off_t);
@@ -380,14 +382,16 @@ diffreg(char *file1, char *file2, int flags, int capsicum)
                status |= 1;
                goto closem;
        }
-       if ((flags & D_FORCEASCII) == 0 &&
-           (!asciifile(f1) || !asciifile(f2))) {
+       if ((flags & D_FORCEASCII) != 0) {
+               (void)prepare(0, f1, stb1.st_size, flags);
+               (void)prepare(1, f2, stb2.st_size, flags);
+       } else if (!asciifile(f1) || !asciifile(f2) ||
+                   !prepare(0, f1, stb1.st_size, flags) ||
+                   !prepare(1, f2, stb2.st_size, flags)) {
                rval = D_BINARY;
                status |= 1;
                goto closem;
        }
-       prepare(0, f1, stb1.st_size, flags);
-       prepare(1, f2, stb2.st_size, flags);
 
        prune();
        sort(sfile[0], slen[0]);
@@ -511,12 +515,13 @@ splice(char *dir, char *path)
        return (buf);
 }
 
-static void
+static bool
 prepare(int i, FILE *fd, size_t filesize, int flags)
 {
        struct line *p;
-       int h;
-       size_t sz, j;
+       unsigned h;
+       size_t sz, j = 0;
+       enum readhash r;
 
        rewind(fd);
 
@@ -525,15 +530,23 @@ prepare(int i, FILE *fd, size_t filesize, int flags)
                sz = 100;
 
        p = xcalloc(sz + 3, sizeof(*p));
-       for (j = 0; (h = readhash(fd, flags));) {
-               if (j == sz) {
-                       sz = sz * 3 / 2;
-                       p = xreallocarray(p, sz + 3, sizeof(*p));
+       while ((r = readhash(fd, flags, &h)) != RH_EOF)
+               switch (r) {
+               case RH_EOF: /* otherwise clang complains */
+               case RH_BINARY:
+                       return (false);
+               case RH_OK:
+                       if (j == sz) {
+                               sz = sz * 3 / 2;
+                               p = xreallocarray(p, sz + 3, sizeof(*p));
+                       }
+                       p[++j].value = h;
                }
-               p[++j].value = h;
-       }
+
        len[i] = j;
        file[i] = p;
+
+       return (true);
 }
 
 static void
@@ -1350,8 +1363,8 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 /*
  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
  */
-static int
-readhash(FILE *f, int flags)
+static enum readhash
+readhash(FILE *f, int flags, unsigned *hash)
 {
        int i, t, space;
        unsigned sum;
@@ -1360,6 +1373,9 @@ readhash(FILE *f, int flags)
        space = 0;
        for (i = 0;;) {
                switch (t = getc(f)) {
+               case '\0':
+                       if ((flags & D_FORCEASCII) == 0)
+                               return (RH_BINARY);
                case '\r':
                        if (flags & D_STRIPCR) {
                                t = getc(f);
@@ -1387,18 +1403,15 @@ readhash(FILE *f, int flags)
                        continue;
                case EOF:
                        if (i == 0)
-                               return (0);
+                               return (RH_EOF);
                        /* FALLTHROUGH */
                case '\n':
                        break;
                }
                break;
        }
-       /*
-        * There is a remote possibility that we end up with a zero sum.
-        * Zero is used as an EOF marker, so return 1 instead.
-        */
-       return (sum == 0 ? 1 : sum);
+       *hash = sum;
+       return (RH_OK);
 }
 
 static int
diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh
index b8b7fe1d8068..4f73b23d686a 100755
--- a/usr.bin/diff/tests/diff_test.sh
+++ b/usr.bin/diff/tests/diff_test.sh
@@ -18,6 +18,7 @@ atf_test_case conflicting_format
 atf_test_case label
 atf_test_case report_identical
 atf_test_case non_regular_file
+atf_test_case binary
 
 simple_body()
 {
@@ -265,6 +266,18 @@ non_regular_file_body()
                diff --label A --label B -u A B
 }
 
+binary_body()
+{
+       # the NUL byte has to be after at least BUFSIZ bytes to trick asciifile()
+       yes 012345678901234567890123456789012345678901234567890 | head -n 174 > A
+       cp A B
+       printf '\n\0\n' >> A
+       printf '\nx\n' >> B
+
+       atf_check -o inline:"Binary files A and B differ\n" -s exit:1 diff A B
+       atf_check -o inline:"176c\nx\n.\n" -s exit:1 diff -ae A B
+}
+
 atf_init_test_cases()
 {
        atf_add_test_case simple
@@ -285,4 +298,5 @@ atf_init_test_cases()
        atf_add_test_case label
        atf_add_test_case report_identical
        atf_add_test_case non_regular_file
+       atf_add_test_case binary
 }
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to