git: c923dda75a23 - stable/13 - libarchive: merge bugfixes from vendor branch

Martin Matuska Sat, 04 May 2024 05:38:54 -0700

The branch stable/13 has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=c923dda75a2324283d6dbdc6c8ae132767e83086


commit c923dda75a2324283d6dbdc6c8ae132767e83086
Author:     Martin Matuska <m...@freebsd.org>
AuthorDate: 2024-04-30 09:53:06 +0000
Commit:     Martin Matuska <m...@freebsd.org>
CommitDate: 2024-05-04 11:55:21 +0000

    libarchive: merge bugfixes from vendor branch
    
     #2147 archive_string: clean up strncat_from_utf8_to_utf8 (36047967a)
     #2153 archive_match: check archive_read_support_format_raw()
           return value (0ce1b4c38)
     #2154 archive_match: turn counter into flag (287e05d53)
     #2155 lha: Do not allow negative file sizes (93b11caed)
     #2156 tests: setenv LANG to en_US.UTF-8 in bsdunzip test_I.c (83e8b0ea8)
    
    (cherry picked from commit c0b58e65deca1e5e2c434ede7e64f03af6044be8)
---
 contrib/libarchive/libarchive/archive_match.c      | 18 ++--
 .../libarchive/archive_read_support_format_lha.c   |  2 +
 contrib/libarchive/libarchive/archive_string.c     | 96 ++++++++++------------
 contrib/libarchive/unzip/test/test_I.c             |  8 ++
 4 files changed, 61 insertions(+), 63 deletions(-)

diff --git a/contrib/libarchive/libarchive/archive_match.c b/contrib/libarchive/libarchive/archive_match.c
index fc8a4ce8127b..3ab8eda36038 100644
--- a/contrib/libarchive/libarchive/archive_match.c
+++ b/contrib/libarchive/libarchive/archive_match.c
@@ -46,7 +46,7 @@
 
 struct match {
        struct match            *next;
-       int                      matches;
+       int                      matched;
        struct archive_mstring   pattern;
 };
 
@@ -605,7 +605,8 @@ add_pattern_from_file(struct archive_match *a, struct match_list *mlist,
                return (ARCHIVE_FATAL);
        }
        r = archive_read_support_format_raw(ar);
-       r = archive_read_support_format_empty(ar);
+       if (r == ARCHIVE_OK)
+               r = archive_read_support_format_empty(ar);
        if (r != ARCHIVE_OK) {
                archive_copy_error(&(a->archive), ar);
                archive_read_free(ar);
@@ -724,12 +725,12 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
        matched = NULL;
        for (match = a->inclusions.first; match != NULL;
            match = match->next){
-               if (match->matches == 0 &&
+               if (!match->matched &&
                    (r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
                        if (r < 0)
                                return (r);
                        a->inclusions.unmatched_count--;
-                       match->matches++;
+                       match->matched = 1;
                        matched = match;
                }
        }
@@ -752,11 +753,10 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname)
        for (match = a->inclusions.first; match != NULL;
            match = match->next){
                /* We looked at previously-unmatched inclusions already. */
-               if (match->matches > 0 &&
+               if (match->matched &&
                    (r = match_path_inclusion(a, match, mbs, pathname)) != 0) {
                        if (r < 0)
                                return (r);
-                       match->matches++;
                        return (0);
                }
        }
@@ -879,7 +879,7 @@ match_list_unmatched_inclusions_next(struct archive_match *a,
        for (m = list->unmatched_next; m != NULL; m = m->next) {
                int r;
 
-               if (m->matches)
+               if (m->matched)
                        continue;
                if (mbs) {
                        const char *p;
@@ -1793,7 +1793,7 @@ match_owner_name_mbs(struct archive_match *a, struct match_list *list,
                    < 0 && errno == ENOMEM)
                        return (error_nomem(a));
                if (p != NULL && strcmp(p, name) == 0) {
-                       m->matches++;
+                       m->matched = 1;
                        return (1);
                }
        }
@@ -1814,7 +1814,7 @@ match_owner_name_wcs(struct archive_match *a, struct match_list *list,
                    < 0 && errno == ENOMEM)
                        return (error_nomem(a));
                if (p != NULL && wcscmp(p, name) == 0) {
-                       m->matches++;
+                       m->matched = 1;
                        return (1);
                }
        }
diff --git a/contrib/libarchive/libarchive/archive_read_support_format_lha.c b/contrib/libarchive/libarchive/archive_read_support_format_lha.c
index 4d6290ac33bb..ae5a1d7d668e 100644
--- a/contrib/libarchive/libarchive/archive_read_support_format_lha.c
+++ b/contrib/libarchive/libarchive/archive_read_support_format_lha.c
@@ -1347,6 +1347,8 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
                                lha->compsize = archive_le64dec(extdheader);
                                extdheader += sizeof(uint64_t);
                                lha->origsize = archive_le64dec(extdheader);
+                               if (lha->compsize < 0 || lha->origsize < 0)
+                                       goto invalid;
                        }
                        break;
                case EXT_CODEPAGE:
diff --git a/contrib/libarchive/libarchive/archive_string.c b/contrib/libarchive/libarchive/archive_string.c
index f39677ad7a26..be6c39600d72 100644
--- a/contrib/libarchive/libarchive/archive_string.c
+++ b/contrib/libarchive/libarchive/archive_string.c
@@ -2640,81 +2640,69 @@ unicode_to_utf16le(char *p, size_t remaining, uint32_t uc)
 }
 
 /*
- * Copy UTF-8 string in checking surrogate pair.
- * If any surrogate pair are found, it would be canonicalized.
+ * Append new UTF-8 string to existing UTF-8 string.
+ * Existing string is assumed to already be in proper form;
+ * the new string will have invalid sequences replaced and
+ * surrogate pairs canonicalized.
  */
 static int
-strncat_from_utf8_to_utf8(struct archive_string *as, const void *_p,
+strncat_from_utf8_to_utf8(struct archive_string *as, const void *_src,
     size_t len, struct archive_string_conv *sc)
 {
-       const char *s;
-       char *p, *endp;
-       int n, ret = 0;
-
+       int ret = 0;
+       const char *src = _src;
        (void)sc; /* UNUSED */
 
+       /* Pre-extend the destination */
        if (archive_string_ensure(as, as->length + len + 1) == NULL)
                return (-1);
 
-       s = (const char *)_p;
-       p = as->s + as->length;
-       endp = as->s + as->buffer_length -1;
-       do {
+       /* Invariant: src points to the first UTF8 byte that hasn't
+        * been copied to the destination `as`. */
+       for (;;) {
+               int n;
                uint32_t uc;
-               const char *ss = s;
-               size_t w;
+               const char *e = src;
 
-               /*
-                * Forward byte sequence until a conversion of that is needed.
-                */
-               while ((n = utf8_to_unicode(&uc, s, len)) > 0) {
-                       s += n;
+               /* Skip UTF-8 sequences until we reach end-of-string or
+                * a code point that needs conversion. */
+               while ((n = utf8_to_unicode(&uc, e, len)) > 0) {
+                       e += n;
                        len -= n;
                }
-               if (ss < s) {
-                       if (p + (s - ss) > endp) {
-                               as->length = p - as->s;
-                               if (archive_string_ensure(as,
-                                   as->buffer_length + len + 1) == NULL)
-                                       return (-1);
-                               p = as->s + as->length;
-                               endp = as->s + as->buffer_length -1;
-                       }
-
-                       memcpy(p, ss, s - ss);
-                       p += s - ss;
+               /* Copy the part that doesn't need conversion */
+               if (e > src) {
+                       if (archive_string_append(as, src, e - src) == NULL)
+                               return (-1);
+                       src = e;
                }
 
-               /*
-                * If n is negative, current byte sequence needs a replacement.
-                */
-               if (n < 0) {
+               if (n == 0) {
+                       /* We reached end-of-string */
+                       return (ret);
+               } else {
+                       /* Next code point needs conversion */
+                       char t[4];
+                       size_t w;
+
+                       /* Try decoding a surrogate pair */
                        if (n == -3 && IS_SURROGATE_PAIR_LA(uc)) {
-                               /* Current byte sequence may be CESU-8. */
-                               n = cesu8_to_unicode(&uc, s, len);
+                               n = cesu8_to_unicode(&uc, src, len);
                        }
+                       /* Not a (valid) surrogate, so use a replacement char */
                        if (n < 0) {
-                               ret = -1;
-                               n *= -1;/* Use a replaced unicode character. */
-                       }
-
-                       /* Rebuild UTF-8 byte sequence. */
-                       while ((w = unicode_to_utf8(p, endp - p, uc)) == 0) {
-                               as->length = p - as->s;
-                               if (archive_string_ensure(as,
-                                   as->buffer_length + len + 1) == NULL)
-                                       return (-1);
-                               p = as->s + as->length;
-                               endp = as->s + as->buffer_length -1;
+                               ret = -1; /* Return -1 if we used any replacement */
+                               n *= -1;
                        }
-                       p += w;
-                       s += n;
+                       /* Consume converted code point */
+                       src += n;
                        len -= n;
+                       /* Convert and append new UTF-8 sequence. */
+                       w = unicode_to_utf8(t, sizeof(t), uc);
+                       if (archive_string_append(as, t, w) == NULL)
+                               return (-1);
                }
-       } while (n > 0);
-       as->length = p - as->s;
-       as->s[as->length] = '\0';
-       return (ret);
+       }
 }
 
 static int
diff --git a/contrib/libarchive/unzip/test/test_I.c b/contrib/libarchive/unzip/test/test_I.c
index 5d31ce8d1611..d189edca1a5c 100644
--- a/contrib/libarchive/unzip/test/test_I.c
+++ b/contrib/libarchive/unzip/test/test_I.c
@@ -33,6 +33,7 @@
 DEFINE_TEST(test_I)
 {
        const char *reffile = "test_I.zip";
+       const char *lang;
        int r;
 
 #if HAVE_SETLOCALE
@@ -44,6 +45,8 @@ DEFINE_TEST(test_I)
        skipping("setlocale() not available on this system.");
 #endif
 
+       lang = getenv("LANG");
+       setenv("LANG", "en_US.UTF-8", 1);
        extract_reference_file(reffile);
        r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile);
        assertEqualInt(0, r);
@@ -51,4 +54,9 @@ DEFINE_TEST(test_I)
        assertEmptyFile("test.err");
 
        assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt");
+
+       if (lang == NULL)
+               unsetenv("LANG");
+       else
+               setenv("LANG", lang, 1);
 }

git: c923dda75a23 - stable/13 - libarchive: merge bugfixes from vendor branch

Reply via email to