Most tar implementations (GNU, BusyBox, toybox, libarchive) treat leading spaces as equivalent to leading zeroes in numeric fields. Because this tar does not, some archives are being recognized as malformed. Fix this by replacing leading spaces with leading zeroes in sanitize(). Since chktar() reads numeric fields, it was also necessary to swap the order of the chktar() and sanitize() calls in xt(), so that sanitize() now runs first. --- tar.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/tar.c b/tar.c index d3a9f3b..9833ae0 100644 --- a/tar.c +++ b/tar.c @@ -382,6 +382,7 @@ static void sanitize(struct header *h) { size_t i, j; + int leading; struct { char *f; size_t l; @@ -399,10 +400,17 @@ sanitize(struct header *h) /* Numeric fields can be terminated with spaces instead of * NULs as per the ustar specification. Patch all of them to * use NULs so we can perform string operations on them. */ + + /* Most tar implementations also recognize leading spaces + * in numeric fields. Some tar archives use ' 's instead of + * '0's for them. So patch those too, so we can correctly + * recognize them. */ for (i = 0; i < LEN(fields); i++) - for (j = 0; j < fields[i].l; j++) + for (leading = 1, j = 0; j < fields[i].l; j++) if (fields[i].f[j] == ' ') - fields[i].f[j] = '\0'; + fields[i].f[j] = leading ? '0' : '\0'; + else + leading = 0; } static void @@ -454,8 +462,8 @@ xt(int argc, char *argv[], int mode) int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print; while (eread(tarfd, b, BLKSIZ) > 0 && h->name[0]) { - chktar(h); - sanitize(h), n = 0; + sanitize(h); + chktar(h), n = 0; /* small dance around non-null terminated fields */ if (h->prefix[0]) -- 2.43.0