Hello everyone, While playing with sbase and musl, I encountered a strange bug: tar fails to extract bzip2-compressed archives when bzip2 is compiled against musl (I only tested static linking). Note that bzip2 decompresses the files correctly. The error only occurs when the data flows through a pipe:
The file was created using GNU/tar, dynamically compiled against glibc. As a reference, here is a test with the static bzip2 and GNU/tar: If using the bzip2 binary provided with my distro (v1.0.6), everything works as expected: $ /usr/bin/bzip2 -d < pm.tbz | musl/tar -t pm/ pm/IDEAS pm/LICENSE pm/README pm/arg.h pm/makefile pm/pack.5 pm/pm.1 pm/pm.c pm/config.mk As I said earlier, the error occurs only with a pipe in between, so uncompressing the file completely before extracting it works flawlessly: $ musl/bzip2 -d < pm.tbz > pm.tar $ musl/tar -tf pm.tar pm/ pm/IDEAS pm/LICENSE pm/README pm/arg.h pm/makefile pm/pack.5 pm/pm.1 pm/pm.c pm/config.mk I added some debug messages to `chktar()` in order to identify what is going wrong (see patch attached), and the error changes (apparently) randomly. Sometimes, extraction simply stops without an error: $ musl/bzip2 -d < pm.tbz | musl/tar -t pm/ pm/IDEAS pm/LICENSE pm/README Sometimes the magic string is wrong (the header is read from within a previous entry's data): $ musl/bzip2 -d < pm.tbz | musl/tar -t pm/ pm/IDEAS pm/LICENSE pm/README pm/arg.h pm/makefile pm/pack.5 pm/pm.1 pm/pm.c chktar: invalid magic "ootfs, datadir, *argv); if (action == ACTION_UPDATE) r += update(rootfs, datadir, *argv); if (action == ACTION_DELETE) r += delete(rootfs, datadir, *argv); argv++; } break; case ACTION_INSPECT: if (inspect(datadir, n) != 0) retu" musl/tar: malformed tar archive And sometimes the magic string is ok, but the checksum fails: $ musl/bzip2 -d < pm.tbz | musl/tar -t pm/ pm/IDEAS pm/LICENSE pm/README pm/arg.h pm/makefile pm/pack.5 pm/pm.1 chktar: bad checksum musl/tar: malformed tar archive At this point, I'm not sure where to look, or even whether the bug really lies in tar and not in bzip2. I checked the size (both octal and converted) and the value is correct, so I'm not sure why the headers are not being read correctly. 
In the case of the archive above, the error occurs on the last entry, but depending on the archive, it can occur on the second entry or somewhere in the middle; there is no apparent pattern. I hope my examples make the issue clear. Would anyone have an idea? -- willy
diff --git a/tar.c b/tar.c index 71719b0..49ecfae 100644 --- a/tar.c +++ b/tar.c @@ -351,8 +351,10 @@ skipblk(ssize_t l) char b[BLKSIZ]; for (; l > 0; l -= BLKSIZ) - if (!eread(tarfd, b, BLKSIZ)) + if (!eread(tarfd, b, BLKSIZ)) { + fprintf(stderr, "skipblk: failed to read %d bytes\n", BLKSIZ); break; + } } static int @@ -402,16 +404,42 @@ sanitize(struct header *h) } static void +dumpheader(struct header *h) +{ + puts(""); + printf("%s: %s\n", "name", h->name); + printf("%s: %s\n", "mode", h->mode); + printf("%s: %s\n", "uid", h->uid); + printf("%s: %s\n", "gid", h->gid); + printf("%s: %s\n", "size", h->size); + printf("%s: %s\n", "mtime", h->mtime); + printf("%s: %s\n", "chksum", h->chksum); + printf("%s: %c\n", "type", h->type); + printf("%s: %s\n", "linkname", h->linkname); + printf("%s: %s\n", "magic", h->magic); + printf("%s: %s\n", "version", h->version); + printf("%s: %s\n", "uname", h->uname); + printf("%s: %s\n", "gname", h->gname); + printf("%s: %s\n", "major", h->major); + printf("%s: %s\n", "minor", h->minor); + printf("%s: %s\n", "prefix", h->prefix); +} + +static void chktar(struct header *h) { char tmp[8], *err; char *p = (char *)h; long s1, s2, i; - if (h->prefix[0] == '\0' && h->name[0] == '\0') + if (h->prefix[0] == '\0' && h->name[0] == '\0') { + fprintf(stderr, "chktar: empty prefix and name\n"); goto bad; - if (h->magic[0] && strncmp("ustar", h->magic, 5)) + } + if (h->magic[0] && strncmp("ustar", h->magic, 5)) { + fprintf(stderr, "chktar: invalid magic \"%s\"\n", h->magic); goto bad; + } memcpy(tmp, h->chksum, sizeof(tmp)); for (i = 0; i < sizeof(tmp); i++) if (tmp[i] == ' ') @@ -422,8 +450,10 @@ chktar(struct header *h) memset(h->chksum, ' ', sizeof(h->chksum)); for (i = 0, s2 = 0; i < sizeof(*h); i++) s2 += (unsigned char)p[i]; - if (s1 != s2) + if (s1 != s2) { + fprintf(stderr, "chktar: bad checksum\n"); goto bad; + } memcpy(h->chksum, tmp, sizeof(h->chksum)); return; bad: @@ -442,6 +472,8 @@ xt(int argc, char *argv[], int mode) int 
(*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print; while (eread(tarfd, b, BLKSIZ) > 0 && h->name[0]) { + if (vflag) + dumpheader(h); chktar(h); sanitize(h), n = 0;