1. A fix for a bug in the unarchive() function (causing metadata loss when used on large tar files). This bug is due to existing code continuing to check for h->type == HARDLINK and h->type == SYMLINK (near the end of the function), when the entire header block has already been overwritten by a call to eread() prior to the h->type checks.
2. Long (>=256 byte) file name compatibility: 'L' style long file names were extremely simple to add, requiring a (net) addition of a handful of lines of code, and this patch supports both extracting and creating L-style tar archives. Pax 'x' style long names are more complex to parse, and this patch only supports extracting pax 'x' tars, but not creating them. 3. Command line argument compatibility improvements: 'c', 'x', 't' args are accepted without needing a hyphen in front. The '-p' flag is also accepted but is a no-op (as it is the normal behaviour of sbase tar anyway, and allows sbase tar to be used in scripts specifying this flag). Directory tree member extraction is also supported by this patch. 4. Handle tar archives with "." (current directory) entries. Some archives contain "." or "./" entries, causing error reports when the current code tries to remove() the current dir. I have added a check in unarchive() to not perform remove() on encountering these entries. --- tar.c | 203 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 113 insertions(+), 90 deletions(-) diff --git a/tar.c b/tar.c index 6a9ab11..b28d419 100644 --- a/tar.c +++ b/tar.c @@ -19,7 +19,7 @@ #include "fs.h" #include "util.h" -#define BLKSIZ 512 +#define BLKSIZ (sizeof (struct header)) enum Type { REG = '0', @@ -50,6 +50,7 @@ struct header { char major[8]; char minor[8]; char prefix[155]; + char padding[12]; }; static struct dirtime { @@ -169,6 +170,17 @@ ewrite(int fd, const void *buf, size_t n) return r; } +static long +chksum(struct header *h) +{ + long s2, i; + + memset(h->chksum, ' ', sizeof(h->chksum)); + for (i = 0, s2 = 0; i < sizeof(*h); i++) + s2 += *((unsigned char *)h + i); + return s2; +} + static void putoctal(char *dst, unsigned num, int size) { @@ -179,14 +191,16 @@ putoctal(char *dst, unsigned num, int size) static int archive(const char *path) { - char b[BLKSIZ]; - const char *base, *p; - struct group *gr; - struct header *h; + static const struct 
header blank = { + "././@LongLink", "0000000", "0000000", "0000000", "00000000000", + "00000000000", " ", AREG, "", "ustar", "00", + }; + char b[BLKSIZ + BLKSIZ], *p; + struct header *h = (struct header *)b; + struct group *gr; struct passwd *pw; struct stat st; - size_t chksum, i, nlen, plen; - ssize_t l, r; + ssize_t l, n, r; int fd = -1; if (lstat(path, &st) < 0) { @@ -196,47 +210,37 @@ archive(const char *path) weprintf("ignoring %s\n", path); return 0; } - pw = getpwuid(st.st_uid); gr = getgrgid(st.st_gid); - h = (struct header *)b; - memset(b, 0, sizeof(b)); - - plen = 0; - base = path; - if ((nlen = strlen(base)) >= sizeof(h->name)) { - /* - * Cover case where path name is too long (in which case we - * need to split it to prefix and name). - */ - if ((base = strrchr(path, '/')) == NULL) - goto too_long; - for (p = base++; p > path && *p == '/'; --p) - ; - - nlen -= base - path; - plen = p - path + 1; - if (nlen >= sizeof(h->name) || plen >= sizeof(h->prefix)) - goto too_long; + *h = blank; + n = strlcpy(h->name, path, sizeof(h->name)); + if (n >= sizeof(h->name)) { + *++h = blank; + h->type = 'L'; + putoctal(h->size, n, sizeof(h->size)); + putoctal(h->chksum, chksum(h), sizeof(h->chksum)); + + ewrite(tarfd, (char *)h, BLKSIZ); + for (p = (char *)path; n > 0; n -= BLKSIZ, p += BLKSIZ) { + if (n < BLKSIZ) { + p = memcpy(h--, p, n); + memset(p + n, 0, BLKSIZ - n); + } + ewrite(tarfd, p, BLKSIZ); + } } - memcpy(h->name, base, nlen); - memcpy(h->prefix, path, plen); - putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode)); putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid)); putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid)); - putoctal(h->size, 0, sizeof(h->size)); putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime)); - memcpy( h->magic, "ustar", sizeof(h->magic)); - memcpy( h->version, "00", sizeof(h->version)); estrlcpy(h->uname, pw ? pw->pw_name : "", sizeof(h->uname)); estrlcpy(h->gname, gr ? 
gr->gr_name : "", sizeof(h->gname)); if (S_ISREG(st.st_mode)) { h->type = REG; - putoctal(h->size, (unsigned)st.st_size, sizeof(h->size)); + putoctal(h->size, st.st_size, sizeof(h->size)); fd = open(path, O_RDONLY); if (fd < 0) eprintf("open %s:", path); @@ -255,10 +259,7 @@ archive(const char *path) h->type = FIFO; } - memset(h->chksum, ' ', sizeof(h->chksum)); - for (i = 0, chksum = 0; i < sizeof(*h); i++) - chksum += (unsigned char)b[i]; - putoctal(h->chksum, chksum, sizeof(h->chksum)); + putoctal(h->chksum, chksum(h), sizeof(h->chksum)); ewrite(tarfd, b, BLKSIZ); if (fd != -1) { @@ -271,24 +272,21 @@ archive(const char *path) } return 0; - -too_long: - eprintf("filename too long: %s\n", path); } static int unarchive(char *fname, ssize_t l, char b[BLKSIZ]) { - char lname[101], *tmp, *p; - long mode, major, minor, type, mtime, uid, gid; struct header *h = (struct header *)b; - int fd = -1; struct timespec times[2]; + char lname[101], *tmp, *p; + long mode, major, minor, type, mtime, uid, gid; + int fd = -1, lnk = h->type == HARDLINK; if (!mflag && ((mtime = strtol(h->mtime, &p, 8)) < 0 || *p != '\0')) eprintf("strtol %s: invalid number\n", h->mtime); - if (remove(fname) < 0 && errno != ENOENT) - weprintf("remove %s:", fname); + if (strcmp(fname, ".") && strcmp(fname, "./") && remove(fname) < 0) + if (errno != ENOENT) weprintf("remove %s:", fname); tmp = estrdup(fname); mkdirp(dirname(tmp), 0777, 0777); @@ -308,10 +306,9 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ]) case SYMLINK: snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname), h->linkname); - if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0) - eprintf("%s %s -> %s:", - (h->type == HARDLINK) ? "link" : "symlink", - fname, lname); + if ((lnk ? link:symlink)(lname, fname) < 0) + eprintf("%s %s -> %s:", lnk ? 
"link" : "symlink", fname, lname); + lnk++; break; case DIRECTORY: if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') @@ -354,14 +351,14 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ]) close(fd); } - if (h->type == HARDLINK) + if (lnk > 1) return 0; times[0].tv_sec = times[1].tv_sec = mtime; times[0].tv_nsec = times[1].tv_nsec = 0; if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 0) weprintf("utimensat %s:", fname); - if (h->type == SYMLINK) { + if (lnk) { if (!getuid() && lchown(fname, uid, gid)) weprintf("lchown %s:", fname); } else { @@ -435,9 +432,9 @@ sanitize(struct header *h) static void chktar(struct header *h) { - char tmp[8], *err, *p = (char *)h; const char *reason; - long s1, s2, i; + char tmp[8], *err; + long s1, i; if (h->prefix[0] == '\0' && h->name[0] == '\0') { reason = "empty filename"; @@ -457,10 +454,7 @@ chktar(struct header *h) reason = "invalid checksum"; goto bad; } - memset(h->chksum, ' ', sizeof(h->chksum)); - for (i = 0, s2 = 0; i < sizeof(*h); i++) - s2 += (unsigned char)p[i]; - if (s1 != s2) { + if (s1 != chksum(h)) { reason = "incorrect checksum"; goto bad; } @@ -473,45 +467,68 @@ bad: static void xt(int argc, char *argv[], int mode) { - char b[BLKSIZ], fname[256 + 1], *p; + long size, n, l; + char b[BLKSIZ], fname[l = PATH_MAX], *p, *q = NULL; + int i; + int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print; struct timespec times[2]; struct header *h = (struct header *)b; struct dirtime *dirtime; - long size; - int i, n; - int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? 
unarchive : print; - while (eread(tarfd, b, BLKSIZ) > 0 && (h->name[0] || h->prefix[0])) { + while (eread(tarfd, b, BLKSIZ) > 0 && h->name[0]) { chktar(h); - sanitize(h), n = 0; - - /* small dance around non-null terminated fields */ - if (h->prefix[0]) - n = snprintf(fname, sizeof(fname), "%.*s/", - (int)sizeof(h->prefix), h->prefix); - snprintf(fname + n, sizeof(fname) - n, "%.*s", - (int)sizeof(h->name), h->name); + sanitize(h); if ((size = strtol(h->size, &p, 8)) < 0 || *p != '\0') - eprintf("strtol %s: invalid number\n", h->size); + eprintf("strtol %s: invalid size\n", h->size); - if (argc) { - /* only extract the given files */ - for (i = 0; i < argc; i++) - if (!strcmp(argv[i], fname)) + /* long file path is read direcly into fname*/ + if (h->type == 'L' || h->type == 'x' || h->type == 'g') { + + /* read header only up to size of fname buffer */ + for (q = fname; q < fname+size; q += BLKSIZ) { + eread(tarfd, q, BLKSIZ); + if (q + BLKSIZ > fname + l) + eprintf("name exceeds buffer: %s\n", l, fname); + } + + /* convert pax x header with 'path=' field into L header */ + if (h->type == 'x') for (q = fname; q < fname+size-16; q += n) { + if ((n = strtol(q, &p, 10)) < 0 || *p != ' ') + eprintf("strtol %.*s: invalid number\n", p+1-q, q); + if (n && strncmp(p+1, "path=", 5) == 0) { + memmove(fname, p+6, size = q+n - p-6 - 1); + h->type = 'L'; break; + } + } + fname[size] = '\0'; + + /* non L-like header (eg. pax 'g') is skipped by setting q=null */ + if (h->type != 'L') q = NULL; + continue; + } + + /* ustar path is copied into fname if no L header (ie: q is NULL) */ + if (q) q = NULL; else { + n = !h->prefix[0] ? 
0 : + snprintf(fname, l, "%.*s/", (int)sizeof h->prefix, h->prefix); + snprintf(fname + n, l - n, "%.*s", (int)sizeof h->name, h->name); + } + + /* if argc > 0 then only extract the given files */ + if (argc) { + for (i = 0; i < argc; i++) { + n = strlen(argv[i]); + if (!strncmp(argv[i], fname, n)) + if (strchr("/", fname[n]) || argv[i][n-1] == '/') break; + } if (i == argc) { skipblk(size); continue; } } - /* ignore global pax header craziness */ - if (h->type == 'g' || h->type == 'x') { - skipblk(size); - continue; - } - fn(fname, size, b); if (vflag && mode != 't') puts(fname); @@ -530,12 +547,15 @@ xt(int argc, char *argv[], int mode) } } +char **args; +int argn; + static void usage(void) { - eprintf("usage: %s [-C dir] [-J | -Z | -a | -j | -z] -x [-m | -t] " + eprintf("usage: %s [x | t | -x | -t] [-C dir] [-J | -Z | -a | -j | -z] [-m] [-p] " "[-f file] [file ...]\n" - " %s [-C dir] [-J | -Z | -a | -j | -z] [-h] -c path ... " + " %s [c | -c] [-C dir] [-J | -Z | -a | -j | -z] [-h] path ... " "[-f file]\n", argv0, argv0); } @@ -547,6 +567,10 @@ main(int argc, char *argv[]) char *file = NULL, *dir = ".", mode = '\0'; int fd; + argv0 = argv[0]; + if (argc > 1 && strchr("cxt", mode = *argv[1])) + *(argv[1]+1) ? *argv[1] = '-' : (*++argv = argv0, --argc); + ARGBEGIN { case 'x': case 'c': @@ -576,18 +600,15 @@ main(int argc, char *argv[]) case 'v': vflag = 1; break; + case 'p': + break; /* Do nothing as already default behaviour */ default: usage(); } ARGEND - if (!mode) - usage(); - if (mode == 'c') - if (!argc) - usage(); - switch (mode) { case 'c': + if (!argc) usage(); tarfd = 1; if (file && *file != '-') { tarfd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0644); @@ -626,6 +647,8 @@ main(int argc, char *argv[]) eprintf("chdir %s:", dir); xt(argc, argv, mode); break; + default: + usage(); } return recurse_status; -- 2.49.0