This patch depends on:
"tar: fix long names crashing tar archiving"
https://lists.suckless.org/hackers/2402/19071.html

I went with the simplest possible hardlink implementation, in
order to have at least something (it's a feature of 'tar' that I
actually use in my projects, so I needed it).
Insights for more interesting approaches can be found here:
https://lists.suckless.org/dev/2301/35084.html

This should finally cover the specification, although one
might then want to support even longer paths (more than 155
(prefix) + 100 (name) for regular files, and more than 100
for links); but I think that should be discussed on dev.
---
 tar.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 90 insertions(+), 11 deletions(-)

diff --git a/tar.c b/tar.c
index a2dea4d..a54d66d 100644
--- a/tar.c
+++ b/tar.c
@@ -52,6 +52,16 @@ struct header {
        char prefix[155];
 };
 
+/* List of hardlinked files already archived, searched by (dev, ino). */
+struct hlink {
+       struct hlink *next; /* next entry, NULL for the last one */
+       dev_t dev;          /* st_dev of the archived file */
+       ino_t ino;          /* st_ino of the archived file */
+       char linkname[100]; /* header name the file was archived under */
+};
+static struct hlink *hlinklist = NULL;
+static struct hlink *hlinktail = NULL;
+
 static struct dirtime {
        char *name;
        time_t mtime;
@@ -182,14 +192,15 @@ archive(const char *path)
        char b[BLKSIZ];
        struct group *gr;
        struct header *h;
+       struct hlink *hlp;
        struct passwd *pw;
        struct stat st;
        size_t chksum, i;
        ssize_t l, r;
        int fd = -1;
-       size_t path_len;
        char tmp_prefix[PATH_MAX];
        char *bsname;
+       int found_hlink = 0;
 
        if (lstat(path, &st) < 0) {
                weprintf("lstat %s:", path);
@@ -205,14 +216,13 @@ archive(const char *path)
        h = (struct header *)b;
        memset(b, 0, sizeof(b));
 
-       path_len = strlen(path);
-       if (path_len > 100 - 1) {
+       if (strlen(path) >= 100) {
                // Cover case where path name is too long (in which case we need
                // to split it to prefix and name).
                bsname = basename((char *)path);
-               strncpy(tmp_prefix, path, PATH_MAX);
+               estrlcpy(tmp_prefix, path, PATH_MAX);
                dirname(tmp_prefix);
-               // Could still be too long to fit in the struct.
+               // Could still be too long to fit in the fields.
                if (strlen(bsname) >= sizeof(h->name) ||
                    strlen(tmp_prefix) >= sizeof(h->prefix)) {
                        eprintf("filename too long: %s\n", path);
@@ -234,11 +244,55 @@ archive(const char *path)
        estrlcpy(h->gname,   gr ? gr->gr_name : "",       sizeof(h->gname));
 
        if (S_ISREG(st.st_mode)) {
-               h->type = REG;
-               putoctal(h->size, (unsigned)st.st_size,  sizeof(h->size));
-               fd = open(path, O_RDONLY);
-               if (fd < 0)
-                       eprintf("open %s:", path);
+               if (st.st_nlink > 1) {
+                       /* It's a hardlink */
+                       for (hlp = hlinklist; hlp; hlp = hlp->next) {
+                               if (hlp->ino == st.st_ino &&
+                                   hlp->dev == st.st_dev) {
+                                       /* Found in our list. */
+                                       found_hlink = 1;
+                                       h->type = HARDLINK;
+                                       putoctal(h->size, 0, sizeof(h->size));
+                                       estrlcpy(
+                                           h->linkname, hlp->linkname,
+                                           sizeof(h->linkname));
+                                       break;
+                               }
+                       }
+                       if (!found_hlink) {
+                               /* Never encountered this hardlink before. Let's
+                                * store it in our list. */
+                               if (strlen(h->prefix) > 0)
+                                       eprintf(
+                                           "filename too long to be able to "
+                                           "store it as a hardlink: %s\n",
+                                           path);
+                               struct hlink *new_hlink =
+                                   ecalloc(1, sizeof(struct hlink));
+                               new_hlink->next = NULL;
+                               new_hlink->dev = st.st_dev;
+                               new_hlink->ino = st.st_ino;
+                               estrlcpy(
+                                   new_hlink->linkname, h->name,
+                                   sizeof(new_hlink->linkname));
+                               if (hlinklist == NULL)
+                                       hlinklist = new_hlink;
+                               else
+                                       hlinktail->next = new_hlink;
+                               hlinktail = new_hlink;
+                       }
+               }
+               /* If it's a regular file OR if it is a hardlink but it's the
+                  first time we encounter it, we need to dump the file content.
+               */
+               if (!found_hlink) {
+                       h->type = REG;
+                       putoctal(
+                           h->size, (unsigned)st.st_size, sizeof(h->size));
+                       fd = open(path, O_RDONLY);
+                       if (fd < 0)
+                               eprintf("open %s:", path);
+               }
        } else if (S_ISDIR(st.st_mode)) {
                h->type = DIRECTORY;
        } else if (S_ISLNK(st.st_mode)) {
@@ -272,6 +326,20 @@ archive(const char *path)
        return 0;
 }
 
+/* Free every node of the hardlink list and reset its head and tail. */
+static void
+freehlinklist(void)
+{
+       struct hlink *hlp, *next;
+
+       for (hlp = hlinklist; hlp; hlp = next) {
+               next = hlp->next; /* read before the node is freed */
+               free(hlp);
+       }
+       hlinklist = NULL;
+       hlinktail = NULL;
+}
+
 static int
 unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 {
@@ -383,8 +451,16 @@ skipblk(ssize_t l)
 static int
 print(char *fname, ssize_t l, char b[BLKSIZ])
 {
-       puts(fname);
+       struct header *h = (struct header *)b;
+
+       /* Print the entry name and, when vflag is set, its link target;
+        * linkname has no NUL when it fills the whole header field. */
+       fputs(fname, stdout);
+       if (vflag && h->linkname[0])
+               printf(" link to %.*s", (int)sizeof(h->linkname), h->linkname);
+       putchar('\n');
        skipblk(l);
+
        return 0;
 }
 
@@ -602,6 +678,9 @@ main(int argc, char *argv[])
                        eprintf("chdir %s:", dir);
                for (; *argv; argc--, argv++)
                        recurse(AT_FDCWD, *argv, NULL, &r);
+
+               freehlinklist();
+
                break;
        case 't':
        case 'x':
-- 
2.34.1


Reply via email to