This started in 2021 and I have no idea what to do with it so here it
is. Let me get back to how it started:
I was looking into the why does it take some time after apt downloaded
the packages before dpkg starts installing them. As it turns out, I had
apt-listchanges installed which decompresses all .deb files looking for
"NEWS". That was the pause. Then dpkg decompresses it again while
installing. So a bit of waste…

Anyway. I though it would speed up things by moving the files of
interest to the front of the archive. Then I realized that it would
probably require a new interface to query this sort of information or
otherwise it would require some heuristic to decide whether or not the
files of interest are to be expected at the front and the kill dpkg
midway. Then I kind of stopped playing with it.
A few days later I remembered that RAR had (has?) this "solid" kind
of archives where it improves compression by grouping files of same kind
(file extension) together. So I altered the original patch a bit and
made this. I tested this by re-compressing a few .debs and the "ordered"
.debs got smaller by a few bytes / KiBs but not by an order of magnitue.
Then I forgot all about it.

I just remember it all while doing openssl-dpkg patches the other week
and just cleaned up the branch.

Now on the serious side:
- Does this look usefull?
- Any suggestion to how to speed apt-listchanges? A special interface or
  better remove the package?

Signed-off-by: Sebastian Andrzej Siewior <sebast...@breakpoint.cc>
---
 lib/dpkg/treewalk.c |  11 +++
 lib/dpkg/treewalk.h |   2 +
 src/deb/build.c     | 192 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 191 insertions(+), 14 deletions(-)

diff --git a/lib/dpkg/treewalk.c b/lib/dpkg/treewalk.c
index b9a6207f60f52..6c4bcd6728759 100644
--- a/lib/dpkg/treewalk.c
+++ b/lib/dpkg/treewalk.c
@@ -217,6 +217,17 @@ treenode_cmp(const void *a, const void *b)
                      (*(const struct treenode **)b)->name);
 }
 
+int treenode_sort_nop(struct treenode *dir)
+{
+       size_t i;
+
+       /* Relink the nodes. */
+       for (i = 0; i < dir->down_used - 1; i++)
+               dir->down[i]->next = dir->down[i + 1];
+       dir->down[i]->next = NULL;
+       return 0;
+}
+
 static void
 treenode_sort_down(struct treenode *dir)
 {
diff --git a/lib/dpkg/treewalk.h b/lib/dpkg/treewalk.h
index 16b7a310cf939..918808c4a02da 100644
--- a/lib/dpkg/treewalk.h
+++ b/lib/dpkg/treewalk.h
@@ -54,6 +54,8 @@ struct treewalk_funcs {
        treenode_skip_func *skip;
 };
 
+int treenode_sort_nop(struct treenode *dir);
+
 struct treeroot *
 treewalk_open(const char *rootdir, enum treewalk_options options,
               const struct treewalk_funcs *funcs);
diff --git a/src/deb/build.c b/src/deb/build.c
index 175ee907b1561..9f7f4cc1aae0a 100644
--- a/src/deb/build.c
+++ b/src/deb/build.c
@@ -150,16 +150,166 @@ file_info_list_free(struct file_info *fi)
   }
 }
 
+struct node_array {
+       char **names;
+       unsigned int available;
+       unsigned int used;
+};
+
+static void append_file_node(struct node_array *nodes, const char *name)
+{
+       if (nodes->available == nodes->used) {
+               nodes->available += 512;
+               nodes->names = m_realloc(nodes->names,
+                                        nodes->available * sizeof(char*));
+       }
+       nodes->names[nodes->used] = m_strdup(name);
+       nodes->used++;
+}
+
+static const char ELF_header[] = { 0x7f, 0x45, 0x4c, 0x46 };
+
+static int file_is_ELF(const char *fname)
+{
+       char buf[4];
+       size_t bytes;
+       int fd;
+
+       fd = open(fname, O_RDONLY);
+       if (fd < 0)
+               return 0;
+
+       bytes = read(fd, buf, sizeof(buf));
+       close(fd);
+
+       if (bytes != sizeof(buf))
+               return 0;
+
+       if (!memcmp(buf, ELF_header, sizeof(ELF_header)))
+               return 1;
+
+       return 0;
+}
+
+static int file_has_dot(const char *fname)
+{
+       const char *s;
+       const char *ext;
+
+       s = strrchr(fname, '/');
+       if (!s)
+               return 0;
+
+       ext = strrchr(s, '.');
+       if (ext)
+               return 1;
+       return 0;
+}
+
+#define ARRAY_SIZE(x)  (sizeof(x) / sizeof(*(x)))
+
+static const char *compress_ext[] = {
+       "gz",
+       "xz",
+       "bz2",
+       "zst",
+};
+
+static int file_has_ext_comp(const char *fname)
+{
+       const char *ext;
+       unsigned long i;
+
+       ext = strrchr(fname, '.');
+       if (!ext)
+               return 0;
+       ext++;
+       for (i = 0; i < ARRAY_SIZE(compress_ext); i++) {
+               if (!strcmp(ext, compress_ext[i]))
+                       return 1;
+       }
+       return 0;
+}
+
+static int sort_nodes_simple(const void *p1, const void *p2)
+{
+       const char *f1 = *(const char **)p1;
+       const char *f2 = *(const char **)p2;
+
+       return strcmp(f1, f2);
+}
+
+static int sort_nodes_files_ext(const void *p1, const void *p2)
+{
+       const char *f1 = *(const char **)p1;
+       const char *f2 = *(const char **)p2;
+       const char *e1;
+       const char *e2;
+       int r = 0;
+
+       e1 = strrchr(f1, '.');
+       e2 = strrchr(f2, '.');
+       if (e1 && e2)
+               r = strcmp(e1, e2);
+       if (r)
+               return r;
+
+       return strcmp(f1, f2);
+}
+
+static int sort_nodes_files_name(const void *p1, const void *p2)
+{
+       const char *f1 = *(const char **)p1;
+       const char *f2 = *(const char **)p2;
+       const char *e1;
+       const char *e2;
+       int r = 0;
+
+       e1 = strrchr(f1, '/');
+       e2 = strrchr(f2, '/');
+       if (e1 && e2)
+               r = strcmp(e1, e2);
+       if (r)
+               return r;
+
+       return strcmp(f1, f2);
+}
+
+static void files_sort(struct node_array *na,
+                      int (*compar)(const void *, const void *))
+{
+       qsort(na->names, na->used, sizeof(char *), compar);
+}
+
+static void file_names_write_free(int fd_out, struct node_array *files)
+{
+       unsigned int pos;
+
+       for (pos = 0; pos < files->used; pos++) {
+               if (fd_write(fd_out, files->names[pos],
+                            strlen(files->names[pos]) + 1) < 0)
+                       ohshite(_("Failed to write filename to tar pipe (%s)"), 
_("data member"));
+               free(files->names[pos]);
+       }
+       free(files->names);
+}
+
 static void
 file_treewalk_feed(const char *dir, int fd_out)
 {
   struct treeroot *tree;
   struct treenode *node;
-  struct file_info *fi;
-  struct file_info *symlist = NULL;
-  struct file_info *symlist_end = NULL;
+  struct treewalk_funcs nsort = {
+       .sort = treenode_sort_nop,
+  };
+  struct node_array files_dir = {};
+  struct node_array files_ELF = {};
+  struct node_array files_dot = {};
+  struct node_array files_other = {};
+  struct node_array files_compress = {};
+  struct node_array files_symlink = {};
 
-  tree = treewalk_open(dir, TREEWALK_NONE, NULL);
+  tree = treewalk_open(dir, TREEWALK_NONE, &nsort);
   for (node = treewalk_node(tree); node ; node = treewalk_next(tree)) {
     const char *virtname = treenode_get_virtname(node);
     char *nodename;
@@ -175,23 +325,37 @@ file_treewalk_feed(const char *dir, int fd_out)
     /* We need to reorder the files so we can make sure that symlinks
      * will not appear before their target. */
     if (S_ISLNK(treenode_get_mode(node))) {
-      fi = file_info_new(nodename);
-      file_info_list_append(&symlist, &symlist_end, fi);
+           append_file_node(&files_symlink, nodename);
+    } else if (S_ISDIR(treenode_get_mode(node))) {
+           append_file_node(&files_dir, nodename);
+    } else if (file_is_ELF(treenode_get_pathname(node))) {
+           append_file_node(&files_ELF, nodename);
+    } else if (file_has_dot(nodename)) {
+           if (file_has_ext_comp(nodename)) {
+                   append_file_node(&files_compress, nodename);
+           } else {
+                   append_file_node(&files_dot, nodename);
+           }
     } else {
-      if (fd_write(fd_out, nodename, strlen(nodename) + 1) < 0)
-        ohshite(_("failed to write filename to tar pipe (%s)"),
-                _("data member"));
+           append_file_node(&files_other, nodename);
     }
-
     free(nodename);
   }
   treewalk_close(tree);
 
-  for (fi = symlist; fi; fi = fi->next)
-    if (fd_write(fd_out, fi->fn, strlen(fi->fn) + 1) < 0)
-      ohshite(_("failed to write filename to tar pipe (%s)"), _("data 
member"));
+  files_sort(&files_dir, sort_nodes_simple);
+  files_sort(&files_ELF, sort_nodes_files_ext);
+  files_sort(&files_dot, sort_nodes_files_ext);
+  files_sort(&files_other, sort_nodes_simple);
+  files_sort(&files_compress, sort_nodes_simple);
+  files_sort(&files_symlink, sort_nodes_simple);
 
-  file_info_list_free(symlist);
+  file_names_write_free(fd_out, &files_dir);
+  file_names_write_free(fd_out, &files_ELF);
+  file_names_write_free(fd_out, &files_dot);
+  file_names_write_free(fd_out, &files_other);
+  file_names_write_free(fd_out, &files_compress);
+  file_names_write_free(fd_out, &files_symlink);
 }
 
 static const char *const maintainerscripts[] = {
-- 
2.40.1

Reply via email to