commit:     61550c2bc704224fadc338ae9e774b260d6dfaa4
Author:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 28 07:17:13 2025 +0000
Commit:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Thu Aug 28 07:27:31 2025 +0000
URL:        https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=61550c2b

libq/tree: implement gtree cache usage

Use gtree cache when available (e.g. generated by q -c).  A small
experiment shows the (unexpected) benefits on an SSD system, doing a scan
over all ebuilds, forcing to read metadata using `qlist -IStv`.

qlist -IStv in this case returns 30321 ebuilds

without gtree, using md5-cache
cold/first run: 7.20s
warm runs: 5.30s 4.81s 5.05s

with gtree (generated by `q -c` in 8.47s)
cold/first run: 0.17s
warm runs: 0.17s 0.17s 0.17s

Most notably the gtree runs are much more stable, which probably is due
to the constrained IO (single stream read from the archive).  Reading
many files from the filesystem incurs high cost, as seen in particular
when avoiding to read any data.

When the -S argument is dropped, and qlist can serve the atoms from the
directories only (e.g. no metadata cache file read is necessary), 0.30s
is taken, while the gtree version requires slighly less, 0.15s, most
likely due to printing overhead (it always reads the cache values, so
there is no difference with the earlier -S run).

Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>

 libq/tree.c | 328 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 libq/tree.h |   3 +
 2 files changed, 320 insertions(+), 11 deletions(-)

diff --git a/libq/tree.c b/libq/tree.c
index e780fb1..9b65bda 100644
--- a/libq/tree.c
+++ b/libq/tree.c
@@ -79,6 +79,7 @@ tree_open_int(const char *sroot, const char *tdir, bool quiet)
 
 static const char portcachedir_pms[] = "metadata/cache";
 static const char portcachedir_md5[] = "metadata/md5-cache";
+static const char portcachedir_gt1[] = "metadata/repo.gtree.tar";
 static const char portrepo_name[]    = "profiles/repo_name";
 tree_ctx *
 tree_open(const char *sroot, const char *portdir)
@@ -96,6 +97,15 @@ tree_open(const char *sroot, const char *portdir)
        }
 
        /* look for cache trees to speed things up */
+#ifdef HAVE_LIBARCHIVE
+       snprintf(buf, sizeof(buf), "%s/%s", portdir, portcachedir_gt1);
+       ret->subtree = tree_open_gtree_int(sroot, buf, true);
+       if (ret->subtree != NULL) {
+               ret->subtree->treetype = TREE_METADATA_GTREE;
+               return ret;
+       }
+#endif
+
        snprintf(buf, sizeof(buf), "%s/%s", portdir, portcachedir_md5);
        ret->subtree = tree_open_int(sroot, buf, true);
        if (ret->subtree != NULL) {
@@ -166,6 +176,70 @@ tree_open_binpkg(const char *sroot, const char *spkg)
        return ret;
 }
 
+#ifdef HAVE_LIBARCHIVE
+tree_ctx *
+tree_open_gtree_int(const char *sroot, const char *tar, bool quiet)
+{
+       tree_ctx             *ctx = xzalloc(sizeof(*ctx));
+       struct archive       *gt;
+       struct archive_entry *entry;
+
+       ctx->portroot_fd = open(sroot, O_RDONLY | O_CLOEXEC | O_PATH);
+       if (ctx->portroot_fd == -1) {
+               if (!quiet)
+                       warnp("could not open root: %s", sroot);
+               free(ctx);
+               return NULL;
+       }
+
+       /* Skip the leading slash */
+       tar++;
+       if (*tar == '\0')
+               tar = ".";
+       ctx->tree_fd = openat(ctx->portroot_fd, tar, O_RDONLY | O_CLOEXEC);
+       if (ctx->tree_fd == -1) {
+               if (!quiet)
+                       warnp("could not open tree: %s (in root %s)", tar, 
sroot);
+               close(ctx->portroot_fd);
+               free(ctx);
+               return NULL;
+       }
+
+       if (sroot[0] == '/' && sroot[1] == '\0')
+               sroot = "";
+       snprintf(ctx->path, sizeof(ctx->path), "%s/%s", sroot, tar);
+
+       gt = archive_read_new();
+       archive_read_support_format_all(gt);
+       if (archive_read_open_fd(gt, ctx->tree_fd, BUFSIZ) != ARCHIVE_OK ||
+               archive_read_next_header(gt, &entry) != ARCHIVE_OK)
+       {
+               if (!quiet)
+                       warnp("could not open tree: %s (in root %s): %s",
+                                 tar, sroot, archive_error_string(gt));
+               archive_read_free(gt);
+               close(ctx->portroot_fd);
+               close(ctx->tree_fd);
+               free(ctx);
+               return NULL;
+       }
+       if (strcmp(archive_entry_pathname(entry), "gtree-1") != 0) {
+               if (!quiet)
+                       warnp("could not open tree: %s (in root %s): not a 
gtree container",
+                                 tar, sroot);
+               archive_read_free(gt);
+               close(ctx->portroot_fd);
+               close(ctx->tree_fd);
+               free(ctx);
+               return NULL;
+       }
+       /* defer repo for now */
+       archive_read_free(gt);
+
+       return ctx;
+}
+#endif
+
 void
 tree_close(tree_ctx *ctx)
 {
@@ -189,9 +263,10 @@ tree_close(tree_ctx *ctx)
        if (ctx->cache.store != NULL)
                free(ctx->cache.store);
 
-       closedir(ctx->dir);
-       /* closedir() above does this for us: */
-       /* close(ctx->tree_fd); */
+       if (ctx->dir != NULL)
+               closedir(ctx->dir);
+       else if (ctx->tree_fd >= 0)
+               close(ctx->tree_fd); /* closedir() above does this for us */
        close(ctx->portroot_fd);
        if (ctx->do_sort)
                scandir_free(ctx->cat_de, ctx->cat_cnt);
@@ -942,13 +1017,18 @@ tree_read_file_ebuild(tree_pkg_ctx *pkg_ctx)
                                                                continue;
                                                        }
                                                } else {
-                                                       /* implement line 
continuation (\ before newline) */
-                                                       if (esc && (*p == '\n' 
|| *p == '\r'))
+                                                       /* stash everything on 
a single line like
+                                                        * VDB and md5-cache do 
*/
+                                                       if (*p == '\n' || *p == 
'\r')
                                                                *p = ' ';
                                                        esc = false;
                                                }
 
-                                               *w++ = *p++;
+                                               /* collapse sequences of spaces 
*/
+                                               if (*w != ' ' || *p != ' ')
+                                                       *w++ = *p++;
+                                               else
+                                                       p++;
                                        }
                                        if (*p == *q && esc) {
                                                /* escaped, move along */
@@ -1625,17 +1705,17 @@ tree_foreach_packages(tree_ctx *ctx, tree_pkg_cb 
callback, void *priv)
                                        atom->BUILDID = atoi(meta.Q_BUILDID);
                                pkgnamelen = 0;
                                if (meta.Q_PATH != NULL) {
-                                       size_t len = strlen(meta.Q_PATH);
-                                       if (len > sizeof(".tbz2") - 1 &&
-                                               memcmp(meta.Q_PATH + len - 
sizeof(".tbz2") - 1,
+                                       size_t plen = strlen(meta.Q_PATH);
+                                       if (plen > sizeof(".tbz2") - 1 &&
+                                               memcmp(meta.Q_PATH + plen - 
sizeof(".tbz2") - 1,
                                                           ".tbz2", 
sizeof(".tbz2") - 1) == 0)
                                        {
                                                pkgnamelen = snprintf(pkgname, 
sizeof(pkgname),
                                                                                
          "%s.tbz2", atom->PF);
                                                pkgname[pkgnamelen - 
(sizeof(".tbz2") - 1)] = '\0';
                                        }
-                                       if (len > sizeof(".gpkg.tar") - 1 &&
-                                               memcmp(meta.Q_PATH + len - 
sizeof(".gpkg.tar") - 1,
+                                       if (plen > sizeof(".gpkg.tar") - 1 &&
+                                               memcmp(meta.Q_PATH + plen - 
sizeof(".gpkg.tar") - 1,
                                                           ".gpkg.tar", 
sizeof(".gpkg.tar") - 1) == 0)
                                        {
                                                pkgnamelen = snprintf(pkgname, 
sizeof(pkgname),
@@ -1742,6 +1822,222 @@ tree_foreach_packages(tree_ctx *ctx, tree_pkg_cb 
callback, void *priv)
        return ret;
 }
 
+#ifdef HAVE_LIBARCHIVE
+struct tree_gtree_cb_ctx {
+       struct archive *archive;
+};
+static la_ssize_t
+tree_gtree_read_cb(struct archive *a, void *cctx, const void **buf)
+{
+       struct tree_gtree_cb_ctx *ctx = cctx;
+       size_t                    size;
+       la_int64_t                offset;  /* unused */
+       int                       ret;
+
+       ret = archive_read_data_block(ctx->archive, buf, &size, &offset);
+       if (ret == ARCHIVE_EOF)
+               return 0;
+       if (ret != ARCHIVE_OK)
+               return -1;
+       (void)offset;
+       /* at this point I sincerely hope size is not going to be over the
+        * unsigned variant */
+       return (la_ssize_t)size;
+}
+static int
+tree_gtree_close_cb(struct archive *a, void *cctx)
+{
+       /* noop */
+       return ARCHIVE_OK;
+}
+
+static int
+tree_foreach_gtree(tree_ctx *ctx, tree_pkg_cb callback, void *priv)
+{
+       tree_cat_ctx             *cat         = NULL;
+       tree_pkg_ctx              pkg;
+       tree_pkg_meta             meta;
+       depend_atom              *atom        = NULL;
+       struct archive           *outer;
+       struct archive           *inner;
+       struct archive_entry     *entry;
+       struct tree_gtree_cb_ctx  cb_ctx;
+       char                     *p;
+       const depend_atom        *query       = ctx->query_atom;
+       bool                      foundcaches = false;
+       size_t                    len;
+       int                       ret         = 0;
+
+       /* reused for every entry */
+       cat       = xzalloc(sizeof(*cat));
+       cat->ctx  = ctx;
+       cat->name = "";
+
+       /* rewind the outer tar, and re-read it, it's slight overhead that
+        * we have to re-read it entirely, but I cannot find an API to
+        * rewind or use an offset to re-read an entry again, and the
+        * overhead should be small given the outer is uncompressed and it's
+        * the second entry */
+       lseek(ctx->tree_fd, 0, SEEK_SET);
+       outer = archive_read_new();
+       archive_read_support_format_all(outer);  /* don't see why not */
+       if (archive_read_open_fd(outer, ctx->tree_fd, BUFSIZ) != ARCHIVE_OK) {
+               warn("unable to read gtree container: %s",
+                        archive_error_string(outer));
+               archive_read_free(outer);
+               return 1;
+       }
+
+       while (archive_read_next_header(outer, &entry) == ARCHIVE_OK) {
+               const char *fname = archive_entry_pathname(entry);
+               if (fname == NULL)
+                       continue;
+               if (strncmp(fname, "repo.tar", sizeof("repo.tar") - 1) == 0 &&
+                       (fname[sizeof("repo.tar") - 1] == '.' ||
+                        fname[sizeof("repo.tar") - 1] == '\0'))
+                       break;
+               entry = NULL;
+       }
+       if (entry == NULL) {
+               archive_read_free(outer);
+               return 1;
+       }
+
+       /* use wrapper to read straight from this archive */
+       inner = archive_read_new();
+       archive_read_support_format_all(inner);
+       archive_read_support_filter_all(inner);
+       memset(&cb_ctx, 0, sizeof(cb_ctx));
+       cb_ctx.archive = outer;
+       if (archive_read_open(inner, &cb_ctx, NULL,
+                                                 tree_gtree_read_cb,
+                                                 tree_gtree_close_cb) != 
ARCHIVE_OK)
+               warn("unable to read gtree data %s: %s", 
archive_entry_pathname(entry),
+                        archive_error_string(inner));
+
+       while (archive_read_next_header(inner, &entry) == ARCHIVE_OK) {
+               const char *fname = archive_entry_pathname(entry);
+
+               if (fname == NULL)
+                       continue;
+
+               if (strncmp(fname, "caches/", sizeof("caches/") - 1) == 0) {
+                       char *nexttok = NULL;
+
+                       foundcaches = true;
+                       fname += sizeof("caches/") - 1;
+                       atom = atom_explode(fname);
+                       if (query != NULL && atom_compare(atom, query) != EQUAL)
+                               continue;
+
+                       /* ok, we're in business */
+                       len = archive_entry_size(entry);
+                       if (len > ctx->cache.storesize) {
+                               ctx->cache.storesize = len;
+                               ctx->cache.store     = 
xrealloc(ctx->cache.store, len);
+                       }
+                       archive_read_data(inner, ctx->cache.store, 
ctx->cache.storesize);
+
+                       memset(&meta, 0, sizeof(meta));
+                       /* entries are strictly single line, starting with KEY= 
(no
+                        * whitespace) */
+                       for (p = strtok_r(ctx->cache.store, "=", &nexttok);
+                                p != NULL;
+                                p = strtok_r(NULL, "=", &nexttok))
+                       {
+                               char **ptr = NULL;
+
+                               if (1 == 0) {
+                                       /* dummy case for syntax */
+                               }
+#define match2(K,N) \
+                               else if (strcmp(p, #N) == 0) \
+                                       ptr = &meta.Q_##K
+#define match(K)  match2(K,K)
+                               match(DEPEND);
+                               match(RDEPEND);
+                               match(SLOT);
+                               match(SRC_URI);
+                               match(RESTRICT);
+                               match(LICENSE);
+                               match(DESCRIPTION);
+                               match(KEYWORDS);
+                               match(INHERITED);
+                               match(IUSE);
+                               match(CDEPEND);
+                               match(PDEPEND);
+                               match(PROVIDE);
+                               match(EAPI);
+                               match(PROPERTIES);
+                               match(BDEPEND);
+                               match(IDEPEND);
+                               match(DEFINED_PHASES);
+                               match(REQUIRED_USE);
+                               match(CONTENTS);
+                               match(USE);
+                               match(EPREFIX);
+                               match(PATH);
+                               match(BUILDID);
+                               match(SIZE);
+                               match2(_eclasses_, eclasses);
+#undef match
+#undef match2
+
+                               /* always advance to end of line, even when 
nothing
+                                * matched */
+                               p = strtok_r(NULL, "\n", &nexttok);
+                               if (p == NULL)
+                                       break;
+                               if (ptr != NULL)
+                                       *ptr = p;
+                       }
+
+                       pkg.name    = atom->PF;
+                       pkg.slot    = meta.Q_SLOT == NULL ? (char *)"0" : 
meta.Q_SLOT;
+                       pkg.repo    = ctx->repo;
+                       pkg.atom    = atom;
+                       pkg.cat_ctx = cat;
+                       pkg.fd      = -2;  /* intentional, meta has already 
been read */
+
+                       if (strcmp(cat->name, atom->CATEGORY) != 0)
+                       {
+                               if (cat->pkg_ctxs != NULL) {
+                                       atom_implode((depend_atom 
*)cat->pkg_ctxs);
+                                       cat->pkg_ctxs = NULL;
+                               }
+                               cat->name     = atom->CATEGORY;
+                               cat->pkg_ctxs = (tree_pkg_ctx **)atom;  /* for 
name */
+                               atom          = NULL;  /* avoid double free */
+                       }
+
+                       /* do call callback with pkg_atom (populate cat and 
pkg) */
+                       ret |= callback(&pkg, priv);
+
+                       if (atom != NULL)
+                               atom_implode(atom);
+               } else if (foundcaches) {
+                       break;  /* stop searching if we processed all cache 
entries */
+               } else if (ctx->repo == NULL &&
+                                  strcmp(fname, "repository") == 0)
+               {
+                       /* fill in repo, so it can be used when requested */
+                       len = archive_entry_size(entry);
+                       ctx->repo = xmalloc(len);
+                       archive_read_data(inner, ctx->repo, len);
+               }
+       }
+
+       /* ensure we don't free double */
+       ctx->repo = NULL;
+       ctx->do_sort = false;
+       if (ctx->cache.storesize > 0)
+               ctx->cache.store[0] = '\0';
+       free(cat);
+
+       return ret;
+}
+#endif
+
 int
 tree_foreach_pkg(tree_ctx *ctx, tree_pkg_cb callback, void *priv,
                bool sort, const depend_atom *query)
@@ -1760,6 +2056,16 @@ tree_foreach_pkg(tree_ctx *ctx, tree_pkg_cb callback, 
void *priv,
        if (ctx->treetype == TREE_PACKAGES)
                return tree_foreach_packages(ctx, callback, priv);
 
+#ifdef HAVE_LIBARCHIVE
+       /* similar a gtree cache can be read sequentially in one go */
+       if (ctx->treetype == TREE_METADATA_GTREE)
+               return tree_foreach_gtree(ctx, callback, priv);
+       if (ctx->treetype == TREE_EBUILD &&
+               ctx->subtree != NULL &&
+               ctx->subtree->treetype == TREE_METADATA_GTREE)
+               return tree_foreach_gtree(ctx->subtree, callback, priv);
+#endif
+
        ret = 0;
        while ((cat_ctx = tree_next_cat(ctx))) {
                while ((pkg_ctx = tree_next_pkg(cat_ctx))) {

diff --git a/libq/tree.h b/libq/tree.h
index 6f2afdb..1a83c6c 100644
--- a/libq/tree.h
+++ b/libq/tree.h
@@ -32,6 +32,7 @@ struct tree_ctx {
        bool do_sort:1;
        enum {
                TREE_UNSET = 0,
+               TREE_METADATA_GTREE,
                TREE_METADATA_MD5,
                TREE_METADATA_PMS,
                TREE_EBUILD,
@@ -154,6 +155,8 @@ tree_ctx *tree_open(const char *sroot, const char *portdir);
 tree_ctx *tree_open_vdb(const char *sroot, const char *svdb);
 tree_ctx *tree_open_ebuild(const char *sroot, const char *portdir);
 tree_ctx *tree_open_binpkg(const char *sroot, const char *spkg);
+#define tree_open_gtree(R,T) tree_open_gtree_int(R,T,false)
+tree_ctx *tree_open_gtree_int(const char *sroot, const char *tar, bool quiet);
 void tree_close(tree_ctx *ctx);
 tree_cat_ctx *tree_open_cat(tree_ctx *ctx, const char *name);
 void tree_close_cat(tree_cat_ctx *cat_ctx);

Reply via email to