commit:     b471c5cb82debe479b2021d7adfffe9e228ac896
Author:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
AuthorDate: Fri May  2 09:52:35 2025 +0000
Commit:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
CommitDate: Fri May  2 10:03:31 2025 +0000
URL:        https://gitweb.gentoo.org/proj/pkgcore/pkgcheck.git/commit/?id=b471c5cb

caches: support compression of cache files

profiles.pickle has become quite fat on disk, reaching 185MB in size.
More information on the source of the issue can be found in the issue linked
below. I've decided to use "zstd -T0" (with the default compression level)
to compress the cache files. This should help with the size of the
cache files, and the performance hit should be negligible.

I've measured the time it takes to load the cache files before and after
this change, and the difference is nil. The time is mostly the cost of
pickle.load, and the compression/decompression is negligible in comparison.

I'm still somewhat concerned about my usage of subprocess.Popen, but I
think it's fine.

Resolves: https://github.com/pkgcore/pkgcheck/issues/735
Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org>

 src/pkgcheck/addons/caches.py   | 30 ++++++++++++++++++++++++------
 src/pkgcheck/addons/profiles.py |  2 +-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/pkgcheck/addons/caches.py b/src/pkgcheck/addons/caches.py
index 9cd13e58..a6c94b73 100644
--- a/src/pkgcheck/addons/caches.py
+++ b/src/pkgcheck/addons/caches.py
@@ -5,6 +5,7 @@ import os
 import pathlib
 import pickle
 import shutil
+import subprocess
 from collections import UserDict
 from dataclasses import dataclass
 from hashlib import blake2b
@@ -79,11 +80,21 @@ class CachedAddon(Addon):
         dirname = f"{repo.repo_id.lstrip(os.sep)}-{token}"
         return pjoin(self.options.cache_dir, "repos", dirname, self.cache.file)
 
-    def load_cache(self, path, fallback=None):
+    def load_cache(self, path: str, fallback=None):
         cache = fallback
         try:
-            with open(path, "rb") as f:
-                cache = pickle.load(f)
+            if path.endswith(".zst"):
+                if not os.path.exists(path):
+                    raise FileNotFoundError(path)
+                with subprocess.Popen(("zstd", "-qdcf", path), 
stdout=subprocess.PIPE) as proc:
+                    if proc.poll():
+                        raise PkgcheckUserException(
+                            f"failed decompressing {self.cache.type} cache: 
{path!r}"
+                        )
+                    cache = pickle.load(proc.stdout)
+            else:
+                with open(path, "rb") as f:
+                    cache = pickle.load(f)
             if cache.version != self.cache.version:
                 logger.debug("forcing %s cache regen due to outdated version", 
self.cache.type)
                 os.remove(path)
@@ -98,11 +109,18 @@ class CachedAddon(Addon):
             cache = fallback
         return cache
 
-    def save_cache(self, data, path):
+    def save_cache(self, data, path: str):
         try:
             os.makedirs(os.path.dirname(path), exist_ok=True)
-            with AtomicWriteFile(path, binary=True) as f:
-                pickle.dump(data, f, protocol=-1)
+            if path.endswith(".zst"):
+                with subprocess.Popen(("zstd", "-T0", "-fqo", path), 
stdin=subprocess.PIPE) as proc:
+                    pickle.dump(data, proc.stdin, protocol=-1)
+                if os.path.exists(path[:-4]):
+                    logger.warning("removing old %s cache file", 
self.cache.type)
+                    os.remove(path[:-4])
+            else:
+                with AtomicWriteFile(path, binary=True) as f:
+                    pickle.dump(data, f, protocol=-1)
         except IOError as e:
             msg = f"failed dumping {self.cache.type} cache: {path!r}: 
{e.strerror}"
             raise PkgcheckUserException(msg)

diff --git a/src/pkgcheck/addons/profiles.py b/src/pkgcheck/addons/profiles.py
index f9a6862e..ec4e8e8e 100644
--- a/src/pkgcheck/addons/profiles.py
+++ b/src/pkgcheck/addons/profiles.py
@@ -119,7 +119,7 @@ class ProfileAddon(caches.CachedAddon):
     non_profile_dirs = frozenset(["desc", "updates"])
 
     # cache registry
-    cache = caches.CacheData(type="profiles", file="profiles.pickle", 
version=2)
+    cache = caches.CacheData(type="profiles", file="profiles.pickle.zst", 
version=3)
 
     @classmethod
     def mangle_argparser(cls, parser):

Reply via email to