commit: b471c5cb82debe479b2021d7adfffe9e228ac896 Author: Arthur Zamarin <arthurzam <AT> gentoo <DOT> org> AuthorDate: Fri May 2 09:52:35 2025 +0000 Commit: Arthur Zamarin <arthurzam <AT> gentoo <DOT> org> CommitDate: Fri May 2 10:03:31 2025 +0000 URL: https://gitweb.gentoo.org/proj/pkgcore/pkgcheck.git/commit/?id=b471c5cb
caches: support compression of cache files profiles.pickle has become quite fat on disk, getting to 185MB in size. More information on the source of the issue can be found in the issue linked below, but I've decided to use "zstd -T0" (with default compression level) to compress the cache files. This should help with the size of the cache files, and the performance hit should be negligible. I've measured the time it takes to load the cache files before and after this change, and the difference is nil. The time is mostly the cost of pickle.load, and the compression/decompression is negligible in comparison. I'm still somewhat concerned about my usage of subprocess.Popen, but I think it's fine. Resolves: https://github.com/pkgcore/pkgcheck/issues/735 Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org> src/pkgcheck/addons/caches.py | 30 ++++++++++++++++++++++++------ src/pkgcheck/addons/profiles.py | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/pkgcheck/addons/caches.py b/src/pkgcheck/addons/caches.py index 9cd13e58..a6c94b73 100644 --- a/src/pkgcheck/addons/caches.py +++ b/src/pkgcheck/addons/caches.py @@ -5,6 +5,7 @@ import os import pathlib import pickle import shutil +import subprocess from collections import UserDict from dataclasses import dataclass from hashlib import blake2b @@ -79,11 +80,21 @@ class CachedAddon(Addon): dirname = f"{repo.repo_id.lstrip(os.sep)}-{token}" return pjoin(self.options.cache_dir, "repos", dirname, self.cache.file) - def load_cache(self, path, fallback=None): + def load_cache(self, path: str, fallback=None): cache = fallback try: - with open(path, "rb") as f: - cache = pickle.load(f) + if path.endswith(".zst"): + if not os.path.exists(path): + raise FileNotFoundError(path) + with subprocess.Popen(("zstd", "-qdcf", path), stdout=subprocess.PIPE) as proc: + if proc.poll(): + raise PkgcheckUserException( + f"failed decompressing {self.cache.type} cache: {path!r}" + ) + cache = pickle.load(proc.stdout) + else: 
+ with open(path, "rb") as f: + cache = pickle.load(f) if cache.version != self.cache.version: logger.debug("forcing %s cache regen due to outdated version", self.cache.type) os.remove(path) @@ -98,11 +109,18 @@ class CachedAddon(Addon): cache = fallback return cache - def save_cache(self, data, path): + def save_cache(self, data, path: str): try: os.makedirs(os.path.dirname(path), exist_ok=True) - with AtomicWriteFile(path, binary=True) as f: - pickle.dump(data, f, protocol=-1) + if path.endswith(".zst"): + with subprocess.Popen(("zstd", "-T0", "-fqo", path), stdin=subprocess.PIPE) as proc: + pickle.dump(data, proc.stdin, protocol=-1) + if os.path.exists(path[:-4]): + logger.warning("removing old %s cache file", self.cache.type) + os.remove(path[:-4]) + else: + with AtomicWriteFile(path, binary=True) as f: + pickle.dump(data, f, protocol=-1) except IOError as e: msg = f"failed dumping {self.cache.type} cache: {path!r}: {e.strerror}" raise PkgcheckUserException(msg) diff --git a/src/pkgcheck/addons/profiles.py b/src/pkgcheck/addons/profiles.py index f9a6862e..ec4e8e8e 100644 --- a/src/pkgcheck/addons/profiles.py +++ b/src/pkgcheck/addons/profiles.py @@ -119,7 +119,7 @@ class ProfileAddon(caches.CachedAddon): non_profile_dirs = frozenset(["desc", "updates"]) # cache registry - cache = caches.CacheData(type="profiles", file="profiles.pickle", version=2) + cache = caches.CacheData(type="profiles", file="profiles.pickle.zst", version=3) @classmethod def mangle_argparser(cls, parser):
