Revision: 15673
          http://gar.svn.sourceforge.net/gar/?rev=15673&view=rev
Author:   wahwah
Date:     2011-09-20 08:44:31 +0000 (Tue, 20 Sep 2011)
Log Message:
-----------
catalog-gc: Garbage collecting for the catalog

As of 2011-09-20, there are about 5300 files that are present in the allpkgs
directory, but are not part of any catalogs. This script is the first step -
it doesn't delete any files, but prints rm commands that can be reviewed and
executed later.

Added Paths:
-----------
    csw/mgar/gar/v2/lib/python/catalog_gc.py

Added: csw/mgar/gar/v2/lib/python/catalog_gc.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_gc.py (rev 0)
+++ csw/mgar/gar/v2/lib/python/catalog_gc.py 2011-09-20 08:44:31 UTC (rev 15673)
@@ -0,0 +1,75 @@
+#!/opt/csw/bin/python2.6
+
+"""Garbage-collecting for a catalog.
+
+The allpkgs directory may contain unused files. They should be deleted.
+"""
+
+import optparse
+import logging
+import os.path
+import re
+import common_constants
+
+class Error(Exception):
+  """Base error."""
+
+class UsageError(Error):
+  """Wrong usage."""
+
+
+class CatalogGarbageCollector(object):
+
+  ADDITIONAL_CATALOGS = ("current", "stable")
+
+  def __init__(self, d):
+    logging.debug("CatalogGarbageCollector(%s)", repr(d))
+    self.catalog_dir = d
+
+  def GarbageCollect(self):
+    allpkgs_path = os.path.join(self.catalog_dir, "allpkgs")
+    allpkgs = set()
+    files_in_catalogs = set()
+    catalogs_by_files = {}
+    for p in os.listdir(allpkgs_path):
+      allpkgs.add(p)
+    catalogs_to_check = (
+        tuple(common_constants.DEFAULT_CATALOG_RELEASES)
+        + self.ADDITIONAL_CATALOGS)
+    for catrel in catalogs_to_check:
+      for arch in common_constants.PHYSICAL_ARCHITECTURES:
+        for osrel_long in common_constants.OS_RELS:
+          osrel_short = re.sub(r"^SunOS", r"", osrel_long)
+          catalog_path = os.path.join(
+              self.catalog_dir, catrel, arch, osrel_short)
+          if not os.path.exists(catalog_path):
+            logging.debug("%s does not exist", catalog_path)
+            continue
+          pkg_re = re.compile(r"\.pkg(\.gz)?$")
+          for p in os.listdir(catalog_path):
+            if pkg_re.search(p):
+              # It's a package
+              files_in_catalogs.add(p)
+              l = catalogs_by_files.setdefault(p, [])
+              l.append((catrel, arch, osrel_short))
+    for p in allpkgs.difference(files_in_catalogs):
+      logging.debug("File %s is not used by any catalogs.", p)
+      print "rm %s/%s" % (allpkgs_path, p)
+
+
+def main():
+  parser = optparse.OptionParser()
+  parser.add_option("-c", "--catalog",
+      dest="catalog",
+      help="Catalog path")
+  options, args = parser.parse_args()
+  logging.basicConfig(level=logging.DEBUG)
+  if not options.catalog:
+    parser.print_usage()
+    raise UsageError("Missing catalog option, see --help.")
+  gcg = CatalogGarbageCollector(options.catalog)
+  gcg.GarbageCollect()
+
+
+if __name__ == '__main__':
+  main()


Property changes on: csw/mgar/gar/v2/lib/python/catalog_gc.py
___________________________________________________________________
Added: svn:executable
   + *

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
_______________________________________________
devel mailing list
devel@lists.opencsw.org
https://lists.opencsw.org/mailman/listinfo/devel