Hi Carsten, more comments! I hope you'll be able to reduce the size of this script.
2014-01-28 <cgrze...@users.sourceforge.net> > Revision: 22910 > http://sourceforge.net/p/gar/code/22910 > Author: cgrzemba > Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014) > Log Message: > ----------- > use argparse, add out of order pkg compare > > Modified Paths: > -------------- > csw/mgar/gar/v2/lib/python/compare_catalog.py > > Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py > =================================================================== > --- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 > 12:36:44 UTC (rev 22909) > +++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 > 16:20:56 UTC (rev 22910) > @@ -2,55 +2,108 @@ > > import cjson > import logging > -import optparse > +import argparse > import urllib2 > import sys > +import re > > logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s') > logger = logging.getLogger(__name__) > > +remote_scheme = ['http','https'] > +local_scheme = ['file'] > + > +def prepareCatListFromURI(uri): > + catlst = [] > + if '://' in uri: > We can say that you have to have a valid URI that starts with either http://, https://, or file:// > + scheme = uri.split(':')[0] > + if scheme in remote_scheme: > + logger.info("fetch remote %s", uri) > + data = urllib2.urlopen(uri).read() > Let's use the requests module. We have a package. http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250 > + catlst = cjson.decode(data) > + for e in catlst: > + del e[9] > + return catlst > + elif scheme in local_scheme: > + uri = re.sub('.*://','',uri) > + else: > + logger.error('unsupported URI format') > + sys.exit(4) > + with open(uri) as lcat: > + logger.info("fetch local %s", uri) > + for line in lcat: # skip 4 lines header '# CREATIONDATE' > We already have a parser, please use it. 
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66 > + if line.startswith("# CREATIONDATE"): > + break > + for line in lcat: > + if line.startswith("-----BEGIN PGP SIGNATURE"): > + break > + catlst.append(line.rstrip().split(' ')) > + return catlst > + > +def compareOutOfOrder(a_catlst, b_catlst, idx): > + a_pkgName2Idx = {} > + i = idx > + for j in range(idx,len(a_catlst)): > + a_pkgName2Idx[a_catlst[j][0]] = j > + # import pdb; pdb.set_trace() > + while i < len(b_catlst): > + if b_catlst[i][0] in a_pkgName2Idx: > + if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]: > + logger.warning("pkgs different at {0},{1}: {2} > {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i])) > + sys.exit(1) > + else: > + logger.warning("not in acat: %s", b_catlst[i]) > + sys.exit(1) > + i += 1 > + b_pkgName2Idx = {} > + for j in range(idx,len(b_catlst)): > + b_pkgName2Idx[b_catlst[j][0]] = j > + # import pdb; pdb.set_trace() > + i = idx > + while i < len(a_catlst): > + if a_catlst[i][0] not in b_pkgName2Idx: > + logger.warning("not in bcat: %s", a_catlst[i]) > + sys.exit(1) > + i += 1 > Why not convert both to a data structure consisting of basic types: nested lists and dicts? Then you can just compare them using the == operator. If you wanted some diagnostic output to display the difference, you can always serialize them and display the textual diff - it will save you lots of lines of code. 
> def main(): > - parser = optparse.OptionParser() > - parser.add_option("-v","--verbose", dest="verbose", > action="store_true",default=False) > - parser.add_option("-a","--existing-catalog", dest="oldcatalog", > - help='set URI of existing catalog', metavar = > 'catalog') > - parser.add_option("-b","--new-catalog", dest="newcatalog", > - help='set URI of catalog to generate', metavar = > 'catalog') > - options, args = parser.parse_args() > + parser = argparse.ArgumentParser() > + parser.add_argument("-v","--verbose", dest="verbose", > action="store_true",default=False) > + parser.add_argument("acat",help="catalog URI") > + parser.add_argument("bcat",help="catalog URI") > + args = parser.parse_args() > opterror = False > - if options.verbose: > + if args.verbose: > logger.setLevel(logging.INFO) > - if options.debug: > - logger.setLevel(logging.DEBUG) > - if options.newcatalog is None or options.oldcatalog is None: > - logger.error("mandatory option missing") > + if args.acat is None or args.bcat is None: > + logger.error("mandatory args 'acat' 'bcat' missing") > sys.exit(2) > - oldcat = options.oldcatalog > - newcat = options.newcatalog > - logger.info(" compare %s with %s", oldcat, newcat) > > - data = urllib2.urlopen(oldcat).read() > - a_catlst = cjson.decode(data) > - for e in a_catlst: > - del e[9] > - b_catlst = [] > - with open(newcat) as nc: > - for i in range(4): # skip 4 lines header > - nc.readline() > - for cl in nc.readlines(): > - if "-----BEGIN" == cl.split(' ')[0]: > - break > - b_catlst.append(cl.rstrip().split(' ')) > + logger.info("fetch cat_a %s", args.acat) > + a_catlst = prepareCatListFromURI(args.acat) > + > + logger.info("fetch cat_b %s", args.bcat) > + b_catlst = prepareCatListFromURI(args.bcat) > + > + logger.info("compare ...") > if len(a_catlst) != len(b_catlst): > - logger.warning("a has %d, b has %d > packges",len(a_catlst),len(b_catlst)) > - sys.exit(1) > + logger.warning("a has %d, b has %d > packages",len(a_catlst),len(b_catlst)) > 
+ # sys.exit(1) > for i in range(len(b_catlst)): > - if b_catlst[i] != a_catlst[i] : > - logger.warning("a is {0}, b is > {1}".format(a_catlst[i],b_catlst[i])) > - sys.exit(1) > + try: > + if b_catlst[i] != a_catlst[i] : > + if b_catlst[i][0] != a_catlst[i][0]: > + logger.warning("packages out of order: A: %s; B: > %s",a_catlst[i][0], b_catlst[i][0]) > Hm, what I meant is that out-of-order comparison (1) should just work, and (2) should not be a special case. The code should use data structures for which the ordering doesn't matter. For example, if you use a dict, then the ordering doesn't matter: >>> a = dict([('a', 1), ('b', 2)]) >>> b = dict([('b', 2), ('a', 1)]) >>> a == b True > + compareOutOfOrder(a_catlst, b_catlst, i) > + break > + else: > + logger.warning("pkgs different: {0} > {1}".format(a_catlst[i],b_catlst[i])) > + sys.exit(1) > + except IndexError as e: > + logger.info("package %s not in acat", b_catlst[i]) > > # import pdb; pdb.set_trace() > - logger.debug("catalogs are same") > + logger.info("catalogs are same") > sys.exit(0) > > > > This was sent by the SourceForge.net collaborative development platform, > the world's largest Open Source development site. > >