Sending again. Em 28/01/2014 16:36, "Maciej (Matchek) Bliziński" <mac...@opencsw.org> escreveu:
> Hi Carsten, > > More comments! I hope you'll be able to reduce the size of this script. > > 2014-01-28 <cgrze...@users.sourceforge.net> > >> Revision: 22910 >> http://sourceforge.net/p/gar/code/22910 >> Author: cgrzemba >> Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014) >> Log Message: >> ----------- >> use argparse, add out of order pkg compare >> >> Modified Paths: >> -------------- >> csw/mgar/gar/v2/lib/python/compare_catalog.py >> >> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py >> =================================================================== >> --- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 >> 12:36:44 UTC (rev 22909) >> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 >> 16:20:56 UTC (rev 22910) >> @@ -2,55 +2,108 @@ >> >> import cjson >> import logging >> -import optparse >> +import argparse >> import urllib2 >> import sys >> +import re >> >> logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s') >> logger = logging.getLogger(__name__) >> >> +remote_scheme = ['http','https'] >> +local_scheme = ['file'] >> + >> +def prepareCatListFromURI(uri): >> + catlst = [] >> + if '://' in uri: >> > > We can say that you have to have a valid URI that either starts with > http:// or https:// or file:// > > >> + scheme = uri.split(':')[0] >> + if scheme in remote_scheme: >> + logger.info("fetch remote %s", uri) >> + data = urllib2.urlopen(uri).read() >> > > Let's use the requests module. We have a package. > > > http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250 > > >> + catlst = cjson.decode(data) >> + for e in catlst: >> + del e[9] >> + return catlst >> + elif scheme in local_scheme: >> + uri = re.sub('.*://','',uri) >> + else: >> + logger.error('unsupported URI format') >> + sys.exit(4) >> + with open(uri) as lcat: >> + logger.info("fetch local %s", uri) >> + for line in lcat: # skip 4 lines header '# CREATIONDATE' >> > > We already have a parser, please use it. 
> > http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66 > > >> + if line.startswith("# CREATIONDATE"): >> + break >> + for line in lcat: >> + if line.startswith("-----BEGIN PGP SIGNATURE"): >> + break >> + catlst.append(line.rstrip().split(' ')) >> + return catlst >> + >> +def compareOutOfOrder(a_catlst, b_catlst, idx): >> + a_pkgName2Idx = {} >> + i = idx >> + for j in range(idx,len(a_catlst)): >> + a_pkgName2Idx[a_catlst[j][0]] = j >> + # import pdb; pdb.set_trace() >> + while i < len(b_catlst): >> + if b_catlst[i][0] in a_pkgName2Idx: >> + if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]: >> + logger.warning("pkgs different at {0},{1}: {2} >> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i])) >> + sys.exit(1) >> + else: >> + logger.warning("not in acat: %s", b_catlst[i]) >> + sys.exit(1) >> + i += 1 >> + b_pkgName2Idx = {} >> + for j in range(idx,len(b_catlst)): >> + b_pkgName2Idx[b_catlst[j][0]] = j >> + # import pdb; pdb.set_trace() >> + i = idx >> + while i < len(a_catlst): >> + if a_catlst[i][0] not in b_pkgName2Idx: >> + logger.warning("not in bcat: %s", a_catlst[i]) >> + sys.exit(1) >> + i += 1 >> > > Why not convert both to a data structure consisting of basic types: nested > lists and dicts? Then you can just compare them using the == operator. If > you wanted some diagnostic output to display the difference, you can always > serialize them and display the textual diff - it will save you lots of > lines of code. 
> > >> def main(): >> - parser = optparse.OptionParser() >> - parser.add_option("-v","--verbose", dest="verbose", >> action="store_true",default=False) >> - parser.add_option("-a","--existing-catalog", dest="oldcatalog", >> - help='set URI of existing catalog', metavar = >> 'catalog') >> - parser.add_option("-b","--new-catalog", dest="newcatalog", >> - help='set URI of catalog to generate', metavar = >> 'catalog') >> - options, args = parser.parse_args() >> + parser = argparse.ArgumentParser() >> + parser.add_argument("-v","--verbose", dest="verbose", >> action="store_true",default=False) >> + parser.add_argument("acat",help="catalog URI") >> + parser.add_argument("bcat",help="catalog URI") >> + args = parser.parse_args() >> opterror = False >> - if options.verbose: >> + if args.verbose: >> logger.setLevel(logging.INFO) >> - if options.debug: >> - logger.setLevel(logging.DEBUG) >> - if options.newcatalog is None or options.oldcatalog is None: >> - logger.error("mandatory option missing") >> + if args.acat is None or args.bcat is None: >> + logger.error("mandatory args 'acat' 'bcat' missing") >> sys.exit(2) >> - oldcat = options.oldcatalog >> - newcat = options.newcatalog >> - logger.info(" compare %s with %s", oldcat, newcat) >> >> - data = urllib2.urlopen(oldcat).read() >> - a_catlst = cjson.decode(data) >> - for e in a_catlst: >> - del e[9] >> - b_catlst = [] >> - with open(newcat) as nc: >> - for i in range(4): # skip 4 lines header >> - nc.readline() >> - for cl in nc.readlines(): >> - if "-----BEGIN" == cl.split(' ')[0]: >> - break >> - b_catlst.append(cl.rstrip().split(' ')) >> + logger.info("fetch cat_a %s", args.acat) >> + a_catlst = prepareCatListFromURI(args.acat) >> + >> + logger.info("fetch cat_b %s", args.bcat) >> + b_catlst = prepareCatListFromURI(args.bcat) >> + >> + logger.info("compare ...") >> if len(a_catlst) != len(b_catlst): >> - logger.warning("a has %d, b has %d >> packges",len(a_catlst),len(b_catlst)) >> - sys.exit(1) >> + logger.warning("a 
has %d, b has %d >> packages",len(a_catlst),len(b_catlst)) >> + # sys.exit(1) >> for i in range(len(b_catlst)): >> - if b_catlst[i] != a_catlst[i] : >> - logger.warning("a is {0}, b is >> {1}".format(a_catlst[i],b_catlst[i])) >> - sys.exit(1) >> + try: >> + if b_catlst[i] != a_catlst[i] : >> + if b_catlst[i][0] != a_catlst[i][0]: >> + logger.warning("packages out of order: A: %s; B: >> %s",a_catlst[i][0], b_catlst[i][0]) >> > > Hm, what I meant is that out of order comparing: > > 1. should just work > 2. should not be a special case > > The code should use such data structures that the ordering doesn't matter. > For example, if you use a dict, then the ordering doesn't matter: > > >>> a = dict([('a', 1), ('b', 2)]) > >>> b = dict([('b', 2), ('a', 1)]) > >>> a == b > True > > > >> + compareOutOfOrder(a_catlst, b_catlst, i) >> + break >> + else: >> + logger.warning("pkgs different: {0} >> {1}".format(a_catlst[i],b_catlst[i])) >> + sys.exit(1) >> + except IndexError as e: >> + logger.info("package %s not in acat", b_catlst[i]) >> >> # import pdb; pdb.set_trace() >> - logger.debug("catalogs are same") >> + logger.info("catalogs are same") >> sys.exit(0) >> >> >> >> This was sent by the SourceForge.net collaborative development platform, >> the world's largest Open Source development site. >> >> >