-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Hi Paul & Andreas,
>> /org/udd.debian.org/tmp/all-popcon-results.txt.gz:279 - illegal package >> name Package: 7kaa-data:all 0 0 0 1 > > Looking at the file it seems that the format has changed. I suggest > UDD needs to adapt to the new format rather than popcon.debian.org > reverting to the old format. Actually, I suspect that this change is entirely accidental due to multiarch-enabled people submitting popcon data and they now accidentally have an architecture specifier in their popcon package lists. I don't believe that it really makes any sense for all-popcon-results.txt.gz to contain architecture specifiers like this and presume that at some stage, the popcon maintainers will update the way they process reports to handle multiarch. In the meantime, UDD can either: 1) silence the warnings with a grep -v on the end of the cron job 2) throw away these arch-qualified package names in the gatherer 3) import the data exactly as is (giving a strange mix of unqualified and qualified names) 4) add the arch-qualified package data to the non-arch-qualified data. In discussions on #debian-qa a week or more ago, the feeling was that (4) was the best option for UDD at this stage; the attached patch implements this. I'll commit it to svn in a few days' time if there are no objections. cheers Stuart - -- Stuart Prescott www.nanoNANOnano.net -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iEYEARECAAYFAk9TqpUACgkQn+i4zXHF0ahf6wCglR0GfEUsqoPtatjoFsmiMEqz yPoAnRRnCiQfDcRbt14mMDBLSDZxRS7B =TiHU -----END PGP SIGNATURE-----
>From 4a1523abb6639251393b76ffbe8dcc7bba05a4bc Mon Sep 17 00:00:00 2001 From: Stuart Prescott <stuart+deb...@nanonanonano.net> Date: Sun, 4 Mar 2012 17:16:47 +0000 Subject: [PATCH] Add arch-specified data to package data in popcon Strip off :architecture specifiers from popcon's all-popcon-results.txt.gz and add these data to the installation data for non-architecture-qualified packages. (if popcon later fixes its export to not include these data, backing out this patch would be sensible as it is quite memory hungry) --- udd/popcon_gatherer.py | 19 +++++++++++++------ 1 files changed, 13 insertions(+), 6 deletions(-) diff --git a/udd/popcon_gatherer.py b/udd/popcon_gatherer.py index d874080..2b22005 100644 --- a/udd/popcon_gatherer.py +++ b/udd/popcon_gatherer.py @@ -44,11 +44,12 @@ class popcon_gatherer(gatherer): cur.execute("DELETE FROM " + table_src_average) # used for ignoring ubuntu's broken popcon lines - ascii_match = re.compile("^[A-Za-z0-9-.+_]+$") + ascii_match = re.compile("^(?P<package>[A-Za-z0-9-.+_]+)(:(?P<arch>[a-z0-9]+))?$") linenr = 0 - d = {} + votes = {} for line in popcon: + d = {} linenr += 1 name, data = line.split(None, 1) if name == "Submissions:": @@ -57,18 +58,24 @@ class popcon_gatherer(gatherer): continue try: (name, vote, old, recent, nofiles) = data.split() - d['name'] = name for k in ['vote', 'old', 'recent', 'nofiles']: exec '%s = int(%s)' % (k,k) exec 'd["%s"] = %s' % (k,k) d['insts'] = vote + old + recent + nofiles - if ascii_match.match(name) == None: + matches = ascii_match.match(name) + if matches == None: print "%s:%d - illegal package name %s" % (my_config['path'], linenr, line.rstrip("\n")) continue - query = "EXECUTE pop_insert(%(name)s, %(insts)s, %(vote)s, %(old)s, %(recent)s, %(nofiles)s)" - cur.execute(query, d) + d['name'] = matches.group('package') + if d['name'] in votes: + for k in ['vote', 'old', 'recent', 'nofiles', 'insts']: + votes[d['name']][k] += d[k] + else: + votes[d['name']] = d except ValueError: continue 
+ query = "EXECUTE pop_insert(%(name)s, %(insts)s, %(vote)s, %(old)s, %(recent)s, %(nofiles)s)" + cur.executemany(query, votes.values()) cur.execute("DEALLOCATE pop_insert") -- 1.7.2.5