-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Hi Andreas,
attached is a patch to: * rename the ddtp table to descriptions * create ubuntu_descriptions and derivatives_descriptions tables * import squeeze's long descriptions into the descriptions table, referencing them by calculated md5 in the packages table Comments welcome -- the ddtp importer may need some changing to match these changes. cheers Stuart - -- Stuart Prescott www.nanoNANOnano.net -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iEYEARECAAYFAk82frMACgkQn+i4zXHF0aib/ACfXs6Pbqq8J6/J1MXwsRLOoDNw bqUAn3pJJRmf5RbjB2X6ArCh9m4WAjL0 =wKIS -----END PGP SIGNATURE-----
>From f6942dbc3efdc956206865b90312e4e3fbc720fc Mon Sep 17 00:00:00 2001 From: Stuart Prescott <stuart+deb...@nanonanonano.net> Date: Sat, 11 Feb 2012 12:11:36 +0000 Subject: [PATCH 1/2] Import long description in to descriptions table * Rename "ddtp" table to "descriptions"; also create "ubuntu_descriptions" and "derivatives_descriptions". * Add a "descriptions-table" configuration key for packages * If set, calculate the Description-md5 for each package and add that to the packages table; push the long description information into the descriptions table. --- config-org.yaml | 33 ++++++++++++++++++++++- sql/setup.sql | 25 ++++++++++++++++- sql/upgrade.sql | 35 ++++++++++++++++++++++++ udd/packages_gatherer.py | 66 +++++++++++++++++++++++++++++++++++++++------- 4 files changed, 146 insertions(+), 13 deletions(-) diff --git a/config-org.yaml b/config-org.yaml index 938c51b..f2f4fbb 100644 --- a/config-org.yaml +++ b/config-org.yaml @@ -168,6 +168,7 @@ debian-squeeze: packages-schema: packages sources-schema: sources release: squeeze + descriptions-table: descriptions debian-squeeze-security: type: src-pkg @@ -183,6 +184,7 @@ debian-squeeze-security: packages-schema: packages sources-schema: sources release: squeeze-security + descriptions-table: descriptions debian-squeeze-proposed-updates: type: src-pkg @@ -196,6 +198,7 @@ debian-squeeze-proposed-updates: packages-schema: packages sources-schema: sources release: squeeze-proposed-updates + descriptions-table: descriptions debian-squeeze-updates: type: src-pkg @@ -209,6 +212,7 @@ debian-squeeze-updates: packages-schema: packages sources-schema: sources release: squeeze-updates + descriptions-table: descriptions debian-backports-squeeze: type: src-pkg @@ -222,6 +226,7 @@ debian-backports-squeeze: packages-schema: packages sources-schema: sources release: squeeze + descriptions-table: descriptions #debian-backports-squeeze-sloppy: # type: src-pkg @@ -235,6 +240,7 @@ debian-backports-squeeze: # packages-schema: packages # sources-schema: sources # release: squeeze +# descriptions-table: descriptions debian-wheezy: type: src-pkg @@ -450,6 +456,7 @@ ubuntu-natty: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-natty-security: type: src-pkg @@ -463,6 +470,7 @@ ubuntu-natty-security: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-natty-updates: type: src-pkg @@ -476,6 +484,7 @@ ubuntu-natty-updates: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-natty-proposed: type: src-pkg @@ -489,6 +498,7 @@ ubuntu-natty-proposed: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-natty-backports: type: src-pkg @@ -502,6 +512,7 @@ ubuntu-natty-backports: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-maverick: @@ -516,6 +527,7 @@ ubuntu-maverick: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-maverick-security: type: src-pkg @@ -529,6 +541,7 @@ ubuntu-maverick-security: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-maverick-updates: type: src-pkg @@ -542,6 +555,7 @@ ubuntu-maverick-updates: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-maverick-proposed: type: src-pkg @@ -555,6 +569,7 @@ ubuntu-maverick-proposed: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-maverick-backports: type: src-pkg @@ -568,6 +583,7 @@ ubuntu-maverick-backports: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-lucid: @@ -582,6 +598,7 @@ ubuntu-lucid: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-lucid-security: type: src-pkg @@ -595,6 +612,7 @@ ubuntu-lucid-security: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-lucid-updates: type: src-pkg @@ -608,6 +626,7 @@ ubuntu-lucid-updates: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-lucid-proposed: type: src-pkg @@ -621,6 +640,7 @@ ubuntu-lucid-proposed: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-lucid-backports: type: src-pkg @@ -634,6 +654,7 @@ ubuntu-lucid-backports: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-karmic: @@ -648,6 +669,7 @@ ubuntu-karmic: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-karmic-security: type: src-pkg @@ -661,6 +683,7 @@ ubuntu-karmic-security: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-karmic-updates: type: src-pkg @@ -674,6 +697,7 @@ ubuntu-karmic-updates: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-karmic-proposed: type: src-pkg @@ -687,6 +711,7 @@ ubuntu-karmic-proposed: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-karmic-backports: type: src-pkg @@ -700,6 +725,7 @@ ubuntu-karmic-backports: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions #ubuntu-jaunty: @@ -741,6 +767,7 @@ ubuntu-hardy: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-hardy-security: type: src-pkg @@ -754,6 +781,7 @@ ubuntu-hardy-security: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-hardy-updates: type: src-pkg @@ -767,6 +795,7 @@ ubuntu-hardy-updates: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-hardy-proposed: type: src-pkg @@ -780,6 +809,7 @@ ubuntu-hardy-proposed: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions ubuntu-hardy-backports: type: src-pkg @@ -793,6 +823,7 @@ ubuntu-hardy-backports: uploaders-table: ubuntu_uploaders packages-schema: packages sources-schema: sources + descriptions-table: ubuntu_descriptions #ubuntu-dapper: @@ -972,7 +1003,7 @@ ddtp: mirror: ftp.debian.org/debian files: Translation-.*\.gz releases: sid lenny squeeze - table: ddtp + table: descriptions schema: ddtp ftpnew: diff --git a/sql/setup.sql b/sql/setup.sql index d433d5d..315556b 100644 --- a/sql/setup.sql +++ b/sql/setup.sql @@ -601,7 +601,7 @@ GRANT SELECT ON all_packages TO PUBLIC; GRANT SELECT ON all_packages_distrelcomparch TO PUBLIC; GRANT SELECT ON all_bugs TO PUBLIC; -CREATE TABLE ddtp ( +CREATE TABLE descriptions ( package text, release text, language text, @@ -610,8 +610,29 @@ CREATE TABLE ddtp ( description_md5 text, -- md5 sum of the original English description PRIMARY KEY (package, release, language, description, description_md5) ); +GRANT SELECT ON descriptions TO PUBLIC; -GRANT SELECT ON ddtp TO PUBLIC; +CREATE TABLE ubuntu_descriptions ( + package text, + release text, + language text, + description text, + long_description text, + description_md5 text, -- md5 sum of the original English description + PRIMARY KEY (package, release, language, description, description_md5) +); +GRANT SELECT ON ubuntu_descriptions TO PUBLIC; + +CREATE TABLE derivatives_descriptions ( + package text, + release text, + language text, + description text, + long_description text, + description_md5 text, -- md5 sum of the original English description + PRIMARY KEY (package, release, language, description, description_md5) +); +GRANT SELECT ON derivatives_descriptions TO PUBLIC; -- active_dds view CREATE VIEW active_dds AS diff --git a/sql/upgrade.sql b/sql/upgrade.sql index 58e0b87..24c489b 100644 --- a/sql/upgrade.sql +++ b/sql/upgrade.sql @@ -365,3 +365,38 @@ CREATE TABLE ddtp ( PRIMARY KEY (package, release, language, description, description_md5) ); GRANT SELECT ON ddtp TO PUBLIC; +-- 2012-02-09 +-- Add description tables for ubuntu and derivatives; rename ddtp table +ALTER TABLE ddtp RENAME TO descriptions; +GRANT SELECT ON descriptions TO PUBLIC; + +CREATE TABLE ubuntu_descriptions ( + package text, + release text, + language text, + description text, + long_description text, + description_md5 text, -- md5 sum of the original English description + PRIMARY KEY (package, release, language, description, description_md5) +); +GRANT SELECT ON ubuntu_descriptions TO PUBLIC; + +CREATE TABLE derivatives_descriptions ( + package text, + release text, + language text, + description text, + long_description text, + description_md5 text, -- md5 sum of the original English description + PRIMARY KEY (package, release, language, description, description_md5) +); +GRANT SELECT ON derivatives_descriptions TO PUBLIC; + +DROP VIEW all_packages; +ALTER TABLE packages DROP COLUMN long_description; +ALTER TABLE ubuntu_packages DROP COLUMN long_description; +ALTER TABLE derivatives_packages DROP COLUMN long_description; +CREATE VIEW all_packages AS +SELECT * FROM packages +UNION ALL SELECT * FROM ubuntu_packages +UNION ALL SELECT * FROM derivatives_packages; diff --git a/udd/packages_gatherer.py b/udd/packages_gatherer.py index 3748f76..cd8e454 100644 --- a/udd/packages_gatherer.py +++ b/udd/packages_gatherer.py @@ -17,6 +17,7 @@ from gatherer import gatherer from time import time import email.Utils import re +import hashlib def get_gatherer(connection, config, source): return packages_gatherer(connection, config, source) @@ -41,7 +42,7 @@ class packages_gatherer(gatherer): pkgquery = """EXECUTE package_insert (%(Package)s, %(Version)s, %(Architecture)s, %(Maintainer)s, %(maintainer_name)s, %(maintainer_email)s, - %(Description)s, %(Long_Description)s, %(Description-md5)s, %(Source)s, %(Source_Version)s, %(Essential)s, + %(Description)s, %(Description-md5)s, %(Source)s, %(Source_Version)s, %(Essential)s, %(Depends)s, %(Recommends)s, %(Suggests)s, %(Enhances)s, %(Pre-Depends)s, %(Breaks)s, %(Installed-Size)s, %(Homepage)s, %(Size)s, %(Build-Essential)s, %(Origin)s, %(SHA1)s, @@ -49,6 +50,10 @@ class packages_gatherer(gatherer): %(Tag)s, %(Task)s, %(Python-Version)s, %(Ruby-Versions)s, %(Provides)s, %(Conflicts)s, %(SHA256)s, %(Original-Maintainer)s)""" + descriptionquery = """EXECUTE description_insert + (%(Package)s, %(Language)s, + %(Description)s, %(Long_Description)s, %(Description-md5)s)""" + def __init__(self, connection, config, source): gatherer.__init__(self, connection, config, source) # The ID for the distribution we want to include @@ -61,6 +66,7 @@ class packages_gatherer(gatherer): # because different architectures include packages for architecture 'all' # with the same version, and we don't want these duplicate entries self.imported_all_pkgs = {} + self.add_descriptions = False def build_dict(self, control): """Build a dictionary from the control dictionary. @@ -92,6 +98,7 @@ class packages_gatherer(gatherer): it is called.The Format of the sequence is expected to be that of a debian packages file.""" pkgs = [] + pkgdescs = [] # The fields that are to be read. Other fields are ignored for control in deb822.Packages.iter_paragraphs(sequence): @@ -106,11 +113,20 @@ class packages_gatherer(gatherer): # We split the description if 'Description' in d: - if len(d['Description'].split("\n",1)) > 1: - d['Long_Description'] = d['Description'].split("\n",1)[1] - else: - d['Long_Description'] = '' - d['Description'] = d['Description'].split("\n",1)[0] + if self.add_descriptions and \ + ('Description-md5' not in d or not d['Description-md5']): + try: + d['Description-md5'] = hashlib.md5((d['Description']+"\n").encode('utf-8')).hexdigest() + d['Language'] = 'en' + pkgdescs.append(d) + except UnicodeEncodeError: + self.warned_about['%s description encoding' % d['Package']] = 1 + if len(d['Description'].split("\n",1)) > 1: + d['Long_Description'] = d['Description'].split("\n",1)[1] + else: + d['Long_Description'] = '' + d['Description'] = d['Description'].split("\n",1)[0] + # Calculate Description-md5 for releases that don't include it # Convert numbers to numbers for f in ['Installed-Size', 'Size']: @@ -136,6 +152,12 @@ class packages_gatherer(gatherer): except psycopg2.ProgrammingError: print "Error while inserting packages" raise + try: + if self.add_descriptions: + cur.executemany(self.descriptionquery, pkgdescs) + except psycopg2.ProgrammingError: + print "Error while inserting descriptions" + raise def setup(self): if 'schema-dir' in self.config['general']: @@ -162,6 +184,9 @@ class packages_gatherer(gatherer): # Get distribution ID self._distr = src_cfg['distribution'] + self.add_descriptions = ('descriptions-table' in self.my_config + and self.my_config['descriptions-table']) + cur = self.cursor() # defer constraints checking until the end of the transaction cur.execute("SET CONSTRAINTS ALL DEFERRED") @@ -169,12 +194,17 @@ class packages_gatherer(gatherer): # For every part and every architecture, import the packages into the DB for comp in src_cfg['components']: cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s'" %\ - (table, self._distr, src_cfg['release'], comp)) + (table, self._distr, src_cfg['release'], comp)) + # For releases that have long descriptions in Packages and not in Translation, + # add the description to the configured descriptions table. + if self.add_descriptions: + cur.execute("DELETE FROM %s WHERE release = '%s' AND language = '%s'" %\ + (src_cfg['descriptions-table'], src_cfg['release'], 'en')) for arch in src_cfg['archs']: path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz') try: cur.execute("""PREPARE package_insert AS INSERT INTO %s - (Package, Version, Architecture, Maintainer, maintainer_name, maintainer_email, Description, Long_Description, description_md5, Source, + (Package, Version, Architecture, Maintainer, maintainer_name, maintainer_email, Description, description_md5, Source, Source_Version, Essential, Depends, Recommends, Suggests, Enhances, Pre_Depends, Breaks, Installed_Size, Homepage, Size, build_essential, origin, sha1, replaces, section, @@ -184,8 +214,22 @@ class packages_gatherer(gatherer): VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, - $29, $30, $31, $32, $33, $34, $35, $36, $37, '%s', '%s', '%s') + $29, $30, $31, $32, $33, $34, $35, $36, '%s', '%s', '%s') """ % (table, self._distr, src_cfg['release'], comp)) + if self.add_descriptions: + cur.execute("""PREPARE description_insert AS + INSERT INTO %s + (package, release, language, description, long_description, description_md5) + (SELECT $1 AS package, '%s' AS release, $2 AS language, + $3 AS description, $4 AS long_description, $5 AS description_md5 + WHERE NOT EXISTS + (SELECT 1 + FROM %s + WHERE package=$1 AND release='%s' AND language=$2 AND + description=$3 AND long_description=$4 AND description_md5=$5)) + + """ % (src_cfg['descriptions-table'], src_cfg['release'], + src_cfg['descriptions-table'], src_cfg['release'])) # aux.print_debug("Reading file " + path) # Copy content from gzipped file to temporary file, so that apt_pkg is # used by debian @@ -200,7 +244,9 @@ class packages_gatherer(gatherer): except IOError, (e, message): print "Could not read packages from %s: %s" % (path, message) sys.exit(1) - cur.execute("DEALLOCATE package_insert") + cur.execute("DEALLOCATE package_insert") + if self.add_descriptions: + cur.execute("DEALLOCATE description_insert") # Fill the summary tables cur.execute("DELETE FROM %s" % (table + '_summary')); cur.execute("""INSERT INTO %s (package, version, source, source_version, -- 1.7.2.5