#!/usr/bin/env python3
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# qawrangler-stats.py
#
# Returns statistics of most active wranglers and reporters for a given
# month.
#
# For usage information, run:
#   qawrangler-stats.py -h
#
# Provenance: scripts/qawrangler-stats.py as of commit
# 75083a0e822d25ae7c3dfd3c86b335440d0a60b6 ("qa wranglers script file
# refactored", author Marc Garcia, Mon Apr 29 12:40:37 2013 +0200,
# signed off by Petr Mladek).
#

import sys
import re
import datetime
import gzip
import argparse
import csv
from urllib.request import urlopen, URLError
from io import BytesIO
from collections import Counter, OrderedDict

# Template for the monthly archive; the placeholder is filled with the
# value returned by get_fname(), e.g. `2013-April'.
URL = 'http://lists.freedesktop.org/archives/libreoffice-bugs/{}.txt.gz'

# One compiled pattern per reported category. Built from a list of pairs
# (not from a dict literal) so that the output order is the order written
# here on every Python 3 version.
ENTITIES = OrderedDict([
    ('changers', re.compile(r'^(.+)\schanged:$', re.MULTILINE)),
    ('reporters', re.compile(r'^\s*Reporter:\s(.+)$', re.MULTILINE)),
    ('commentators', re.compile(r'^--- Comment #\d+ from (.+) ---$',
                                re.MULTILINE)),
])


def get_parser():
    """Returns an argparse instance, setting the arguments for the script.

    The month is restricted to 1..12 so that an invalid value is rejected
    by argparse with a usage message instead of failing later when the
    date object is built."""
    today = datetime.date.today()
    parser = argparse.ArgumentParser(
        description='LibreOffice contributor statistics')
    parser.add_argument('-m', '--month', dest='month', type=int,
        choices=range(1, 13), metavar='MONTH', default=today.month,
        help='month to generate statistics from (default is current month)')
    parser.add_argument('-y', '--year', dest='year', type=int,
        default=today.year,
        help='year to generate statistics from (default is current year)')
    parser.add_argument('-n', '--num', dest='num', type=int, default=None,
        help='number of top contributors of each category (default is all)')
    parser.add_argument('--csv', dest='csv', action='store_true',
        help='output information in CSV format')

    return parser


def get_fname(date):
    """Returns the `Libreoffice-bugs Archives' file name for a given @date
    datetime object, in the form `<year>-<month name>'. Note that only year
    and month are relevant, day is ignored"""
    return '{}-{}'.format(date.year, date.strftime('%B'))


def get_data(url):
    """Fetches and uncompresses the `Libreoffice-bugs Archives' file given its
    @url. The return of the function is the content of the file as a string.

    If the file cannot be fetched, an error message is written to stderr
    and the script exits with a non-zero status."""
    try:
        # The context manager makes sure the connection is closed.
        with urlopen(url) as resp:
            payload = resp.read()
    except URLError as err:
        # sys.exit() with a string prints it (newline terminated) to stderr
        # and exits with status 1.
        sys.exit('Error fetching {}: {}'.format(url, err))

    with gzip.GzipFile(fileobj=BytesIO(payload)) as f:
        # Plain ASCII decodes identically as UTF-8; any byte that is not
        # valid UTF-8 is replaced instead of aborting the whole run.
        return f.read().decode('utf-8', errors='replace')


def get_entity_values(data, pattern, num):
    """Returns the @num most frequent matches of a @pattern in the @data
    string, as a list of (match, occurrences) tuples sorted from most to
    least frequent. If @num is None, all matches are returned"""
    values = re.findall(pattern, data)
    return Counter(values).most_common(num)


def nice_print(values_dict, num_output, date):
    """Prints to stdout the output of the script in a human readable way.
    @values_dict is a dict containing a key for each entity (e.g. wranglers,
    reporters, etc), and as values, a list of tuples containing the name and
    the number of occurrences. An example:

    >>> {
    >>>     'wranglers': [
    >>>         ('Wrangler 1 <wrangler1@his_email.com>', 30),
    >>>         # 30 is the number of times he wrangled
    >>>         ('Wrangler 2 <wrangler2@his_email.com>', 15),
    >>>     ]
    >>> }

    @num_output is the number of top values requested for each category
    (e.g. number of top wranglers), or None when all values are shown, and
    @date is a datetime object containing the requested year and month"""
    print('=== {} ==='.format(date.strftime('%B %Y')))
    print()
    for name, values in values_dict.items():
        # Leave the count out of the title when no limit was requested,
        # instead of printing an empty field (and a double space).
        if num_output:
            title = 'Top {} {}'.format(num_output, name)
        else:
            title = 'Top {}'.format(name)
        print('--- {} ---'.format(title))
        print('\n'.join('{0:75}{1:5d}'.format(*v) for v in values))
        print()


def csv_print(values_dict):
    """Print to stdout the output of the script in CSV format. @values_dict
    has the same format as for the `nice_print' function. Each row holds the
    entity name, the contributor and the number of occurrences. The CSV file
    has the default format for Python's csv module (comma delimited, strings
    quoted when necessary)"""
    writer = csv.writer(sys.stdout)
    for entity_name, values in values_dict.items():
        for val_name, val_count in values:
            writer.writerow([entity_name, val_name, val_count])


def main(args):
    """Main function of the program. @args is the namespace returned by the
    parser built in `get_parser'.

     * Fetches the file for the requested year and month
     * For each defined entity, gathers each match of its pattern,
       and counts the number of occurrences
     * Prints the retrieved information to stdout in the requested format
    """
    date = datetime.date(args.year, args.month, 1)
    fname = get_fname(date)
    url = URL.format(fname)
    data = get_data(url)
    values = OrderedDict()
    for name, regex in ENTITIES.items():
        values[name] = get_entity_values(data, regex, args.num)

    if args.csv:
        csv_print(values)
    else:
        nice_print(values, args.num, date)


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()
    main(args)