Reviewers: hahnjo, Message: sure.
#!/usr/bin/env python3
# Description: Add a script for running timing benchmarks.
# Please review this at https://codereview.appspot.com/545950043/
# Affected file: scripts/auxiliar/benchmark.py (added as a new file).

"""
Script for structured benchmarking of lilypond binaries. To use, compile like this:

$ make && cp lily/out/lilypond binaries/lilypond.$(git rev-parse --short HEAD) ; ln binaries/* out/bin/

then

$ python scripts/auxiliar/benchmark.py -n3 -v HEAD^ -v HEAD -- input/regression/mozart-hrn-3.ly

The first -v should be the baseline, the second should be what you want to test.

Options:
  -n COUNT   number of runs per version (default 3, must be >= 1)
  -v REV     git revision to benchmark; may be given several times

Every run is executed under "/usr/bin/time -v"; its combined stdout/stderr is
stored in benchmark-results/, and a summary is written there and to stdout.
"""

import getopt
import os
import re
import statistics
import subprocess
import sys

OUT_DIR = "benchmark-results"

# Line emitted by "/usr/bin/time -v" that carries the user CPU time.
USER_TIME_RE = re.compile(r'User time \(seconds\): ([0-9.]+)')


def make_command_id(args):
    """Return a file-name-safe identifier derived from the command arguments."""
    return re.sub('[^a-zA-Z0-9_-]', '', '-'.join(args))


def parse_user_time(lines):
    """Return the first "User time" value found in *lines* as a float.

    Returns None when no line matches, so the caller can report the problem
    instead of silently recording fewer samples.
    """
    for line in lines:
        match = USER_TIME_RE.search(line)
        if match:
            return float(match.group(1))
    return None


def resolve_version(rev):
    """Return (short commit hash, subject line) for the git revision *rev*.

    Raises subprocess.CalledProcessError if git rejects the revision and
    OSError if git cannot be started.
    """
    commit = subprocess.check_output(
        ["git", "rev-parse", "--short", rev], universal_newlines=True).strip()
    descr = subprocess.check_output(
        ["git", "log", "-1", "--pretty=format:%s", commit],
        universal_newlines=True)
    return commit, descr


def run_once(version, args, out_path):
    """Run one timed lilypond invocation and return its user time in seconds.

    The combined stdout/stderr of the run is written to *out_path*.  The
    process exits with status 1 if the command fails or if no timing line
    can be found in the log.
    """
    cmd = ["/usr/bin/time", "-v", "out/bin/lilypond.%s" % version] + list(args)
    print("running %s > %s 2>&1" % (' '.join(cmd), out_path))
    try:
        # An argument list plus explicit redirection avoids both the
        # bash-only ">&" operator and shell interpretation of the arguments.
        with open(out_path, 'w') as log:
            status = subprocess.call(cmd, stdout=log, stderr=subprocess.STDOUT)
    except OSError as err:
        sys.stderr.write("failure: cannot run %s: %s\n" % (cmd[0], err))
        sys.exit(1)
    if status:
        sys.stderr.write("failure: see %s\n" % out_path)
        sys.exit(1)

    # The log may contain arbitrary program output; do not die on odd bytes.
    with open(out_path, errors='replace') as log:
        user_time = parse_user_time(log)
    if user_time is None:
        sys.stderr.write("failure: no user time found, see %s\n" % out_path)
        sys.exit(1)
    return user_time


def format_report(args, versions, descriptions, results):
    """Build the textual benchmark summary.

    args         -- the command arguments that were benchmarked
    versions     -- commit ids in command-line order; the first is the baseline
    descriptions -- maps commit id to its subject line
    results      -- maps commit id to a non-empty list of user times (seconds)
    """
    parts = [
        "benchmark for arguments: %s\n" % ' '.join(args),
        "raw data: %s\n" % (results,),
    ]

    # Keyed by commit so the delta section cannot pair a version with the
    # wrong median.
    medians = {}
    for v, timings in results.items():
        medians[v] = statistics.median(timings)
        parts.append("Version %s: %s\n" % (v, descriptions[v]))
        parts.append(" avg %f\n" % statistics.mean(timings))
        parts.append(" med %f\n" % medians[v])
        # The sample standard deviation needs at least two data points.
        if len(timings) > 1:
            parts.append(" stddev %.3f\n" % statistics.stdev(timings))

    parts.append("\nDELTA\n\n")
    for v in versions[1:]:
        baseline = medians[versions[0]]
        delta = medians[v] - baseline
        qual = "slower" if delta > 0 else "faster"
        parts.append("%s - %s\n" % (v, descriptions[v]))
        parts.append(" med diff %f\n" % delta)
        if baseline:
            parts.append(" med diff %f %% (%s is %s)\n"
                         % (delta / baseline * 100.0, v, qual))
        else:
            # A zero baseline makes the relative difference undefined.
            parts.append(" med diff n/a (%s is %s; baseline median is 0)\n"
                         % (v, qual))
    return ''.join(parts)


def main(argv=None):
    """Parse the command line, run all benchmarks and write the summary."""
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, "n:v:", [])
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        return 2

    run_count = 3
    versions = []
    descriptions = {}
    for (o, a) in opts:
        if o == '-n':
            try:
                run_count = int(a)
            except ValueError:
                sys.stderr.write("-n expects an integer, got %r\n" % a)
                return 2
        if o == '-v':
            try:
                commit, descr = resolve_version(a)
            except (OSError, subprocess.CalledProcessError):
                sys.stderr.write("cannot resolve revision %s\n" % a)
                return 1
            binary = "out/bin/lilypond.%s" % commit
            if not os.path.exists(binary):
                sys.stderr.write("missing %s\n" % binary)
                return 1
            # The same commit given twice would share log files and
            # timings, so only the first occurrence is kept.
            if commit in descriptions:
                continue
            versions.append(commit)
            descriptions[commit] = descr

    if run_count < 1:
        sys.stderr.write("-n must be at least 1\n")
        return 2

    os.makedirs(OUT_DIR, exist_ok=True)
    results = {v: [] for v in versions}
    command_id = make_command_id(args)

    # Interleave the versions within each run so that drift in machine load
    # affects all of them alike.
    for run in range(run_count):
        for v in versions:
            out = '%s/%s-%s.%d.txt' % (OUT_DIR, command_id, v, run)
            results[v].append(run_once(v, args, out))

    result = format_report(args, versions, descriptions, results)

    summary = "%s/%s-%s-summary.txt" % (
        OUT_DIR, command_id, '-'.join(versions[1:]))
    with open(summary, 'w') as f:
        f.write(result)

    sys.stdout.write(result)
    return 0


if __name__ == "__main__":
    sys.exit(main())