Adam Israel has proposed merging lp:~aisrael/charms/trusty/apache-hadoop-client/benchmarks into lp:~bigdata-dev/charms/trusty/apache-hadoop-client/trunk.
Requested reviews:
  Juju Big Data Development (bigdata-dev)

For more details, see:
https://code.launchpad.net/~aisrael/charms/trusty/apache-hadoop-client/benchmarks/+merge/260526

This merge proposal adds support for benchmarking and implements a 'terasort' benchmark. It adds two external dependencies: python-pip (which may already be installed via other requirements) and charm-benchmark, which is installed via pip.
--
Your team Juju Big Data Development is requested to review the proposed merge of lp:~aisrael/charms/trusty/apache-hadoop-client/benchmarks into lp:~bigdata-dev/charms/trusty/apache-hadoop-client/trunk.
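For reviewers who want to try this out, a rough usage sketch (the unit name and the juju action CLI invocations below are illustrative assumptions on my part, not part of this branch):

    juju action do apache-hadoop-client/0 teragen
    juju action do apache-hadoop-client/0 terasort maps=4 reduces=2
    juju action fetch <action-id>

The fetch output should include the raw terasort counters under results.raw, along with whatever charm-benchmark records for the composite score.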
=== added directory 'actions'
=== added file 'actions.yaml'
--- actions.yaml 1970-01-01 00:00:00 +0000
+++ actions.yaml 2015-05-28 21:01:32 +0000
@@ -0,0 +1,38 @@
+teragen:
+  description: Generate sample data for the terasort benchmark.
+  params:
+    size:
+      description: The number of 100 byte rows to generate; the default of 10000000 rows is roughly 1GB of data.
+      type: string
+      default: "10000000"
+    indir:
+      description: The HDFS directory in which to store the generated data.
+      type: string
+      default: 'tera_demo_in'
+terasort:
+  description: Run the terasort benchmark, generating input data first if none exists.
+  params:
+    indir:
+      description: The HDFS directory containing the data to sort.
+      type: string
+      default: 'tera_demo_in'
+    outdir:
+      description: The HDFS directory in which to store the sorted output.
+      type: string
+      default: 'tera_demo_out'
+    size:
+      description: The number of 100 byte rows to generate if no input data exists; the default of 10000000 rows is roughly 1GB of data.
+      type: string
+      default: "10000000"
+    maps:
+      description: The number of map tasks per job. Try 1-20.
+      type: integer
+      default: 1
+    reduces:
+      description: The number of reduce tasks per job. Typically set to 99% of the cluster's reduce capacity, so that if a node fails the reduces can still be executed in a single wave. Try 1-20.
+      type: integer
+      default: 1
+    numtasks:
+      description: How many tasks to run per jvm. If set to -1, there is no limit.
+      type: integer
+      default: 1
=== added file 'actions/parseTerasort.py'
--- actions/parseTerasort.py 1970-01-01 00:00:00 +0000
+++ actions/parseTerasort.py 2015-05-28 21:01:32 +0000
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+"""
+Simple script to parse terasort's job counter output
+and reformat it as JSON for sending back to juju
+"""
+import sys
+import subprocess
+import json
+from charmhelpers.contrib.benchmark import Benchmark
+import re
+
+
+def action_set(key, val):
+    action_cmd = ['action-set']
+    if isinstance(val, dict):
+        for k, v in val.iteritems():
+            action_set('%s.%s' % (key, k), v)
+        return
+
+    action_cmd.append('%s=%s' % (key, val))
+    subprocess.check_call(action_cmd)
+
+
+def parse_terasort_output():
+    """
+    Parse the output from terasort and set the action results.
+
+    """
+
+    results = {}
+
+    # Collect the job counters emitted by terasort ("\t<name>=<value>" lines)
+    regex = re.compile(r'\t+(.*)=(.*)')
+    for line in sys.stdin.readlines():
+        m = regex.match(line)
+        if m:
+            results[m.group(1)] = m.group(2)
+    action_set("results.raw", json.dumps(results))
+
+    # The composite score is CPU time plus GC time; lower is better
+    if 'CPU time spent (ms)' in results and 'GC time elapsed (ms)' in results:
+        composite = int(results['CPU time spent (ms)']) + int(results['GC time elapsed (ms)'])
+        Benchmark.set_composite_score(
+            composite,
+            'ms',
+            'asc'
+        )
+    else:
+        print "Invalid test results"
+        print results
+
+
+if __name__ == "__main__":
+    parse_terasort_output()
=== added file 'actions/teragen'
--- actions/teragen 1970-01-01 00:00:00 +0000
+++ actions/teragen 2015-05-28 21:01:32 +0000
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -eux
+SIZE=`action-get size`
+IN_DIR=`action-get indir`
+
+benchmark-start
+
+# /etc/environment must be sourced both before and inside the su shell so that
+# the Hadoop environment variables are available in both contexts.
+. /etc/environment
+su ubuntu << EOF
+. /etc/environment
+if JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${IN_DIR}; then
+    JAVA_HOME=${JAVA_HOME} hadoop fs -rm -r -skipTrash ${IN_DIR} || true
+fi
+
+JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar teragen ${SIZE} ${IN_DIR}
+
+EOF
+
+benchmark-finish
=== added file 'actions/terasort'
--- actions/terasort 1970-01-01 00:00:00 +0000
+++ actions/terasort 2015-05-28 21:01:32 +0000
@@ -0,0 +1,49 @@
+#!/bin/bash
+IN_DIR=`action-get indir`
+OUT_DIR=`action-get outdir`
+SIZE=`action-get size`
+OPTIONS=''
+
+MAPS=`action-get maps`
+REDUCES=`action-get reduces`
+NUMTASKS=`action-get numtasks`
+
+OPTIONS="${OPTIONS} -D mapreduce.job.maps=${MAPS}"
+OPTIONS="${OPTIONS} -D mapreduce.job.reduces=${REDUCES}"
+OPTIONS="${OPTIONS} -D mapreduce.job.jvm.numtasks=${NUMTASKS}"
+
+mkdir -p /opt/terasort
+chown ubuntu:ubuntu /opt/terasort
+run=`date +%s`
+
+# HACK: the environment reset below munges the PATH, so save it for restoration
+OLDPATH=$PATH
+
+
+# /etc/environment must be sourced both before and inside the su shell so that
+# the Hadoop environment variables are available in both contexts.
+. /etc/environment
+su ubuntu << EOF
+. /etc/environment
+
+mkdir -p /opt/terasort/results/$run
+
+# If no input data has been generated yet, create it using the action defaults
+if ! JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${IN_DIR} &> /dev/null; then
+    JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar teragen ${SIZE} ${IN_DIR} > /dev/null
+
+fi
+
+# If there's already sorted data, remove it
+if JAVA_HOME=${JAVA_HOME} hadoop fs -stat ${OUT_DIR} &> /dev/null; then
+    JAVA_HOME=${JAVA_HOME} hadoop fs -rm -r -skipTrash ${OUT_DIR} || true
+fi
+
+benchmark-start
+JAVA_HOME=${JAVA_HOME} hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples*.jar terasort ${OPTIONS} ${IN_DIR} ${OUT_DIR} &> /opt/terasort/results/$run/terasort.log
+benchmark-finish
+
+EOF
+PATH=$OLDPATH
+
+cat /opt/terasort/results/$run/terasort.log | python $CHARM_DIR/actions/parseTerasort.py
=== added file 'hooks/benchmark-relation-changed'
--- hooks/benchmark-relation-changed 1970-01-01 00:00:00 +0000
+++ hooks/benchmark-relation-changed 2015-05-28 21:01:32 +0000
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+relation-set benchmarks=terasort
=== modified file 'hooks/install'
--- hooks/install 2015-05-11 22:25:12 +0000
+++ hooks/install 2015-05-28 21:01:32 +0000
@@ -1,2 +1,4 @@
 #!/bin/bash
+apt-get install -y python-pip && pip install -U charm-benchmark
+
 hooks/status-set blocked "Please add relation to apache-hadoop-plugin"
=== added symlink 'hooks/upgrade-charm'
=== target is u'install'
=== modified file 'metadata.yaml'
--- metadata.yaml 2015-05-12 22:18:09 +0000
+++ metadata.yaml 2015-05-28 21:01:32 +0000
@@ -12,3 +12,5 @@
   hadoop-plugin:
     interface: hadoop-plugin
     scope: container
+  benchmark:
+    interface: benchmark
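A note on scoring, since it is only implicit in parseTerasort.py: every tab-indented "<name>=<value>" counter line in the captured terasort log is collected into a JSON map and stored via action-set under results.raw, and the composite score submitted through charm-benchmark is the sum of the 'CPU time spent (ms)' and 'GC time elapsed (ms)' counters, reported in ms with direction 'asc' (lower is better). As a purely illustrative example, counters of

    CPU time spent (ms)=4120
    GC time elapsed (ms)=310

would yield a composite score of 4430 ms.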