Andi Kleen <a...@firstfloor.org> writes: Ping for the patch series!
> From: Andi Kleen <a...@linux.intel.com> > > Using autofdo is currently somewhat difficult. It requires using the > model-specific branches-taken event, which differs on different CPUs. > The example shown in the manual requires a special patched version of > perf that is non-standard, and also will likely not work everywhere. > > This patch adds a new gcc-auto-profile script that figures out the > correct event and runs perf. The script is installed on Linux systems. > > Since maintaining the script would be somewhat tedious (needs changes > every time a new CPU comes out) I auto-generated it from the online > Intel event database. The script to do that is in contrib and can be > rerun. > > Right now there is no test in configure for whether perf works. This > would vary depending on the build and target system, and since > it currently doesn't work in virtualization and needs an up-to-date > kernel it may often fail in common distribution build setups. > > So Linux just hardcodes installing the script, but it may fail at runtime. > > This is needed to actually make use of autofdo in a generic way > in the build system and in the test suite. > > So far the script is not installed. > > gcc/: > 2016-03-27 Andi Kleen <a...@linux.intel.com> > > * doc/invoke.texi: Document gcc-auto-profile > * gcc-auto-profile: Create. > > contrib/: > > 2016-03-27 Andi Kleen <a...@linux.intel.com> > > * gen_autofdo_event.py: New file to regenerate > gcc-auto-profile. 
> --- > contrib/gen_autofdo_event.py | 155 > +++++++++++++++++++++++++++++++++++++++++++ > gcc/doc/invoke.texi | 31 +++++++-- > gcc/gcc-auto-profile | 70 +++++++++++++++++++ > 3 files changed, 251 insertions(+), 5 deletions(-) > create mode 100755 contrib/gen_autofdo_event.py > create mode 100755 gcc/gcc-auto-profile > > diff --git a/contrib/gen_autofdo_event.py b/contrib/gen_autofdo_event.py > new file mode 100755 > index 0000000..db4db33 > --- /dev/null > +++ b/contrib/gen_autofdo_event.py > @@ -0,0 +1,155 @@ > +#!/usr/bin/python > +# generate Intel taken branches Linux perf event script for autofdo profiling > + > +# Copyright (C) 2016 Free Software Foundation, Inc. > +# > +# GCC is free software; you can redistribute it and/or modify it under > +# the terms of the GNU General Public License as published by the Free > +# Software Foundation; either version 3, or (at your option) any later > +# version. > +# > +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +# WARRANTY; without even the implied warranty of MERCHANTABILITY or > +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +# for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with GCC; see the file COPYING3. If not see > +# <http://www.gnu.org/licenses/>. */ > + > +# run it with perf record -b -e EVENT program ... > +# The Linux Kernel needs to support the PMU of the current CPU, and > +# it will likely not work in VMs. > +# add --all to print for all cpus, otherwise for current cpu > +# add --script to generate shell script to run correct event > +# > +# requires internet (https) access. this may require setting up a proxy > +# with export https_proxy=... 
> +# > +import urllib2 > +import sys > +import json > +import argparse > +import collections > + > +baseurl = "https://download.01.org/perfmon" > + > +target_events = (u'BR_INST_RETIRED.NEAR_TAKEN', > + u'BR_INST_EXEC.TAKEN', > + u'BR_INST_RETIRED.TAKEN_JCC', > + u'BR_INST_TYPE_RETIRED.COND_TAKEN') > + > +ap = argparse.ArgumentParser() > +ap.add_argument('--all', '-a', help='Print for all CPUs', > action='store_true') > +ap.add_argument('--script', help='Generate shell script', > action='store_true') > +args = ap.parse_args() > + > +eventmap = collections.defaultdict(list) > + > +def get_cpu_str(): > + with open('/proc/cpuinfo', 'r') as c: > + vendor, fam, model = None, None, None > + for j in c: > + n = j.split() > + if n[0] == 'vendor_id': > + vendor = n[2] > + elif n[0] == 'model' and n[1] == ':': > + model = int(n[2]) > + elif n[0] == 'cpu' and n[1] == 'family': > + fam = int(n[3]) > + if vendor and fam and model: > + return "%s-%d-%X" % (vendor, fam, model), model > + return None, None > + > +def find_event(eventurl, model): > + print >>sys.stderr, "Downloading", eventurl > + u = urllib2.urlopen(eventurl) > + events = json.loads(u.read()) > + u.close() > + > + found = 0 > + for j in events: > + if j[u'EventName'] in target_events: > + event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], j[u'UMask']) > + if u'PEBS' in j and j[u'PEBS'] > 0: > + event += "p" > + if args.script: > + eventmap[event].append(model) > + else: > + print j[u'EventName'], "event for model", model, "is", event > + found += 1 > + return found > + > +if not args.all: > + cpu, model = get_cpu_str() > + if not cpu: > + sys.exit("Unknown CPU type") > + > +url = baseurl + "/mapfile.csv" > +print >>sys.stderr, "Downloading", url > +u = urllib2.urlopen(url) > +found = 0 > +cpufound = 0 > +for j in u: > + n = j.rstrip().split(',') > + if len(n) >= 4 and (args.all or n[0] == cpu) and n[3] == "core": > + if args.all: > + vendor, fam, model = n[0].split("-") > + model = int(model, 16) > + cpufound += 
1 > + found += find_event(baseurl + n[2], model) > +u.close() > + > +if args.script: > + print '''#!/bin/sh > +# profile workload for gcc profile feedback (autofdo) using Linux perf > +# auto generated. to regenerate for new CPUs run > +# contrib/gen_autofdo_event.py --shell --all in gcc source > + > +# usages: > +# gcc-auto-profile program (profile program and children) > +# gcc-auto-profile -a sleep X (profile all for X secs, may need > root) > +# gcc-auto-profile -p PID sleep X (profile PID) > +# gcc-auto-profile --kernel -a sleep X (profile kernel) > +# gcc-auto-profile --all -a sleep X (profile kernel and user space) > + > +# identify branches taken event for CPU > +# > + > +FLAGS=u > + > +if [ "$1" = "--kernel" ] ; then > + FLAGS=k > + shift > +fi > +if [ "$1" == "--all" ] ; then > + FLAGS=uk > + shift > +fi > + > +if ! grep -q Intel /proc/cpuinfo ] ; then > + echo >&2 "Only Intel CPUs supported" > + exit 1 > +fi > + > +if grep -q hypervisor /proc/cpuinfo ; then > + echo >&2 "Warning: branch profiling may not be functional in VMs" > +fi > + > +case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo && > + egrep "^model\s*:" /proc/cpuinfo | head -1` in''' > + for event, mod in eventmap.iteritems(): > + for m in mod[:-1]: > + print "model*:\ %s|\\" % m > + print 'model*:\ %s) E="%s$FLAGS" ;;' % (mod[-1], event) > + print '''*) > +echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to > update script." > + exit 1 ;;''' > + print "esac" > + print 'exec perf record -e $E -b "$@"' > + > +if cpufound == 0 and not args.all: > + sys.exit('CPU %s not found' % cpu) > + > +if found == 0: > + sys.exit('Branch event not found') > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 9e54bb7..427d89a 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -8249,13 +8249,34 @@ which are generally profitable only with profile > feedback available: > If omitted, it defaults to @file{fbdata.afdo} in the current directory. 
> > Producing an AutoFDO profile data file requires running your program > -with the @command{perf} utility on a supported GNU/Linux target system. > +with the @command{gcc-auto-profile} utility on a supported GNU/Linux target > system. @command{gcc-auto-profile} calls the @command{perf} utility. > +It also requires Last-Branch-Record support, which typically requires > +a new enough kernel not running virtualized. > +@command{gcc-auto-profile} accepts the same arguments as @command{perf > record}. > For more information, see @uref{https://perf.wiki.kernel.org/}. > > -E.g. > @smallexample > -perf record -e br_inst_retired:near_taken -b -o perf.data \ > - -- your_program > +gcc-auto-profile your_program > +@end smallexample > + > +On larger programs the resulting perf.data file may be very large. > +In this case it can be better to reduce the sampling rate. > +Collect samples every million taken branches: > + > +@smallexample > +gcc-auto-profile -c 1000000 program > +@end smallexample > + > +Or only profile representative run intervals of the program: > + > +@smallexample > +gcc-auto-profile -p PID-OF-PROGRAM sleep 5 > +@end smallexample > + > +Profile complete system for 10 seconds (may require root) > + > +@smallexample > +gcc-auto-profile -a sleep 10 > @end smallexample > > Then use the @command{create_gcov} tool to convert the raw profile data > @@ -8266,7 +8287,7 @@ See @uref{https://github.com/google/autofdo}. > E.g. > @smallexample > create_gcov --binary=your_program.unstripped --profile=perf.data \ > - --gcov=profile.afdo > + --gcov=profile.afdo -gcov_version 1 > @end smallexample > @end table > > diff --git a/gcc/gcc-auto-profile b/gcc/gcc-auto-profile > new file mode 100755 > index 0000000..c6712b2 > --- /dev/null > +++ b/gcc/gcc-auto-profile > @@ -0,0 +1,70 @@ > +#!/bin/sh > +# profile workload for gcc profile feedback (autofdo) using Linux perf > +# auto generated. 
to regenerate for new CPUs run > +# contrib/gen_autofdo_event.py --shell --all in gcc source > + > +# usages: > +# gcc-auto-profile program (profile program and children) > +# gcc-auto-profile -a sleep X (profile all for X secs, may need > root) > +# gcc-auto-profile -p PID sleep X (profile PID) > +# gcc-auto-profile --kernel -a sleep X (profile kernel) > +# gcc-auto-profile --all -a sleep X (profile kernel and user space) > + > +# identify branches taken event for CPU > +# > + > +FLAGS=u > + > +if [ "$1" = "--kernel" ] ; then > + FLAGS=k > + shift > +fi > +if [ "$1" == "--all" ] ; then > + FLAGS=uk > + shift > +fi > + > +if ! grep -q Intel /proc/cpuinfo ] ; then > + echo >&2 "Only Intel CPUs supported" > + exit 1 > +fi > + > +if grep -q hypervisor /proc/cpuinfo ; then > + echo >&2 "Warning: branch profiling may not be functional in VMs" > +fi > + > +case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo && > + egrep "^model\s*:" /proc/cpuinfo | head -1` in > +model*:\ 55|\ > +model*:\ 77|\ > +model*:\ 76) E="cpu/event=0xC4,umask=0xFE/p$FLAGS" ;; > +model*:\ 42|\ > +model*:\ 45|\ > +model*:\ 58|\ > +model*:\ 62|\ > +model*:\ 60|\ > +model*:\ 69|\ > +model*:\ 70|\ > +model*:\ 63|\ > +model*:\ 61|\ > +model*:\ 71|\ > +model*:\ 86|\ > +model*:\ 78|\ > +model*:\ 94) E="cpu/event=0xC4,umask=0x20/p$FLAGS" ;; > +model*:\ 46|\ > +model*:\ 30|\ > +model*:\ 31|\ > +model*:\ 26|\ > +model*:\ 47|\ > +model*:\ 37|\ > +model*:\ 44) E="cpu/event=0x88,umask=0x40/p$FLAGS" ;; > +model*:\ 28|\ > +model*:\ 38|\ > +model*:\ 39|\ > +model*:\ 54|\ > +model*:\ 53) E="cpu/event=0x88,umask=0x41/p$FLAGS" ;; > +*) > +echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to > update script." > + exit 1 ;; > +esac > +exec perf record -e $E -b "$@" -- a...@linux.intel.com -- Speaking for myself only