On Tue, Aug 04, 2015 at 04:30:21AM -0400, Kan Liang wrote:
> From: Kan Liang <kan.li...@intel.com>
> 
> When multiple events are sampled it may not be needed to collect
> callgraphs for all of them. The sample sites are usually nearby, and
> it's enough to collect the callgraphs on a reference event (such as
> precise cycles or precise instructions).
> This patchkit adds the ability to turn off callgraphs and time stamp
> per event. This in term can reduce sampling overhead and the size of the
> perf.data. Furthermore, it makes collecting back traces and timestamps
> possible when PEBS threshold > 1, which significantly reducing the
> sampling overhead especially for frequently occurring events
> (https://lkml.org/lkml/2015/5/10/196). For example, A slower event with
> a larger period collects back traces/timestamps. Other more events run
> fast with multi-pebs. The time stamps from the slower events can be used
> to order the faster events. Their backtraces can give the user enough
> hint to find the right spot.
> 
> Here are some examples and test results.
> 
> 1. Comparing the elapsed time and perf.data size from "kernbench -M -H".
> 
>  The test command for FULL callgraph and time support.
>    "perf record -e
>    '{cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p}'
>    --call-graph fp --time"
> 
>  The test command for PARTIAL callgraph and time support.
>    "perf record -e
>    '{cpu/cpu-cycles,call-graph=fp,time,period=100000/,
>      cpu/instructions,call-graph=no,time=0,period=20000/p}'"
> 
>  The elapsed time for FULL is 24.3 Sec, while for PARTIAL is 16.9 Sec.
>  The perf.data size for FULL is 22.1 Gb, while for PARTIAL is 12.4 Gb.
> 
> 2. Comparing the perf.data size and callgraph results.
> 
>  The test command for FULL callgraph and time support.
>    "perf record -e
>    '{cpu/cpu-cycles,period=100000/pp,cpu/instructions,period=20000/p}'
>    --call-graph fp -- ./tchain_edit"
> 
>  The test command for PARTIAL callgraph and time support.
>    "perf record -e
>    '{cpu/cpu-cycles,call-graph=fp,time,period=100000/pp,
>      cpu/instructions,call-graph=no,time=0,period=20000/p}'
>    -- ./tchain_edit"
> 
>  The perf.data size for FULL is 43.2 MB, while for PARTIAL is 21.1 MB.
>  The callgraph is roughly the same.
> 
>  The callgraph from FULL
>  # Samples: 87K of event
>  'cpu/cpu-cycles,call-graph=fp,time,period=100000/pp'
>  # Event count (approx.): 8760000000
>  #
>  # Children      Self  Command      Shared Object       Symbol
>  # ........  ........  ...........  ..................
> ..........................................
>  #
>     99.98%     0.00%  tchain_edit  libc-2.15.so        [.]
> __libc_start_main
>             |
>             ---__libc_start_main
> 
>     99.97%     0.00%  tchain_edit  tchain_edit         [.] main
>             |
>             ---main
>                __libc_start_main
> 
>     99.97%     0.00%  tchain_edit  tchain_edit         [.] f1
>             |
>             ---f1
>                main
>                __libc_start_main
> 
>     99.85%    87.01%  tchain_edit  tchain_edit         [.] f3
>             |
>             ---f3
>                |
>                |--99.74%-- f2
>                |          f1
>                |          main
>                |          __libc_start_main
>                 --0.26%-- [...]
>     99.71%     0.12%  tchain_edit  tchain_edit         [.] f2
>             |
>             ---f2
>                f1
>                main
>                __libc_start_main
> 
>  The callgraph from PARTIAL
>  # Samples: 417K of event
>  'cpu/instructions,call-graph=no,time=0,period=20000/p'
>  # Event count (approx.): 8346980000
>  #
>  # Children      Self  Command      Shared Object     Symbol
>  # ........  ........  ...........  ................
> ..........................................
>  #
>     98.82%     0.00%  tchain_edit  libc-2.15.so      [.]
> __libc_start_main
>             |
>             ---__libc_start_main
> 
>     98.82%     0.00%  tchain_edit  tchain_edit       [.] main
>             |
>             ---main
>                __libc_start_main
> 
>     98.82%     0.00%  tchain_edit  tchain_edit       [.] f1
>             |
>             ---f1
>                main
>                __libc_start_main
> 
>     98.82%    98.28%  tchain_edit  tchain_edit       [.] f3
>             |
>             ---f3
>                |
>                |--0.53%-- f2
>                |          f1
>                |          main
>                |          __libc_start_main
>                |
>                |--0.01%-- f1
>                |          main
>                |          __libc_start_main
>                 --99.46%-- [...]
>     97.63%     0.03%  tchain_edit  tchain_edit       [.] f2
>             |
>             ---f2
>                f1
>                main
>                __libc_start_main
> 
>      7.13%     0.03%  tchain_edit  [kernel.vmlinux]  [k] do_nmi
>             |
>             ---do_nmi
>                end_repeat_nmi
>                f3
>                f2
>                f1
>                main
>                __libc_start_main
> 
> Signed-off-by: Kan Liang <kan.li...@intel.com>
> ---
>  tools/perf/Documentation/perf-record.txt |  4 ++
>  tools/perf/util/evsel.c                  | 65 
> +++++++++++++++++++++++++++++++-
>  tools/perf/util/evsel.h                  |  4 ++
>  tools/perf/util/parse-events.c           | 12 ++++++
>  tools/perf/util/parse-events.h           |  2 +
>  tools/perf/util/parse-events.l           |  2 +
>  tools/perf/util/pmu.c                    |  3 +-
>  7 files changed, 89 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-record.txt 
> b/tools/perf/Documentation/perf-record.txt
> index 0d852d1..e633711 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -52,6 +52,10 @@ OPTIONS
>         - 'time': Disable/enable time stamping. Acceptable values are 1 for
>                   enabling time stamping. 0 for disabling time stamping.
>                   The default is 1.
> +       - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
> +                      FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
> +                      "no" for disable callgraph.
> +       - 'stack-size': user stack size for dwarf mode
>         Note: If user explicitly sets options which conflict with the params,
>         the value set by the params will be overridden.
>  
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index f572f46..106cd20 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
>       }
>  }
>  
> -static void apply_config_terms(struct perf_evsel *evsel)
> +static void
> +perf_evsel__reset_callgraph(struct perf_evsel *evsel,
> +                         struct callchain_param *param)
> +{
> +     struct perf_event_attr *attr = &evsel->attr;
> +
> +     perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
> +     if (param->record_mode == CALLCHAIN_LBR) {
> +             perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
> +             attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
> +                                           PERF_SAMPLE_BRANCH_CALL_STACK);
> +     }
> +     if (param->record_mode == CALLCHAIN_DWARF) {
> +             perf_evsel__reset_sample_bit(evsel, REGS_USER);
> +             perf_evsel__reset_sample_bit(evsel, STACK_USER);
> +     }
> +}
> +
> +static void apply_config_terms(struct perf_evsel *evsel,
> +                            struct record_opts *opts)
>  {
>       struct perf_evsel_config_term *term;
>       struct list_head *config_terms = &evsel->config_terms;
>       struct perf_event_attr *attr = &evsel->attr;
> +     struct callchain_param param;
> +     u32 dump_size = 0;
> +     char *callgraph_buf = NULL;
> +
> +     /* callgraph default */
> +     param.record_mode = callchain_param.record_mode;
>  
>       list_for_each_entry(term, config_terms, list) {
>               switch (term->type) {
> @@ -605,10 +630,46 @@ static void apply_config_terms(struct perf_evsel *evsel)
>                       else
>                               perf_evsel__reset_sample_bit(evsel, TIME);
>                       break;
> +             case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
> +                     callgraph_buf = term->val.callgraph;
> +                     break;
> +             case PERF_EVSEL__CONFIG_TERM_STACK_USER:
> +                     dump_size = term->val.stack_user;
> +                     break;
>               default:
>                       break;
>               }
>       }
> +
> +     /* User explicitly set per-event callgraph, clear the old setting and 
> reset. */
> +     if ((callgraph_buf != NULL) || (dump_size > 0)) {
> +
> +             /* parse callgraph parameters */
> +             if (callgraph_buf != NULL) {
> +                     if (!strcmp(callgraph_buf, "no")) {
> +                             param.enabled = false;
> +                             param.record_mode = CALLCHAIN_NONE;
> +                     } else {
> +                             param.enabled = true;
> +                             if (parse_callchain_record_opt(callgraph_buf, 
> &param)) {
> +                                     pr_err("per-event callgraph setting for 
> %s failed. "
> +                                            "Apply callgraph global setting 
> for it\n",
> +                                            evsel->name);
> +                                     return;

hum, calling parse_callchain_record_opt from evsel hurts the python code:

17: Try 'import perf' in python, checking link problems      :
--- start ---
test child forked, pid 25751
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: python/perf.so: undefined symbol: parse_callchain_record_opt
test child finished with -1
---- end ----
Try 'import perf' in python, checking link problems: FAILED!


not sure if we can call it from some place else (I guess not),
then we'd need to either put the util/callchain.c under python
objects, or somehow refine needed parsing code..

jirka

> +                             }
> +                     }
> +             }
> +             if (dump_size > 0)
> +                     param.dump_size = dump_size;
> +
> +             /* If global callgraph set, clear it */
> +             if (callchain_param.enabled)
> +                     perf_evsel__reset_callgraph(evsel, &callchain_param);
> +
> +             /* set perf-event callgraph */
> +             if (param.enabled)
> +                     perf_evsel__config_callgraph(evsel, opts, &param);
> +     }
>  }
>  
>  /*
> @@ -807,7 +868,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct 
> record_opts *opts)
>        * Apply event specific term settings,
>        * it overloads any global configuration.
>        */
> -     apply_config_terms(evsel);
> +     apply_config_terms(evsel, opts);
>  }
>  
>  static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int 
> nthreads)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 6a12908..09a3022 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -40,6 +40,8 @@ struct cgroup_sel;
>  enum {
>       PERF_EVSEL__CONFIG_TERM_PERIOD,
>       PERF_EVSEL__CONFIG_TERM_TIME,
> +     PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
> +     PERF_EVSEL__CONFIG_TERM_STACK_USER,
>       PERF_EVSEL__CONFIG_TERM_MAX,
>  };
>  
> @@ -49,6 +51,8 @@ struct perf_evsel_config_term {
>       union {
>               u64     period;
>               bool    time;
> +             char    *callgraph;
> +             u64     stack_user;
>       } val;
>  };
>  
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index a6cb9af..716f1fc 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -611,6 +611,12 @@ do {                                                     
>                    \
>                       return -EINVAL;
>               }
>               break;
> +     case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> +             CHECK_TYPE_VAL(STR);
> +             break;
> +     case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> +             CHECK_TYPE_VAL(NUM);
> +             break;
>       case PARSE_EVENTS__TERM_TYPE_NAME:
>               CHECK_TYPE_VAL(STR);
>               break;
> @@ -662,6 +668,12 @@ do {                                                     
>         \
>               case PARSE_EVENTS__TERM_TYPE_TIME:
>                       ADD_CONFIG_TERM(TIME, time, term->val.num);
>                       break;
> +             case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> +                     ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
> +                     break;
> +             case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> +                     ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
> +                     break;
>               default:
>                       break;
>               }
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index e6f9aacc..87dc9f6 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -64,6 +64,8 @@ enum {
>       PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
>       PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
>       PARSE_EVENTS__TERM_TYPE_TIME,
> +     PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
> +     PARSE_EVENTS__TERM_TYPE_STACKSIZE,
>  };
>  
>  struct parse_events_term {
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index f542750..1665497 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -184,6 +184,8 @@ name                      { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_NAME); }
>  period                       { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
>  branch_type          { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
>  time                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); 
> }
> +call-graph           { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
> +stack-size           { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
>  ,                    { return ','; }
>  "/"                  { BEGIN(INITIAL); return '/'; }
>  {name_minus}         { return str(yyscanner, PE_NAME); }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index b615cdf..948f896 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -607,7 +607,8 @@ static char *formats_error_string(struct list_head 
> *formats)
>  {
>       struct perf_pmu_format *format;
>       char *err, *str;
> -     static const char *static_terms = 
> "config,config1,config2,name,period,branch_type,time\n";
> +     static const char *static_terms = "config,config1,config2,name,period,"
> +                                       
> "branch_type,time,call-graph,stack-size\n";
>       unsigned i = 0;
>  
>       if (!asprintf(&str, "valid terms:"))
> -- 
> 1.8.3.1
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to