perf report: Add --max-stack option to limit callchain stack scan
When callgraph data was included in the perf data file, it may take a long time to scan all those data and merge them together especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so. The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels, the rests are usually not looked at. This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information with faster speed. The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes. --max_stack Elapsed Time Output data size ----------- ------------ ---------------- not set 88.0s 124,422,651 64 87.5s 116,303,213 32 87.2s 112,023,804 16 86.6s 94,326,380 8 59.9s 33,697,248 4 40.7s 10,116,637 -g none 27.1s 2,555,810 Signed-off-by: Waiman Long <Waiman.Long@hp.com> Acked-by: David Ahern <dsahern@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Aswin Chandramouleeswaran <aswin@hp.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Scott J Norton <scott.norton@hp.com> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
cc9784bd9f
commit
91e9561742
6 changed files with 40 additions and 13 deletions
|
@ -141,6 +141,14 @@ OPTIONS
|
|||
|
||||
Default: fractal,0.5,callee,function.
|
||||
|
||||
--max-stack::
|
||||
Set the stack depth limit when parsing the callchain, anything
|
||||
beyond the specified depth will be ignored. This is a trade-off
|
||||
between information loss and faster processing especially for
|
||||
workloads that can have a very long callchain stack.
|
||||
|
||||
Default: 127
|
||||
|
||||
-G::
|
||||
--inverted::
|
||||
alias for inverted caller based call graph.
|
||||
|
|
|
@ -49,6 +49,7 @@ struct perf_report {
|
|||
bool show_threads;
|
||||
bool inverted_callchain;
|
||||
bool mem_mode;
|
||||
int max_stack;
|
||||
struct perf_read_values show_threads_values;
|
||||
const char *pretty_printing_style;
|
||||
const char *cpu_list;
|
||||
|
@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
|
|||
if ((sort__has_parent || symbol_conf.use_callchain) &&
|
||||
sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel, al->thread,
|
||||
sample, &parent, al);
|
||||
sample, &parent, al,
|
||||
rep->max_stack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
|
|||
if ((sort__has_parent || symbol_conf.use_callchain)
|
||||
&& sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel, al->thread,
|
||||
sample, &parent, al);
|
||||
sample, &parent, al,
|
||||
rep->max_stack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
@ -244,18 +247,21 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
|
|||
return err;
|
||||
}
|
||||
|
||||
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
|
||||
static int perf_evsel__add_hist_entry(struct perf_tool *tool,
|
||||
struct perf_evsel *evsel,
|
||||
struct addr_location *al,
|
||||
struct perf_sample *sample,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct perf_report *rep = container_of(tool, struct perf_report, tool);
|
||||
struct symbol *parent = NULL;
|
||||
int err = 0;
|
||||
struct hist_entry *he;
|
||||
|
||||
if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel, al->thread,
|
||||
sample, &parent, al);
|
||||
sample, &parent, al,
|
||||
rep->max_stack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool,
|
|||
if (al.map != NULL)
|
||||
al.map->dso->hit = 1;
|
||||
|
||||
ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine);
|
||||
ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample,
|
||||
machine);
|
||||
if (ret < 0)
|
||||
pr_debug("problem incrementing symbol period, skipping event\n");
|
||||
}
|
||||
|
@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
.ordered_samples = true,
|
||||
.ordering_requires_timestamps = true,
|
||||
},
|
||||
.max_stack = PERF_MAX_STACK_DEPTH,
|
||||
.pretty_printing_style = "normal",
|
||||
};
|
||||
const struct option options[] = {
|
||||
|
@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
|
|||
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
|
||||
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
|
||||
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
|
||||
OPT_INTEGER(0, "max-stack", &report.max_stack,
|
||||
"Set the maximum stack depth when parsing the callchain, "
|
||||
"anything beyond the specified depth will be ignored. "
|
||||
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
|
||||
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
|
||||
"alias for inverted call graph"),
|
||||
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
|
||||
|
|
|
@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
|
|||
sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel,
|
||||
al.thread, sample,
|
||||
&parent, &al);
|
||||
&parent, &al,
|
||||
PERF_MAX_STACK_DEPTH);
|
||||
if (err)
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine,
|
|||
struct thread *thread,
|
||||
struct ip_callchain *chain,
|
||||
struct symbol **parent,
|
||||
struct addr_location *root_al)
|
||||
struct addr_location *root_al,
|
||||
int max_stack)
|
||||
{
|
||||
u8 cpumode = PERF_RECORD_MISC_USER;
|
||||
unsigned int i;
|
||||
int chain_nr = min(max_stack, (int)chain->nr);
|
||||
int i;
|
||||
int err;
|
||||
|
||||
callchain_cursor_reset(&callchain_cursor);
|
||||
|
@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine,
|
|||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < chain->nr; i++) {
|
||||
for (i = 0; i < chain_nr; i++) {
|
||||
u64 ip;
|
||||
struct addr_location al;
|
||||
|
||||
|
@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine,
|
|||
struct thread *thread,
|
||||
struct perf_sample *sample,
|
||||
struct symbol **parent,
|
||||
struct addr_location *root_al)
|
||||
struct addr_location *root_al,
|
||||
int max_stack)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = machine__resolve_callchain_sample(machine, thread,
|
||||
sample->callchain, parent, root_al);
|
||||
sample->callchain, parent,
|
||||
root_al, max_stack);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
|
@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine,
|
|||
struct thread *thread,
|
||||
struct perf_sample *sample,
|
||||
struct symbol **parent,
|
||||
struct addr_location *root_al);
|
||||
struct addr_location *root_al,
|
||||
int max_stack);
|
||||
|
||||
/*
|
||||
* Default guest kernel is defined by parameter --guestkallsyms
|
||||
|
|
|
@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
|
|||
if (symbol_conf.use_callchain && sample->callchain) {
|
||||
|
||||
if (machine__resolve_callchain(machine, evsel, al.thread,
|
||||
sample, NULL, NULL) != 0) {
|
||||
sample, NULL, NULL,
|
||||
PERF_MAX_STACK_DEPTH) != 0) {
|
||||
if (verbose)
|
||||
error("Failed to resolve callchain. Skipping\n");
|
||||
return;
|
||||
|
|
Loading…
Reference in a new issue