perf tools: Enable LBR call stack support
Currently, there are two call chain recording options, fp and dwarf. Haswell has a new feature that utilizes the existing LBR facility to record call chains. Kernel-side LBR support code provides this as a third option to record call chains. This patch enables LBR call stack support on the tooling side. LBR call stack has some limitations: - It reuses the current LBR facility, so LBR call stack and branch recording cannot be enabled at the same time. - It is only available for user-space callchains. However, it also offers some advantages: - LBR call stack can work on user apps which don't have frame-pointers or dwarf debug info compiled in. It is a good alternative when nothing else works. Tested-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Kan Liang <kan.liang@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Cody P Schafer <cody@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Jacob Shin <jacob.w.shin@gmail.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Rodrigo Campos <rodrigo@sdfg.com.ar> Cc: Stephane Eranian <eranian@google.com> Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
2c44b1936b
commit
aad2b21c15
6 changed files with 40 additions and 6 deletions
|
@ -115,13 +115,19 @@ OPTIONS
|
||||||
implies -g.
|
implies -g.
|
||||||
|
|
||||||
Allows specifying "fp" (frame pointer) or "dwarf"
|
Allows specifying "fp" (frame pointer) or "dwarf"
|
||||||
(DWARF's CFI - Call Frame Information) as the method to collect
|
(DWARF's CFI - Call Frame Information) or "lbr"
|
||||||
|
(Hardware Last Branch Record facility) as the method to collect
|
||||||
the information used to show the call graphs.
|
the information used to show the call graphs.
|
||||||
|
|
||||||
In some systems, where binaries are built with gcc
|
In some systems, where binaries are built with gcc
|
||||||
--fomit-frame-pointer, using the "fp" method will produce bogus
|
--fomit-frame-pointer, using the "fp" method will produce bogus
|
||||||
call graphs, using "dwarf", if available (perf tools linked to
|
call graphs, using "dwarf", if available (perf tools linked to
|
||||||
the libunwind library) should be used instead.
|
the libunwind library) should be used instead.
|
||||||
|
Using the "lbr" method doesn't require any compiler options. It
|
||||||
|
will produce call graphs from the hardware LBR registers. The
|
||||||
|
main limitation is that it is only available on new Intel
|
||||||
|
platforms, such as Haswell. It can only collect user-space call chains. It
|
||||||
|
doesn't work with branch stack sampling at the same time.
|
||||||
|
|
||||||
-q::
|
-q::
|
||||||
--quiet::
|
--quiet::
|
||||||
|
|
|
@ -658,7 +658,7 @@ parse_branch_stack(const struct option *opt, const char *str, int unset)
|
||||||
|
|
||||||
static void callchain_debug(void)
|
static void callchain_debug(void)
|
||||||
{
|
{
|
||||||
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
|
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
|
||||||
|
|
||||||
pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
|
pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
|
||||||
|
|
||||||
|
@ -751,9 +751,9 @@ static struct record record = {
|
||||||
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
|
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
|
||||||
|
|
||||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||||
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
|
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
|
||||||
#else
|
#else
|
||||||
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
|
const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep)
|
||||||
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
|
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
|
||||||
(sample_type & PERF_SAMPLE_STACK_USER))
|
(sample_type & PERF_SAMPLE_STACK_USER))
|
||||||
callchain_param.record_mode = CALLCHAIN_DWARF;
|
callchain_param.record_mode = CALLCHAIN_DWARF;
|
||||||
|
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
|
||||||
|
callchain_param.record_mode = CALLCHAIN_LBR;
|
||||||
else
|
else
|
||||||
callchain_param.record_mode = CALLCHAIN_FP;
|
callchain_param.record_mode = CALLCHAIN_FP;
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg)
|
||||||
callchain_param.dump_size = size;
|
callchain_param.dump_size = size;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
|
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
|
||||||
|
} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
|
||||||
|
if (!strtok_r(NULL, ",", &saveptr)) {
|
||||||
|
callchain_param.record_mode = CALLCHAIN_LBR;
|
||||||
|
ret = 0;
|
||||||
|
} else
|
||||||
|
pr_err("callchain: No more arguments "
|
||||||
|
"needed for --call-graph lbr\n");
|
||||||
|
break;
|
||||||
} else {
|
} else {
|
||||||
pr_err("callchain: Unknown --call-graph option "
|
pr_err("callchain: Unknown --call-graph option "
|
||||||
"value: %s\n", arg);
|
"value: %s\n", arg);
|
||||||
|
|
|
@ -11,6 +11,7 @@ enum perf_call_graph_mode {
|
||||||
CALLCHAIN_NONE,
|
CALLCHAIN_NONE,
|
||||||
CALLCHAIN_FP,
|
CALLCHAIN_FP,
|
||||||
CALLCHAIN_DWARF,
|
CALLCHAIN_DWARF,
|
||||||
|
CALLCHAIN_LBR,
|
||||||
CALLCHAIN_MAX
|
CALLCHAIN_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
perf_evsel__config_callgraph(struct perf_evsel *evsel)
|
perf_evsel__config_callgraph(struct perf_evsel *evsel,
|
||||||
|
struct record_opts *opts)
|
||||||
{
|
{
|
||||||
bool function = perf_evsel__is_function_event(evsel);
|
bool function = perf_evsel__is_function_event(evsel);
|
||||||
struct perf_event_attr *attr = &evsel->attr;
|
struct perf_event_attr *attr = &evsel->attr;
|
||||||
|
|
||||||
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
|
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
|
||||||
|
|
||||||
|
if (callchain_param.record_mode == CALLCHAIN_LBR) {
|
||||||
|
if (!opts->branch_stack) {
|
||||||
|
if (attr->exclude_user) {
|
||||||
|
pr_warning("LBR callstack option is only available "
|
||||||
|
"to get user callchain information. "
|
||||||
|
"Falling back to framepointers.\n");
|
||||||
|
} else {
|
||||||
|
perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
|
||||||
|
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
|
||||||
|
PERF_SAMPLE_BRANCH_CALL_STACK;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
pr_warning("Cannot use LBR callstack with branch stack. "
|
||||||
|
"Falling back to framepointers.\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (callchain_param.record_mode == CALLCHAIN_DWARF) {
|
if (callchain_param.record_mode == CALLCHAIN_DWARF) {
|
||||||
if (!function) {
|
if (!function) {
|
||||||
perf_evsel__set_sample_bit(evsel, REGS_USER);
|
perf_evsel__set_sample_bit(evsel, REGS_USER);
|
||||||
|
@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
|
||||||
evsel->attr.exclude_callchain_user = 1;
|
evsel->attr.exclude_callchain_user = 1;
|
||||||
|
|
||||||
if (callchain_param.enabled && !evsel->no_aux_samples)
|
if (callchain_param.enabled && !evsel->no_aux_samples)
|
||||||
perf_evsel__config_callgraph(evsel);
|
perf_evsel__config_callgraph(evsel, opts);
|
||||||
|
|
||||||
if (opts->sample_intr_regs) {
|
if (opts->sample_intr_regs) {
|
||||||
attr->sample_regs_intr = PERF_REGS_MASK;
|
attr->sample_regs_intr = PERF_REGS_MASK;
|
||||||
|
|
Loading…
Reference in a new issue