perf tools: Make perf.data more self-descriptive (v8)
The goal of this patch is to include more information about the host environment into the perf.data so it is more self-descriptive. Over time, profiles are captured on various machines and it becomes hard to track what was recorded, on what machine and when. This patch provides a way to solve this by extending the perf.data file with basic information about the host machine. To add those extensions, we leverage the feature bits capabilities of the perf.data format. The change is backward compatible with existing perf.data files. We define the following useful new extensions: - HEADER_HOSTNAME: the hostname - HEADER_OSRELEASE: the kernel release number - HEADER_ARCH: the hw architecture - HEADER_CPUDESC: generic CPU description - HEADER_NRCPUS: number of online/avail cpus - HEADER_CMDLINE: perf command line - HEADER_VERSION: perf version - HEADER_TOPOLOGY: cpu topology - HEADER_EVENT_DESC: full event description (attrs) - HEADER_CPUID: easy-to-parse low level CPU identification The small granularity for the entries is to make it easier to extend without breaking backward compatibility. Many entries are provided as ASCII strings. Perf report/script have been modified to print the basic information as easy-to-parse ASCII strings. Extended information about CPU and NUMA topology may be requested with the -I option. Thanks to David Ahern for reviewing and testing the many versions of this patch. $ perf report --stdio # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory :8105360
kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # ======== # ... $ perf report --stdio -I # ======== # captured on : Mon Sep 26 15:22:14 2011 # hostname : quad # os release : 3.1.0-rc4-tip # perf version : 3.1.0-rc4 # arch : x86_64 # nrcpus online : 4 # nrcpus avail : 4 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz # cpuid : GenuineIntel,6,15,11 # total memory :8105360
kB # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31, # sibling cores : 0-3 # sibling threads : 0 # sibling threads : 1 # sibling threads : 2 # sibling threads : 3 # node0 meminfo : total = 8320608 kB, free = 7571024 kB # node0 cpu list : 0-3 # ======== # ... Reviewed-by: David Ahern <dsahern@gmail.com> Tested-by: David Ahern <dsahern@gmail.com> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert Richter <robert.richter@amd.com> Cc: Andi Kleen <ak@linux.intel.com> Link: http://lkml.kernel.org/r/20110930134040.GA5575@quad Signed-off-by: Stephane Eranian <eranian@google.com> [ committer notes: Use --show-info in the tools as was in the docs, rename perf_header_fprintf_info to perf_file_section__fprintf_info, fixup conflict with f69b64f7
"perf: Support setting the disassembler style" ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
be83f5ed6b
commit
fbe96f29ce
15 changed files with 1308 additions and 35 deletions
|
@ -139,6 +139,12 @@ OPTIONS
|
|||
|
||||
--show-total-period:: Show a column with the sum of periods.
|
||||
|
||||
-I::
|
||||
--show-info::
|
||||
Display extended information about the perf.data file. This adds
|
||||
information which may be very large and thus may clutter the display.
|
||||
It currently includes: cpu and numa topology of the host system.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1]
|
||||
|
|
|
@ -188,6 +188,13 @@ OPTIONS
|
|||
CPUs are specified with -: 0-2. Default is to report samples on all
|
||||
CPUs.
|
||||
|
||||
-I::
|
||||
--show-info::
|
||||
Display extended information about the perf.data file. This adds
|
||||
information which may be very large and thus may clutter the display.
|
||||
It currently includes: cpu and numa topology of the host system.
|
||||
It can only be used with the perf script report mode.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
||||
|
|
|
@ -2,3 +2,4 @@ ifndef NO_DWARF
|
|||
PERF_HAVE_DWARF_REGS := 1
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
|
||||
endif
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
|
||||
|
|
36
tools/perf/arch/powerpc/util/header.c
Normal file
36
tools/perf/arch/powerpc/util/header.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../util/header.h"
|
||||
|
||||
#define __stringify_1(x) #x
|
||||
#define __stringify(x) __stringify_1(x)
|
||||
|
||||
#define mfspr(rn) ({unsigned long rval; \
|
||||
asm volatile("mfspr %0," __stringify(rn) \
|
||||
: "=r" (rval)); rval; })
|
||||
|
||||
#define SPRN_PVR 0x11F /* Processor Version Register */
|
||||
#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
|
||||
#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */
|
||||
|
||||
int
|
||||
get_cpuid(char *buffer, size_t sz)
|
||||
{
|
||||
unsigned long pvr;
|
||||
int nb;
|
||||
|
||||
pvr = mfspr(SPRN_PVR);
|
||||
|
||||
nb = snprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
|
||||
|
||||
/* look for end marker to ensure the entire data fit */
|
||||
if (strchr(buffer, '$')) {
|
||||
buffer[nb-1] = '\0';
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
|
@ -2,3 +2,4 @@ ifndef NO_DWARF
|
|||
PERF_HAVE_DWARF_REGS := 1
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
|
||||
endif
|
||||
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
|
||||
|
|
59
tools/perf/arch/x86/util/header.c
Normal file
59
tools/perf/arch/x86/util/header.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../../util/header.h"
|
||||
|
||||
/*
 * cpuid - execute the CPUID instruction for leaf @op.
 *
 * The four result registers are stored through @a (EAX), @b (EBX),
 * @c (ECX) and @d (EDX).
 *
 * EBX is not named as an output operand: the sequence saves it with a raw
 * "push" opcode (0x53), copies the CPUID result from EBX to ESI, and
 * restores it with a raw "pop" opcode (0x5b), so the caller-visible EBX
 * value is unchanged and the EBX result is handed back through ESI.
 *
 * NOTE(review): the push/pop pair writes below the current stack pointer;
 * on x86_64 that area may be the red zone of the enclosing function --
 * confirm this is safe for every caller/optimization level.
 */
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
      unsigned int *d)
{
	__asm__ __volatile__ (
		".byte 0x53\n\t"		/* push ebx/rbx */
		"cpuid\n\t"
		"movl %%ebx, %%esi\n\t"		/* EBX result -> ESI */
		".byte 0x5b"			/* pop ebx/rbx */
		: "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
		: "a" (op));
}
|
||||
|
||||
/*
 * get_cpuid - format the x86 CPU identification string.
 *
 * Writes "<vendor>,<family>,<model>,<stepping>" into @buffer, where
 * <vendor> is the 12-character string returned by CPUID leaf 0 and the
 * numeric fields are decoded from EAX of CPUID leaf 1.  When leaf 1 is not
 * available the numeric fields keep their -1 initial value (printed as
 * unsigned, exactly as before).
 *
 * @buffer: destination for the NUL-terminated string
 * @sz:     size of @buffer in bytes
 *
 * Returns 0 when the whole string fit into @buffer, -1 when it would have
 * been truncated (or snprintf() failed).  On failure @buffer may hold a
 * truncated string and must not be relied upon.
 */
int
get_cpuid(char *buffer, size_t sz)
{
	unsigned int a, b, c, d, lvl;
	int family = -1, model = -1, step = -1;
	int nb;
	char vendor[16];

	/* leaf 0: max supported leaf in EAX, vendor bytes in EBX, EDX, ECX */
	cpuid(0, &lvl, &b, &c, &d);
	memcpy(&vendor[0], &b, 4);
	memcpy(&vendor[4], &d, 4);
	memcpy(&vendor[8], &c, 4);
	vendor[12] = '\0';

	if (lvl >= 1) {
		cpuid(1, &a, &b, &c, &d);

		family = (a >> 8) & 0xf;	/* bits 11 - 8 */
		model  = (a >> 4) & 0xf;	/* bits 7 - 4 */
		step   = a & 0xf;		/* bits 3 - 0 */

		/* extended family */
		if (family == 0xf)
			family += (a >> 20) & 0xff;

		/* extended model */
		if (family >= 0x6)
			model += ((a >> 16) & 0xf) << 4;
	}

	/* explicit casts keep the arguments matched to the %u conversions */
	nb = snprintf(buffer, sz, "%s,%u,%u,%u", vendor,
		      (unsigned int)family, (unsigned int)model,
		      (unsigned int)step);

	/*
	 * snprintf() returns the length the full string needs; anything
	 * >= sz means the output was cut short.  This replaces the old '$'
	 * end-marker scan, which read the buffer even when sz == 0 and could
	 * be fooled by a '$' inside the vendor string.
	 */
	if (nb < 0 || (size_t)nb >= sz)
		return -1;

	return 0;
}
|
|
@ -529,6 +529,19 @@ static int __cmd_record(int argc, const char **argv)
|
|||
if (have_tracepoints(&evsel_list->entries))
|
||||
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
|
||||
|
||||
perf_header__set_feat(&session->header, HEADER_HOSTNAME);
|
||||
perf_header__set_feat(&session->header, HEADER_OSRELEASE);
|
||||
perf_header__set_feat(&session->header, HEADER_ARCH);
|
||||
perf_header__set_feat(&session->header, HEADER_CPUDESC);
|
||||
perf_header__set_feat(&session->header, HEADER_NRCPUS);
|
||||
perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
|
||||
perf_header__set_feat(&session->header, HEADER_CMDLINE);
|
||||
perf_header__set_feat(&session->header, HEADER_VERSION);
|
||||
perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
|
||||
perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
|
||||
perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
|
||||
perf_header__set_feat(&session->header, HEADER_CPUID);
|
||||
|
||||
/* 512 kiB: default amount of unprivileged mlocked memory */
|
||||
if (mmap_pages == UINT_MAX)
|
||||
mmap_pages = (512 * 1024) / page_size;
|
||||
|
@ -800,6 +813,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
|
|||
int err = -ENOMEM;
|
||||
struct perf_evsel *pos;
|
||||
|
||||
perf_header__set_cmdline(argc, argv);
|
||||
|
||||
evsel_list = perf_evlist__new(NULL, NULL);
|
||||
if (evsel_list == NULL)
|
||||
return -ENOMEM;
|
||||
|
|
|
@ -40,6 +40,7 @@ static char const *input_name = "perf.data";
|
|||
static bool force, use_tui, use_stdio;
|
||||
static bool hide_unresolved;
|
||||
static bool dont_use_callchains;
|
||||
static bool show_full_info;
|
||||
|
||||
static bool show_threads;
|
||||
static struct perf_read_values show_threads_values;
|
||||
|
@ -273,6 +274,9 @@ static int __cmd_report(void)
|
|||
goto out_delete;
|
||||
}
|
||||
|
||||
if (use_browser <= 0)
|
||||
perf_session__fprintf_info(session, stdout, show_full_info);
|
||||
|
||||
if (show_threads)
|
||||
perf_read_values_init(&show_threads_values);
|
||||
|
||||
|
@ -485,6 +489,8 @@ static const struct option options[] = {
|
|||
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
|
||||
"Look for files with symbols relative to this directory"),
|
||||
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
|
||||
OPT_BOOLEAN('I', "show-info", &show_full_info,
|
||||
"Display extended information about perf.data file"),
|
||||
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
|
||||
"Specify disassembler style (e.g. -M intel for intel syntax)"),
|
||||
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
|
||||
|
|
|
@ -22,6 +22,7 @@ static u64 last_timestamp;
|
|||
static u64 nr_unordered;
|
||||
extern const struct option record_options[];
|
||||
static bool no_callchain;
|
||||
static bool show_full_info;
|
||||
static const char *cpu_list;
|
||||
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
||||
|
||||
|
@ -1083,7 +1084,8 @@ static const struct option options[] = {
|
|||
"comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
|
||||
parse_output_fields),
|
||||
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
|
||||
|
||||
OPT_BOOLEAN('I', "show-info", &show_full_info,
|
||||
"display extended information from perf.data file"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
@ -1268,6 +1270,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
|
|||
return -1;
|
||||
}
|
||||
|
||||
perf_session__fprintf_info(session, stdout, show_full_info);
|
||||
|
||||
if (!no_callchain)
|
||||
symbol_conf.use_callchain = true;
|
||||
else
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include "util/util.h"
|
||||
#include "util/strbuf.h"
|
||||
|
||||
extern const char perf_version_string[];
|
||||
extern const char perf_usage_string[];
|
||||
extern const char perf_more_info_string[];
|
||||
|
||||
|
|
|
@ -9,18 +9,21 @@ void get_term_dimensions(struct winsize *ws);
|
|||
#include "../../arch/x86/include/asm/unistd.h"
|
||||
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
|
||||
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
|
||||
#define CPUINFO_PROC "model name"
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#include "../../arch/x86/include/asm/unistd.h"
|
||||
#define rmb() asm volatile("lfence" ::: "memory")
|
||||
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
|
||||
#define CPUINFO_PROC "model name"
|
||||
#endif
|
||||
|
||||
#ifdef __powerpc__
|
||||
#include "../../arch/powerpc/include/asm/unistd.h"
|
||||
#define rmb() asm volatile ("sync" ::: "memory")
|
||||
#define cpu_relax() asm volatile ("" ::: "memory");
|
||||
#define CPUINFO_PROC "cpu"
|
||||
#endif
|
||||
|
||||
#ifdef __s390__
|
||||
|
@ -37,30 +40,35 @@ void get_term_dimensions(struct winsize *ws);
|
|||
# define rmb() asm volatile("" ::: "memory")
|
||||
#endif
|
||||
#define cpu_relax() asm volatile("" ::: "memory")
|
||||
#define CPUINFO_PROC "cpu type"
|
||||
#endif
|
||||
|
||||
#ifdef __hppa__
|
||||
#include "../../arch/parisc/include/asm/unistd.h"
|
||||
#define rmb() asm volatile("" ::: "memory")
|
||||
#define cpu_relax() asm volatile("" ::: "memory");
|
||||
#define CPUINFO_PROC "cpu"
|
||||
#endif
|
||||
|
||||
#ifdef __sparc__
|
||||
#include "../../arch/sparc/include/asm/unistd.h"
|
||||
#define rmb() asm volatile("":::"memory")
|
||||
#define cpu_relax() asm volatile("":::"memory")
|
||||
#define CPUINFO_PROC "cpu"
|
||||
#endif
|
||||
|
||||
#ifdef __alpha__
|
||||
#include "../../arch/alpha/include/asm/unistd.h"
|
||||
#define rmb() asm volatile("mb" ::: "memory")
|
||||
#define cpu_relax() asm volatile("" ::: "memory")
|
||||
#define CPUINFO_PROC "cpu model"
|
||||
#endif
|
||||
|
||||
#ifdef __ia64__
|
||||
#include "../../arch/ia64/include/asm/unistd.h"
|
||||
#define rmb() asm volatile ("mf" ::: "memory")
|
||||
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
|
||||
#define CPUINFO_PROC "model name"
|
||||
#endif
|
||||
|
||||
#ifdef __arm__
|
||||
|
@ -71,6 +79,7 @@ void get_term_dimensions(struct winsize *ws);
|
|||
*/
|
||||
#define rmb() ((void(*)(void))0xffff0fa0)()
|
||||
#define cpu_relax() asm volatile("":::"memory")
|
||||
#define CPUINFO_PROC "Processor"
|
||||
#endif
|
||||
|
||||
#ifdef __mips__
|
||||
|
@ -83,6 +92,7 @@ void get_term_dimensions(struct winsize *ws);
|
|||
: /* no input */ \
|
||||
: "memory")
|
||||
#define cpu_relax() asm volatile("" ::: "memory")
|
||||
#define CPUINFO_PROC "cpu model"
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
@ -171,5 +181,6 @@ struct ip_callchain {
|
|||
};
|
||||
|
||||
extern bool perf_host, perf_guest;
|
||||
extern const char perf_version_string[];
|
||||
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -12,6 +12,20 @@
|
|||
/*
 * Feature identifiers for the optional perf.data header sections; these
 * are the values handed to perf_header__set_feat() and friends.
 *
 * Values are assigned implicitly from declaration order, so inserting or
 * reordering entries renumbers everything after that point.
 * NOTE(review): presumably these numbers are recorded in perf.data files,
 * in which case new entries should only be added just before
 * HEADER_LAST_FEATURE -- confirm.
 */
enum {
	HEADER_TRACE_INFO = 1,
	HEADER_BUILD_ID,

	/* host / tool description */
	HEADER_HOSTNAME,	/* the hostname */
	HEADER_OSRELEASE,	/* the kernel release number */
	HEADER_VERSION,		/* perf version */
	HEADER_ARCH,		/* the hw architecture */
	HEADER_NRCPUS,		/* number of online/avail cpus */
	HEADER_CPUDESC,		/* generic CPU description */
	HEADER_CPUID,		/* easy-to-parse low level CPU identification */
	HEADER_TOTAL_MEM,	/* total memory */
	HEADER_CMDLINE,		/* perf command line */
	HEADER_EVENT_DESC,	/* full event description (attrs) */
	HEADER_CPU_TOPOLOGY,	/* cpu topology */
	HEADER_NUMA_TOPOLOGY,	/* numa topology */

	/* must stay last */
	HEADER_LAST_FEATURE,
};
|
||||
|
||||
|
@ -68,10 +82,15 @@ void perf_header__set_feat(struct perf_header *header, int feat);
|
|||
void perf_header__clear_feat(struct perf_header *header, int feat);
|
||||
bool perf_header__has_feat(const struct perf_header *header, int feat);
|
||||
|
||||
int perf_header__set_cmdline(int argc, const char **argv);
|
||||
|
||||
int perf_header__process_sections(struct perf_header *header, int fd,
|
||||
void *data,
|
||||
int (*process)(struct perf_file_section *section,
|
||||
struct perf_header *ph,
|
||||
int feat, int fd));
|
||||
struct perf_header *ph,
|
||||
int feat, int fd, void *data));
|
||||
|
||||
int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
|
||||
|
||||
int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
|
||||
const char *name, bool is_kallsyms);
|
||||
|
@ -104,4 +123,10 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
|
|||
struct perf_session *session);
|
||||
int perf_event__process_build_id(union perf_event *event,
|
||||
struct perf_session *session);
|
||||
|
||||
/*
|
||||
* arch specific callback
|
||||
*/
|
||||
int get_cpuid(char *buffer, size_t sz);
|
||||
|
||||
#endif /* __PERF_HEADER_H */
|
||||
|
|
|
@ -1326,3 +1326,22 @@ int perf_session__cpu_bitmap(struct perf_session *session,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
|
||||
bool full)
|
||||
{
|
||||
struct stat st;
|
||||
int ret;
|
||||
|
||||
if (session == NULL || fp == NULL)
|
||||
return;
|
||||
|
||||
ret = fstat(session->fd, &st);
|
||||
if (ret == -1)
|
||||
return;
|
||||
|
||||
fprintf(fp, "# ========\n");
|
||||
fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
|
||||
perf_header__fprintf_info(session, fp, full);
|
||||
fprintf(fp, "# ========\n#\n");
|
||||
}
|
||||
|
|
|
@ -177,4 +177,5 @@ void perf_session__print_ip(union perf_event *event,
|
|||
int perf_session__cpu_bitmap(struct perf_session *session,
|
||||
const char *cpu_list, unsigned long *cpu_bitmap);
|
||||
|
||||
void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
|
||||
#endif /* __PERF_SESSION_H */
|
||||
|
|
Loading…
Reference in a new issue