perf report: Fix and improve the displaying of per-thread event counters
Improve and fix the handling of per-thread counter stats recorded via perf record -s. Previously we only displayed them in debug printouts (-D), and even that output was hard to disambiguate.

I moved everything to util/values.[ch] so that we may reuse it in perf stat.

We get something like this now:

    #  PID   TID  cache-misses  cache-references
      4658  4659        495581           3238779
      4658  4662        498246           3236823
      4658  4663        499531           3243162

Then it'll be easy to add --pretty=raw to display a single line per thread/event.

By the way, -S was also used for --symbol... So I used -T/--threads here.

perf report: Add -T/--threads to display per-thread counter values

We get something like this now:

    #  PID   TID  cache-misses  cache-references
      4658  4659        495581           3238779
      4658  4662        498246           3236823
      4658  4663        499531           3243162

Per-thread arrays of counter values are managed in util/values.[ch]

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus@samba.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
30dd568c91
commit
8d51327090
5 changed files with 227 additions and 0 deletions
|
@ -27,6 +27,9 @@ OPTIONS
|
|||
-n
|
||||
--show-nr-samples
|
||||
Show the number of samples for each symbol
|
||||
-T
|
||||
--threads
|
||||
Show per-thread event counters
|
||||
-C::
|
||||
--comms=::
|
||||
Only consider symbols in these comms. CSV that understands
|
||||
|
|
|
@ -310,6 +310,7 @@ LIB_H += util/sigchain.h
|
|||
LIB_H += util/symbol.h
|
||||
LIB_H += util/module.h
|
||||
LIB_H += util/color.h
|
||||
LIB_H += util/values.h
|
||||
|
||||
LIB_OBJS += util/abspath.o
|
||||
LIB_OBJS += util/alias.o
|
||||
|
@ -337,6 +338,7 @@ LIB_OBJS += util/color.o
|
|||
LIB_OBJS += util/pager.o
|
||||
LIB_OBJS += util/header.o
|
||||
LIB_OBJS += util/callchain.o
|
||||
LIB_OBJS += util/values.o
|
||||
|
||||
BUILTIN_OBJS += builtin-annotate.o
|
||||
BUILTIN_OBJS += builtin-help.o
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "util/string.h"
|
||||
#include "util/callchain.h"
|
||||
#include "util/strlist.h"
|
||||
#include "util/values.h"
|
||||
|
||||
#include "perf.h"
|
||||
#include "util/header.h"
|
||||
|
@ -53,6 +54,9 @@ static int modules;
|
|||
static int full_paths;
|
||||
static int show_nr_samples;
|
||||
|
||||
static int show_threads;
|
||||
static struct perf_read_values show_threads_values;
|
||||
|
||||
static unsigned long page_size;
|
||||
static unsigned long mmap_window = 32;
|
||||
|
||||
|
@ -1473,6 +1477,9 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
|
|||
|
||||
free(rem_sq_bracket);
|
||||
|
||||
if (show_threads)
|
||||
perf_read_values_display(fp, &show_threads_values);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1758,6 +1765,16 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
|
|||
{
|
||||
struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
|
||||
|
||||
if (show_threads) {
|
||||
char *name = attr ? __event_name(attr->type, attr->config)
|
||||
: "unknown";
|
||||
perf_read_values_add_value(&show_threads_values,
|
||||
event->read.pid, event->read.tid,
|
||||
event->read.id,
|
||||
name,
|
||||
event->read.value);
|
||||
}
|
||||
|
||||
dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
|
||||
(void *)(offset + head),
|
||||
(void *)(long)(event->header.size),
|
||||
|
@ -1839,6 +1856,9 @@ static int __cmd_report(void)
|
|||
|
||||
register_idle_thread();
|
||||
|
||||
if (show_threads)
|
||||
perf_read_values_init(&show_threads_values);
|
||||
|
||||
input = open(input_name, O_RDONLY);
|
||||
if (input < 0) {
|
||||
fprintf(stderr, " failed to open file: %s", input_name);
|
||||
|
@ -1993,6 +2013,9 @@ static int __cmd_report(void)
|
|||
output__resort(total);
|
||||
output__fprintf(stdout, total);
|
||||
|
||||
if (show_threads)
|
||||
perf_read_values_destroy(&show_threads_values);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -2066,6 +2089,8 @@ static const struct option options[] = {
|
|||
"load module symbols - WARNING: use only with -k and LIVE kernel"),
|
||||
OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
|
||||
"Show a column with the number of samples"),
|
||||
OPT_BOOLEAN('T', "threads", &show_threads,
|
||||
"Show per-thread event counters"),
|
||||
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
|
||||
"sort by key(s): pid, comm, dso, symbol, parent"),
|
||||
OPT_BOOLEAN('P', "full-paths", &full_paths,
|
||||
|
|
171
tools/perf/util/values.c
Normal file
171
tools/perf/util/values.c
Normal file
|
@ -0,0 +1,171 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "values.h"
|
||||
|
||||
void perf_read_values_init(struct perf_read_values *values)
|
||||
{
|
||||
values->threads_max = 16;
|
||||
values->pid = malloc(values->threads_max * sizeof(*values->pid));
|
||||
values->tid = malloc(values->threads_max * sizeof(*values->tid));
|
||||
values->value = malloc(values->threads_max * sizeof(*values->value));
|
||||
if (!values->pid || !values->tid || !values->value)
|
||||
die("failed to allocate read_values threads arrays");
|
||||
values->threads = 0;
|
||||
|
||||
values->counters_max = 16;
|
||||
values->counterrawid = malloc(values->counters_max
|
||||
* sizeof(*values->counterrawid));
|
||||
values->countername = malloc(values->counters_max
|
||||
* sizeof(*values->countername));
|
||||
if (!values->counterrawid || !values->countername)
|
||||
die("failed to allocate read_values counters arrays");
|
||||
values->counters = 0;
|
||||
}
|
||||
|
||||
void perf_read_values_destroy(struct perf_read_values *values)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!values->threads_max || !values->counters_max)
|
||||
return;
|
||||
|
||||
for (i = 0; i < values->threads; i++)
|
||||
free(values->value[i]);
|
||||
free(values->pid);
|
||||
free(values->tid);
|
||||
free(values->counterrawid);
|
||||
for (i = 0; i < values->counters; i++)
|
||||
free(values->countername[i]);
|
||||
free(values->countername);
|
||||
}
|
||||
|
||||
static void perf_read_values__enlarge_threads(struct perf_read_values *values)
|
||||
{
|
||||
values->threads_max *= 2;
|
||||
values->pid = realloc(values->pid,
|
||||
values->threads_max * sizeof(*values->pid));
|
||||
values->tid = realloc(values->tid,
|
||||
values->threads_max * sizeof(*values->tid));
|
||||
values->value = realloc(values->value,
|
||||
values->threads_max * sizeof(*values->value));
|
||||
if (!values->pid || !values->tid || !values->value)
|
||||
die("failed to enlarge read_values threads arrays");
|
||||
}
|
||||
|
||||
/*
 * Return the index of the (@pid, @tid) entry in the thread table, appending
 * a new entry (and growing the table if it is full) when none exists yet.
 *
 * A new thread gets a value row with room for counters_max counters. The row
 * is zero-filled so that counters this thread never reports are displayed as
 * 0 rather than as uninitialized memory.
 */
static int perf_read_values__findnew_thread(struct perf_read_values *values,
					    u32 pid, u32 tid)
{
	int i;

	for (i = 0; i < values->threads; i++)
		if (values->pid[i] == pid && values->tid[i] == tid)
			return i;

	if (values->threads == values->threads_max)
		perf_read_values__enlarge_threads(values);

	i = values->threads++;
	values->pid[i] = pid;
	values->tid[i] = tid;
	values->value[i] = calloc(values->counters_max, sizeof(**values->value));
	if (!values->value[i])
		die("failed to allocate read_values counters array");

	return i;
}
|
||||
|
||||
static void perf_read_values__enlarge_counters(struct perf_read_values *values)
|
||||
{
|
||||
int i;
|
||||
|
||||
values->counters_max *= 2;
|
||||
values->counterrawid = realloc(values->counterrawid,
|
||||
values->counters_max * sizeof(*values->counterrawid));
|
||||
values->countername = realloc(values->countername,
|
||||
values->counters_max * sizeof(*values->countername));
|
||||
if (!values->counterrawid || !values->countername)
|
||||
die("failed to enlarge read_values counters arrays");
|
||||
|
||||
for (i = 0; i < values->threads; i++) {
|
||||
values->value[i] = realloc(values->value[i],
|
||||
values->counters_max * sizeof(**values->value));
|
||||
if (!values->value[i])
|
||||
die("failed to enlarge read_values counters arrays");
|
||||
}
|
||||
}
|
||||
|
||||
static int perf_read_values__findnew_counter(struct perf_read_values *values,
|
||||
u64 rawid, char *name)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < values->counters; i++)
|
||||
if (values->counterrawid[i] == rawid)
|
||||
return i;
|
||||
|
||||
if (values->counters == values->counters_max)
|
||||
perf_read_values__enlarge_counters(values);
|
||||
|
||||
i = values->counters++;
|
||||
values->counterrawid[i] = rawid;
|
||||
values->countername[i] = strdup(name);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Store @value as the reading of counter @rawid for thread (@pid, @tid).
 * The thread and/or the counter are added to the tables on first sight;
 * @name labels the counter when it is first added. A later reading for the
 * same thread and counter replaces the stored one.
 */
void perf_read_values_add_value(struct perf_read_values *values,
				u32 pid, u32 tid,
				u64 rawid, char *name, u64 value)
{
	/*
	 * Look the thread up before the counter: a newly created row is
	 * sized with the current counters_max, and a subsequent counter
	 * table growth then resizes it together with all other rows.
	 */
	const int row = perf_read_values__findnew_thread(values, pid, tid);
	const int col = perf_read_values__findnew_counter(values, rawid, name);

	values->value[row][col] = value;
}
|
||||
|
||||
void perf_read_values_display(FILE *fp, struct perf_read_values *values)
|
||||
{
|
||||
int i, j;
|
||||
int pidwidth, tidwidth;
|
||||
int *counterwidth;
|
||||
|
||||
counterwidth = malloc(values->counters * sizeof(*counterwidth));
|
||||
if (!counterwidth)
|
||||
die("failed to allocate counterwidth array");
|
||||
tidwidth = 3;
|
||||
pidwidth = 3;
|
||||
for (j = 0; j < values->counters; j++)
|
||||
counterwidth[j] = strlen(values->countername[j]);
|
||||
for (i = 0; i < values->threads; i++) {
|
||||
int width;
|
||||
|
||||
width = snprintf(NULL, 0, "%d", values->pid[i]);
|
||||
if (width > pidwidth)
|
||||
pidwidth = width;
|
||||
width = snprintf(NULL, 0, "%d", values->tid[i]);
|
||||
if (width > tidwidth)
|
||||
tidwidth = width;
|
||||
for (j = 0; j < values->counters; j++) {
|
||||
width = snprintf(NULL, 0, "%Lu", values->value[i][j]);
|
||||
if (width > counterwidth[j])
|
||||
counterwidth[j] = width;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID");
|
||||
for (j = 0; j < values->counters; j++)
|
||||
fprintf(fp, " %*s", counterwidth[j], values->countername[j]);
|
||||
fprintf(fp, "\n");
|
||||
|
||||
for (i = 0; i < values->threads; i++) {
|
||||
fprintf(fp, " %*d %*d", pidwidth, values->pid[i],
|
||||
tidwidth, values->tid[i]);
|
||||
for (j = 0; j < values->counters; j++)
|
||||
fprintf(fp, " %*Lu",
|
||||
counterwidth[j], values->value[i][j]);
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
}
|
26
tools/perf/util/values.h
Normal file
26
tools/perf/util/values.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
#ifndef _PERF_VALUES_H
|
||||
#define _PERF_VALUES_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
struct perf_read_values {
|
||||
int threads;
|
||||
int threads_max;
|
||||
u32 *pid, *tid;
|
||||
int counters;
|
||||
int counters_max;
|
||||
u64 *counterrawid;
|
||||
char **countername;
|
||||
u64 **value;
|
||||
};
|
||||
|
||||
/* Set up empty thread and counter tables; dies on allocation failure. */
void perf_read_values_init(struct perf_read_values *values);
|
||||
/* Free the memory held by the tables set up by perf_read_values_init(). */
void perf_read_values_destroy(struct perf_read_values *values);
|
||||
|
||||
/*
 * Record value as the reading of counter rawid for thread (pid, tid),
 * adding the thread and/or counter on first use. name is copied when the
 * counter is first added.
 */
void perf_read_values_add_value(struct perf_read_values *values,
|
||||
u32 pid, u32 tid,
|
||||
u64 rawid, char *name, u64 value);
|
||||
|
||||
/* Print a table with one line per thread and one column per counter to fp. */
void perf_read_values_display(FILE *fp, struct perf_read_values *values);
|
||||
|
||||
#endif /* _PERF_VALUES_H */
|
Loading…
Reference in a new issue