perf script: Add synthesized Intel PT power and ptwrite events
Add definitions for synthesized Intel PT events for power and ptwrite.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1498811802-2301-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 47e780848e
commit 65c5e18f9d
2 changed files with 231 additions and 1 deletion
@@ -1203,10 +1203,122 @@ static void print_sample_bpf_output(struct perf_sample *sample)
 	       (char *)(sample->raw_data));
 }
 
-static void print_sample_synth(struct perf_sample *sample __maybe_unused,
+static void print_sample_spacing(int len, int spacing)
+{
+	if (len > 0 && len < spacing)
+		printf("%*s", spacing - len, "");
+}
+
+static void print_sample_pt_spacing(int len)
+{
+	print_sample_spacing(len, 34);
+}
+
+static void print_sample_synth_ptwrite(struct perf_sample *sample)
+{
+	struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u payload: %#" PRIx64 " ",
+		     data->ip, le64_to_cpu(data->payload));
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_mwait(struct perf_sample *sample)
+{
+	struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hints: %#x extensions: %#x ",
+		     data->hints, data->extensions);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwre(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hw: %u cstate: %u sub-cstate: %u ",
+		     data->hw, data->cstate, data->subcstate);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_exstop(struct perf_sample *sample)
+{
+	struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u ", data->ip);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwrx(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ",
+		     data->deepest_cstate, data->last_cstate,
+		     data->wake_reason);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_cbr(struct perf_sample *sample)
+{
+	struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
+	unsigned int percent, freq;
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	freq = (le32_to_cpu(data->freq) + 500) / 1000;
+	len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq);
+	if (data->max_nonturbo) {
+		percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
+		len += printf("(%3u%%) ", percent);
+	}
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth(struct perf_sample *sample,
 			       struct perf_evsel *evsel)
 {
 	switch (evsel->attr.config) {
+	case PERF_SYNTH_INTEL_PTWRITE:
+		print_sample_synth_ptwrite(sample);
+		break;
+	case PERF_SYNTH_INTEL_MWAIT:
+		print_sample_synth_mwait(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRE:
+		print_sample_synth_pwre(sample);
+		break;
+	case PERF_SYNTH_INTEL_EXSTOP:
+		print_sample_synth_exstop(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRX:
+		print_sample_synth_pwrx(sample);
+		break;
+	case PERF_SYNTH_INTEL_CBR:
+		print_sample_synth_cbr(sample);
+		break;
 	default:
 		break;
 	}
@@ -255,6 +255,124 @@ enum auxtrace_error_type {
 /* Attribute type for custom synthesized events */
 #define PERF_TYPE_SYNTH (INT_MAX + 1U)
 
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+	PERF_SYNTH_INTEL_PTWRITE,
+	PERF_SYNTH_INTEL_MWAIT,
+	PERF_SYNTH_INTEL_PWRE,
+	PERF_SYNTH_INTEL_EXSTOP,
+	PERF_SYNTH_INTEL_PWRX,
+	PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the
+ * structure sizes are 4 bytes bigger than the raw_size, refer
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+	u64 payload;
+};
+
+struct perf_synth_intel_mwait {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	hints		:  8,
+				reserved1	: 24,
+				extensions	:  2,
+				reserved2	: 30;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_pwre {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	reserved1	:  7,
+				hw		:  1,
+				subcstate	:  4,
+				cstate		:  4,
+				reserved2	: 48;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_exstop {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+};
+
+struct perf_synth_intel_pwrx {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	deepest_cstate	:  4,
+				last_cstate	:  4,
+				wake_reason	:  4,
+				reserved1	: 52;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_cbr {
+	u32 padding;
+	union {
+		struct {
+			u32	cbr		:  8,
+				reserved1	:  8,
+				max_nonturbo	:  8,
+				reserved2	:  8;
+		};
+		u32	flags;
+	};
+	u32 freq;
+	u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+	return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
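The comment above the struct definitions explains why each struct starts with a u32 padding field: PERF_SAMPLE_RAW data follows its u32 'size' member, so raw_data sits 4 bytes past an 8-byte boundary, and overlaying the struct at raw_data - 4 restores 8-byte alignment while keeping raw_size 4 bytes smaller than the struct. The following standalone sketch (not part of this commit; the typedefs, the macro re-implementations and the main() driver are illustrative only) shows the producer/consumer arithmetic that perf_synth__raw_data(), perf_synth__raw_size() and perf_sample__synth_ptr() encode:

/*
 * Standalone illustration only, not perf tool code: demonstrates the
 * offset-of-4 convention used by the synthesized-event structs above.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint64_t u64;

struct perf_synth_intel_ptwrite {
	u32 padding;	/* lines the struct up when overlaid at raw_data - 4 */
	union {
		struct {
			u32 ip : 1,
			    reserved : 31;
		};
		u32 flags;
	};
	u64 payload;
};

/* Same arithmetic as the helpers added by this patch. */
#define perf_synth__raw_data(p)	((void *)((char *)(p) + 4))
#define perf_synth__raw_size(d)	(sizeof(d) - 4)

int main(void)
{
	struct perf_synth_intel_ptwrite ptw = { .payload = 0x1234 };

	ptw.ip = 1;

	/* Producer side: the raw data starts 4 bytes into the struct, so
	 * the advertised raw_size is 4 bytes smaller than the struct. */
	void *raw_data = perf_synth__raw_data(&ptw);
	size_t raw_size = perf_synth__raw_size(ptw);

	/* Consumer side: step back 4 bytes from raw_data to overlay the
	 * struct again, which is what perf_sample__synth_ptr() does with
	 * sample->raw_data. */
	struct perf_synth_intel_ptwrite *data =
		(struct perf_synth_intel_ptwrite *)((char *)raw_data - 4);

	printf("raw_size=%zu ip=%u payload=%#llx\n",
	       raw_size, data->ip, (unsigned long long)data->payload);
	return 0;
}

This is also why perf_sample__bad_synth_size() compares raw_size against sizeof(d) - 4 rather than sizeof(d): the leading padding is never part of the raw data carried in the sample.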