kernel-fxtec-pro1x/arch/arm/kernel/perf_event.c
Will Deacon ddee87f208 ARM: 5959/1: ARM: perf-events: request PMU interrupts with IRQF_NOBALANCING
If IRQ balancing is used on a multicore ARM system, PMU interrupt
lines may be relocated onto CPUs other than the one causing the
counter overflow. This can result in misattribution of events to
the wrong core and, in the case that the CPU handling the interrupt
has not experience counter overflow, the interrupt can be disabled
because the handler returns IRQ_NONE.

This patch adds the IRQF_NOBALANCING flag to the request_irq call
in perf_events.c.

Acked-by: Jamie Iles <jamie.iles@picochip.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2010-03-13 10:50:28 +00:00

2277 lines
59 KiB
C

#undef DEBUG
/*
* ARM performance counter support.
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
*
* ARMv7 support: Jean Pihet <jpihet@mvista.com>
* 2010 (c) MontaVista Software, LLC.
*
* This code is based on the sparc64 perf event code, which is in turn based
* on the x86 code. Callchain code is based on the ARM OProfile backtrace
* code.
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>
static const struct pmu_irqs *pmu_irqs;
/*
* Hardware lock to serialize accesses to PMU registers. Needed for the
* read/modify/write sequences.
*/
DEFINE_SPINLOCK(pmu_lock);
/*
* ARMv6 supports a maximum of 3 events, starting from index 1. If we add
* another platform that supports more, we need to increase this to be the
* largest of all platforms.
*
* ARMv7 supports up to 32 events:
* cycle counter CCNT + 31 events counters CNT0..30.
* Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
*/
#define ARMPMU_MAX_HWEVENTS 33
/* The events for a given CPU. */
struct cpu_hw_events {
/*
* The events that are active on the CPU for the given index. Index 0
* is reserved.
*/
struct perf_event *events[ARMPMU_MAX_HWEVENTS];
/*
* A 1 bit for an index indicates that the counter is being used for
* an event. A 0 means that the counter can be used.
*/
unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
/*
* A 1 bit for an index indicates that the counter is actively being
* used.
*/
unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
struct arm_pmu {
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
int (*event_map)(int evt);
u64 (*raw_event)(u64);
int (*get_event_idx)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
u32 (*read_counter)(int idx);
void (*write_counter)(int idx, u32 val);
void (*start)(void);
void (*stop)(void);
int num_events;
u64 max_period;
};
/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) \
PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
static int
armpmu_map_cache_event(u64 config)
{
unsigned int cache_type, cache_op, cache_result, ret;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
return -ENOENT;
return ret;
}
static int
armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
s64 left = atomic64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
if (unlikely(left <= -period)) {
left = period;
atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (left > (s64)armpmu->max_period)
left = armpmu->max_period;
atomic64_set(&hwc->prev_count, (u64)-left);
armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
perf_event_update_userpage(event);
return ret;
}
static u64
armpmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
int shift = 64 - 32;
s64 prev_raw_count, new_raw_count;
s64 delta;
again:
prev_raw_count = atomic64_read(&hwc->prev_count);
new_raw_count = armpmu->read_counter(idx);
if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
atomic64_add(delta, &event->count);
atomic64_sub(delta, &hwc->period_left);
return new_raw_count;
}
static void
armpmu_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0);
clear_bit(idx, cpuc->active_mask);
armpmu->disable(hwc, idx);
barrier();
armpmu_event_update(event, hwc, idx);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
perf_event_update_userpage(event);
}
static void
armpmu_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
/* Don't read disabled counters! */
if (hwc->idx < 0)
return;
armpmu_event_update(event, hwc, hwc->idx);
}
static void
armpmu_unthrottle(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
/*
* Set the period again. Some counters can't be stopped, so when we
* were throttled we simply disabled the IRQ source and the counter
* may have been left counting. If we don't do this step then we may
* get an interrupt too soon or *way* too late if the overflow has
* happened since disabling.
*/
armpmu_event_set_period(event, hwc, hwc->idx);
armpmu->enable(hwc, hwc->idx);
}
static int
armpmu_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
/* If we don't have a space for the counter then finish early. */
idx = armpmu->get_event_idx(cpuc, hwc);
if (idx < 0) {
err = idx;
goto out;
}
/*
* If there is an event in the counter we are going to use then make
* sure it is disabled.
*/
event->hw.idx = idx;
armpmu->disable(hwc, idx);
cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
/* Set the period for the event. */
armpmu_event_set_period(event, hwc, idx);
/* Enable the event. */
armpmu->enable(hwc, idx);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
out:
return err;
}
static struct pmu pmu = {
.enable = armpmu_enable,
.disable = armpmu_disable,
.unthrottle = armpmu_unthrottle,
.read = armpmu_read,
};
static int
validate_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct hw_perf_event fake_event = event->hw;
if (event->pmu && event->pmu != &pmu)
return 0;
return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}
static int
validate_group(struct perf_event *event)
{
struct perf_event *sibling, *leader = event->group_leader;
struct cpu_hw_events fake_pmu;
memset(&fake_pmu, 0, sizeof(fake_pmu));
if (!validate_event(&fake_pmu, leader))
return -ENOSPC;
list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
if (!validate_event(&fake_pmu, sibling))
return -ENOSPC;
}
if (!validate_event(&fake_pmu, event))
return -ENOSPC;
return 0;
}
static int
armpmu_reserve_hardware(void)
{
int i;
int err;
pmu_irqs = reserve_pmu();
if (IS_ERR(pmu_irqs)) {
pr_warning("unable to reserve pmu\n");
return PTR_ERR(pmu_irqs);
}
init_pmu();
if (pmu_irqs->num_irqs < 1) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
for (i = 0; i < pmu_irqs->num_irqs; ++i) {
err = request_irq(pmu_irqs->irqs[i], armpmu->handle_irq,
IRQF_DISABLED | IRQF_NOBALANCING,
"armpmu", NULL);
if (err) {
pr_warning("unable to request IRQ%d for ARM "
"perf counters\n", pmu_irqs->irqs[i]);
break;
}
}
if (err) {
for (i = i - 1; i >= 0; --i)
free_irq(pmu_irqs->irqs[i], NULL);
release_pmu(pmu_irqs);
pmu_irqs = NULL;
}
return err;
}
static void
armpmu_release_hardware(void)
{
int i;
for (i = pmu_irqs->num_irqs - 1; i >= 0; --i)
free_irq(pmu_irqs->irqs[i], NULL);
armpmu->stop();
release_pmu(pmu_irqs);
pmu_irqs = NULL;
}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);
static void
hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
armpmu_release_hardware();
mutex_unlock(&pmu_reserve_mutex);
}
}
static int
__hw_perf_event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int mapping, err;
/* Decode the generic type into an ARM event identifier. */
if (PERF_TYPE_HARDWARE == event->attr.type) {
mapping = armpmu->event_map(event->attr.config);
} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
mapping = armpmu_map_cache_event(event->attr.config);
} else if (PERF_TYPE_RAW == event->attr.type) {
mapping = armpmu->raw_event(event->attr.config);
} else {
pr_debug("event type %x not supported\n", event->attr.type);
return -EOPNOTSUPP;
}
if (mapping < 0) {
pr_debug("event %x:%llx not supported\n", event->attr.type,
event->attr.config);
return mapping;
}
/*
* Check whether we need to exclude the counter from certain modes.
* The ARM performance counters are on all of the time so if someone
* has asked us for some excludes then we have to fail.
*/
if (event->attr.exclude_kernel || event->attr.exclude_user ||
event->attr.exclude_hv || event->attr.exclude_idle) {
pr_debug("ARM performance counters do not support "
"mode exclusion\n");
return -EPERM;
}
/*
* We don't assign an index until we actually place the event onto
* hardware. Use -1 to signify that we haven't decided where to put it
* yet. For SMP systems, each core has it's own PMU so we can't do any
* clever allocation or constraints checking at this point.
*/
hwc->idx = -1;
/*
* Store the event encoding into the config_base field. config and
* event_base are unused as the only 2 things we need to know are
* the event mapping and the counter to use. The counter to use is
* also the indx and the config_base is the event type.
*/
hwc->config_base = (unsigned long)mapping;
hwc->config = 0;
hwc->event_base = 0;
if (!hwc->sample_period) {
hwc->sample_period = armpmu->max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
}
err = 0;
if (event->group_leader != event) {
err = validate_group(event);
if (err)
return -EINVAL;
}
return err;
}
const struct pmu *
hw_perf_event_init(struct perf_event *event)
{
int err = 0;
if (!armpmu)
return ERR_PTR(-ENODEV);
event->destroy = hw_perf_event_destroy;
if (!atomic_inc_not_zero(&active_events)) {
if (atomic_read(&active_events) > perf_max_events) {
atomic_dec(&active_events);
return ERR_PTR(-ENOSPC);
}
mutex_lock(&pmu_reserve_mutex);
if (atomic_read(&active_events) == 0) {
err = armpmu_reserve_hardware();
}
if (!err)
atomic_inc(&active_events);
mutex_unlock(&pmu_reserve_mutex);
}
if (err)
return ERR_PTR(err);
err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);
return err ? ERR_PTR(err) : &pmu;
}
void
hw_perf_enable(void)
{
/* Enable all of the perf events on hardware. */
int idx;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (!armpmu)
return;
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
if (!event)
continue;
armpmu->enable(&event->hw, idx);
}
armpmu->start();
}
void
hw_perf_disable(void)
{
if (armpmu)
armpmu->stop();
}
/*
* ARMv6 Performance counter handling code.
*
* ARMv6 has 2 configurable performance counters and a single cycle counter.
* They all share a single reset bit but can be written to zero so we can use
* that for a reset.
*
* The counters can't be individually enabled or disabled so when we remove
* one event and replace it with another we could get spurious counts from the
* wrong event. However, we can take advantage of the fact that the
* performance counters can export events to the event bus, and the event bus
* itself can be monitored. This requires that we *don't* export the events to
* the event bus. The procedure for disabling a configurable counter is:
* - change the counter to count the ETMEXTOUT[0] signal (0x20). This
* effectively stops the counter from counting.
* - disable the counter's interrupt generation (each counter has it's
* own interrupt enable bit).
* Once stopped, the counter value can be written as 0 to reset.
*
* To enable a counter:
* - enable the counter's interrupt generation.
* - set the new event type.
*
* Note: the dedicated cycle counter only counts cycles and can't be
* enabled/disabled independently of the others. When we want to disable the
* cycle counter, we have to just disable the interrupt reporting and start
* ignoring that counter. When re-enabling, we have to reset the value and
* enable the interrupt.
*/
enum armv6_perf_types {
ARMV6_PERFCTR_ICACHE_MISS = 0x0,
ARMV6_PERFCTR_IBUF_STALL = 0x1,
ARMV6_PERFCTR_DDEP_STALL = 0x2,
ARMV6_PERFCTR_ITLB_MISS = 0x3,
ARMV6_PERFCTR_DTLB_MISS = 0x4,
ARMV6_PERFCTR_BR_EXEC = 0x5,
ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
ARMV6_PERFCTR_INSTR_EXEC = 0x7,
ARMV6_PERFCTR_DCACHE_HIT = 0x9,
ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
ARMV6_PERFCTR_DCACHE_MISS = 0xB,
ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
ARMV6_PERFCTR_NOP = 0x20,
};
enum armv6_counters {
ARMV6_CYCLE_COUNTER = 1,
ARMV6_COUNTER0,
ARMV6_COUNTER1,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
enum armv6mpcore_perf_types {
ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
};
/*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
static inline unsigned long
armv6_pmcr_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
return val;
}
static inline void
armv6_pmcr_write(unsigned long val)
{
asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
}
#define ARMV6_PMCR_ENABLE (1 << 0)
#define ARMV6_PMCR_CTR01_RESET (1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
ARMV6_PMCR_CCOUNT_OVERFLOW)
static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
}
static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
enum armv6_counters counter)
{
int ret = 0;
if (ARMV6_CYCLE_COUNTER == counter)
ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
else if (ARMV6_COUNTER0 == counter)
ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
else if (ARMV6_COUNTER1 == counter)
ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return ret;
}
static inline u32
armv6pmu_read_counter(int counter)
{
unsigned long value = 0;
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
return value;
}
static inline void
armv6pmu_write_counter(int counter,
u32 value)
{
if (ARMV6_CYCLE_COUNTER == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
else if (ARMV6_COUNTER0 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
else if (ARMV6_COUNTER1 == counter)
asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
else
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
void
armv6pmu_enable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = 0;
evt = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_EVT_COUNT0_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_EVT_COUNT1_MASK;
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t
armv6pmu_handle_irq(int irq_num,
void *dev)
{
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
if (!armv6_pmcr_has_overflowed(pmcr))
return IRQ_NONE;
regs = get_irq_regs();
/*
* The interrupts are cleared by writing the overflow flags back to
* the control register. All of the other bits don't have any effect
* if they are rewritten, so write the whole value back.
*/
armv6_pmcr_write(pmcr);
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts enabled. For
* platforms that can have the PMU interrupts raised as a PMI, this
* will not work.
*/
perf_event_do_pending();
return IRQ_HANDLED;
}
static void
armv6pmu_start(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val |= ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
void
armv6pmu_stop(void)
{
unsigned long flags, val;
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline int
armv6pmu_event_map(int config)
{
int mapping = armv6_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static inline int
armv6mpcore_pmu_event_map(int config)
{
int mapping = armv6mpcore_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static u64
armv6pmu_raw_event(u64 config)
{
return config & 0xff;
}
static int
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
/* Always place a cycle counter into the cycle counter. */
if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV6_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* counter0 and counter1.
*/
if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
return ARMV6_COUNTER1;
}
if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
return ARMV6_COUNTER0;
}
/* The counters are all in use. */
return -EAGAIN;
}
}
static void
armv6pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, evt, flags;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
evt = 0;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Mask out the current event and set the counter to count the number
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
int idx)
{
unsigned long val, mask, flags, evt = 0;
if (ARMV6_CYCLE_COUNTER == idx) {
mask = ARMV6_PMCR_CCOUNT_IEN;
} else if (ARMV6_COUNTER0 == idx) {
mask = ARMV6_PMCR_COUNT0_IEN;
} else if (ARMV6_COUNTER1 == idx) {
mask = ARMV6_PMCR_COUNT1_IEN;
} else {
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
/*
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
spin_lock_irqsave(&pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static const struct arm_pmu armv6pmu = {
.name = "v6",
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6pmu_disable_event,
.event_map = armv6pmu_event_map,
.raw_event = armv6pmu_raw_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
/*
* ARMv6mpcore is almost identical to single core ARMv6 with the exception
* that some of the events have different enumerations and that there is no
* *hack* to stop the programmable counters. To stop the counters we simply
* disable the interrupt reporting and update the event. When unthrottling we
* reset the period and enable the interrupt reporting.
*/
static const struct arm_pmu armv6mpcore_pmu = {
.name = "v6mpcore",
.handle_irq = armv6pmu_handle_irq,
.enable = armv6pmu_enable_event,
.disable = armv6mpcore_pmu_disable_event,
.event_map = armv6mpcore_pmu_event_map,
.raw_event = armv6pmu_raw_event,
.read_counter = armv6pmu_read_counter,
.write_counter = armv6pmu_write_counter,
.get_event_idx = armv6pmu_get_event_idx,
.start = armv6pmu_start,
.stop = armv6pmu_stop,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
/*
* ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
*
* Copied from ARMv6 code, with the low level code inspired
* by the ARMv7 Oprofile code.
*
* Cortex-A8 has up to 4 configurable performance counters and
* a single cycle counter.
* Cortex-A9 has up to 31 configurable performance counters and
* a single cycle counter.
*
* All counters can be enabled/disabled and IRQ masked separately. The cycle
* counter and all 4 performance counters together can be reset separately.
*/
#define ARMV7_PMU_CORTEX_A8_NAME "ARMv7 Cortex-A8"
#define ARMV7_PMU_CORTEX_A9_NAME "ARMv7 Cortex-A9"
/* Common ARMv7 event types */
enum armv7_perf_types {
ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
ARMV7_PERFCTR_IFETCH_MISS = 0x01,
ARMV7_PERFCTR_ITLB_MISS = 0x02,
ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
ARMV7_PERFCTR_DTLB_REFILL = 0x05,
ARMV7_PERFCTR_DREAD = 0x06,
ARMV7_PERFCTR_DWRITE = 0x07,
ARMV7_PERFCTR_EXC_TAKEN = 0x09,
ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
ARMV7_PERFCTR_CID_WRITE = 0x0B,
/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
* It counts:
* - all branch instructions,
* - instructions that explicitly write the PC,
* - exception generating instructions.
*/
ARMV7_PERFCTR_PC_WRITE = 0x0C,
ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
ARMV7_PERFCTR_CPU_CYCLES = 0xFF
};
/* ARMv7 Cortex-A8 specific event types */
enum armv7_a8_perf_types {
ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
ARMV7_PERFCTR_L2_ACCESS = 0x43,
ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
ARMV7_PERFCTR_L2_NEON = 0x4E,
ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
ARMV7_PERFCTR_L1_INST = 0x50,
ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
ARMV7_PERFCTR_OP_EXECUTED = 0x55,
ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
ARMV7_PERFCTR_CYCLES_INST = 0x57,
ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
ARMV7_PERFCTR_PMU_EVENTS = 0x72,
};
/* ARMv7 Cortex-A9 specific event types */
enum armv7_a9_perf_types {
ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
ARMV7_PERFCTR_DATA_EVICTION = 0x65,
ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
ARMV7_PERFCTR_ISB_INST = 0x90,
ARMV7_PERFCTR_DSB_INST = 0x91,
ARMV7_PERFCTR_DMB_INST = 0x92,
ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
};
/*
* Cortex-A8 HW events mapping
*
* The hardware events that we support. We do support cache operations but
* we have harvard caches and no way to combine instruction and data
* accesses/misses in hardware.
*/
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Cortex-A9 HW events mapping
*/
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] =
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* Only ITLB misses and DTLB refills are supported.
* If users want the DTLB refills misses a raw counter
* must be used.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
[C(RESULT_MISS)]
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
/*
* Perf Events counters
*/
enum armv7_counters {
ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
ARMV7_COUNTER0 = 2, /* First event counter */
};
/*
* The cycle counter is ARMV7_CYCLE_COUNTER.
* The first event counter is ARMV7_COUNTER0.
* The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
*/
#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
/*
* ARMv7 low level PMNC access
*/
/*
* Per-CPU PMNC: config reg
*/
#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
#define ARMV7_PMNC_N_MASK 0x1f
#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
/*
* Available counters
*/
#define ARMV7_CNT0 0 /* First event counter */
#define ARMV7_CCNT 31 /* Cycle counter */
/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
/*
* CNTENS: counters enable reg
*/
#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
/*
* CNTENC: counters disable reg
*/
#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
/*
* INTENS: counters overflow interrupt enable reg
*/
#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
/*
* INTENC: counters overflow interrupt disable reg
*/
#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
/*
* EVTSEL: Event selection reg
*/
#define ARMV7_EVTSEL_MASK 0x7f /* Mask for writable bits */
/*
* SELECT: Counter selection reg
*/
#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
/*
* FLAG: counters overflow flag status reg
*/
#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
static inline unsigned long armv7_pmnc_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
return val;
}
static inline void armv7_pmnc_write(unsigned long val)
{
val &= ARMV7_PMNC_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
}
static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
{
return pmnc & ARMV7_OVERFLOWED_MASK;
}
static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
enum armv7_counters counter)
{
int ret;
if (counter == ARMV7_CYCLE_COUNTER)
ret = pmnc & ARMV7_FLAG_C;
else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
ret = pmnc & ARMV7_FLAG_P(counter);
else
pr_err("CPU%u checking wrong counter %d overflow status\n",
smp_processor_id(), counter);
return ret;
}
static inline int armv7_pmnc_select_counter(unsigned int idx)
{
u32 val;
if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
pr_err("CPU%u selecting wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
return idx;
}
static inline u32 armv7pmu_read_counter(int idx)
{
unsigned long value = 0;
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r" (value));
} else
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
return value;
}
static inline void armv7pmu_write_counter(int idx, u32 value)
{
if (idx == ARMV7_CYCLE_COUNTER)
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
if (armv7_pmnc_select_counter(idx) == idx)
asm volatile("mcr p15, 0, %0, c9, c13, 2"
: : "r" (value));
} else
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
}
static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
{
if (armv7_pmnc_select_counter(idx) == idx) {
val &= ARMV7_EVTSEL_MASK;
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
}
}
static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENS_C;
else
val = ARMV7_CNTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_CNTENC_C;
else
val = ARMV7_CNTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u enabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENS_C;
else
val = ARMV7_INTENS_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
{
u32 val;
if ((idx != ARMV7_CYCLE_COUNTER) &&
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
pr_err("CPU%u disabling wrong PMNC counter"
" interrupt enable %d\n", smp_processor_id(), idx);
return -1;
}
if (idx == ARMV7_CYCLE_COUNTER)
val = ARMV7_INTENC_C;
else
val = ARMV7_INTENC_P(idx);
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
return idx;
}
static inline u32 armv7_pmnc_getreset_flags(void)
{
u32 val;
/* Read */
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
/* Write to clear flags */
val &= ARMV7_FLAG_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
return val;
}
#ifdef DEBUG
static void armv7_pmnc_dump_regs(void)
{
u32 val;
unsigned int cnt;
printk(KERN_INFO "PMNC registers dump:\n");
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
printk(KERN_INFO "PMNC =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
printk(KERN_INFO "CNTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
printk(KERN_INFO "INTENS=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
printk(KERN_INFO "FLAGS =0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
printk(KERN_INFO "SELECT=0x%08x\n", val);
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
printk(KERN_INFO "CCNT =0x%08x\n", val);
for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
armv7_pmnc_select_counter(cnt);
asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
}
}
#endif
void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Set event (if destined for PMNx counters)
* We don't need to set the event if it's a cycle count
*/
if (idx != ARMV7_CYCLE_COUNTER)
armv7_pmnc_write_evtsel(idx, hwc->config_base);
/*
* Enable interrupt for this counter
*/
armv7_pmnc_enable_intens(idx);
/*
* Enable counter
*/
armv7_pmnc_enable_counter(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags;
/*
* Disable counter and interrupt
*/
spin_lock_irqsave(&pmu_lock, flags);
/*
* Disable counter
*/
armv7_pmnc_disable_counter(idx);
/*
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens(idx);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* Get and reset the IRQ flags
*/
pmnc = armv7_pmnc_getreset_flags();
/*
* Did an overflow occur?
*/
if (!armv7_pmnc_has_overflowed(pmnc))
return IRQ_NONE;
/*
* Handle the counter(s) overflow(s)
*/
regs = get_irq_regs();
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
/*
* We have a single interrupt for all counters. Check that
* each counter has overflowed before we process it.
*/
if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, 0, &data, regs))
armpmu->disable(hwc, idx);
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts enabled. For
* platforms that can have the PMU interrupts raised as a PMI, this
* will not work.
*/
perf_event_do_pending();
return IRQ_HANDLED;
}
static void armv7pmu_start(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Enable all counters */
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static void armv7pmu_stop(void)
{
unsigned long flags;
spin_lock_irqsave(&pmu_lock, flags);
/* Disable all counters */
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline int armv7_a8_pmu_event_map(int config)
{
int mapping = armv7_a8_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static inline int armv7_a9_pmu_event_map(int config)
{
int mapping = armv7_a9_perf_map[config];
if (HW_OP_UNSUPPORTED == mapping)
mapping = -EOPNOTSUPP;
return mapping;
}
static u64 armv7pmu_raw_event(u64 config)
{
return config & 0xff;
}
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx;
/* Always place a cycle counter into the cycle counter. */
if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return ARMV7_CYCLE_COUNTER;
} else {
/*
* For anything other than a cycle counter, try and use
* the events counters
*/
for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
/* The counters are all in use. */
return -EAGAIN;
}
}
static struct arm_pmu armv7pmu = {
.handle_irq = armv7pmu_handle_irq,
.enable = armv7pmu_enable_event,
.disable = armv7pmu_disable_event,
.raw_event = armv7pmu_raw_event,
.read_counter = armv7pmu_read_counter,
.write_counter = armv7pmu_write_counter,
.get_event_idx = armv7pmu_get_event_idx,
.start = armv7pmu_start,
.stop = armv7pmu_stop,
.max_period = (1LLU << 32) - 1,
};
static u32 __init armv7_reset_read_pmnc(void)
{
u32 nb_cnt;
/* Initialize & Reset PMNC: C and P bits */
armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
/* Read the nb of CNTx counters supported from PMNC */
nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
}
static int __init
init_hw_perf_events(void)
{
unsigned long cpuid = read_cpuid_id();
unsigned long implementor = (cpuid & 0xFF000000) >> 24;
unsigned long part_number = (cpuid & 0xFFF0);
/* We only support ARM CPUs implemented by ARM at the moment. */
if (0x41 == implementor) {
switch (part_number) {
case 0xB360: /* ARM1136 */
case 0xB560: /* ARM1156 */
case 0xB760: /* ARM1176 */
armpmu = &armv6pmu;
memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
sizeof(armv6_perf_cache_map));
perf_max_events = armv6pmu.num_events;
break;
case 0xB020: /* ARM11mpcore */
armpmu = &armv6mpcore_pmu;
memcpy(armpmu_perf_cache_map,
armv6mpcore_perf_cache_map,
sizeof(armv6mpcore_perf_cache_map));
perf_max_events = armv6mpcore_pmu.num_events;
break;
case 0xC080: /* Cortex-A8 */
armv7pmu.name = ARMV7_PMU_CORTEX_A8_NAME;
memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
sizeof(armv7_a8_perf_cache_map));
armv7pmu.event_map = armv7_a8_pmu_event_map;
armpmu = &armv7pmu;
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
perf_max_events = armv7pmu.num_events;
break;
case 0xC090: /* Cortex-A9 */
armv7pmu.name = ARMV7_PMU_CORTEX_A9_NAME;
memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
sizeof(armv7_a9_perf_cache_map));
armv7pmu.event_map = armv7_a9_pmu_event_map;
armpmu = &armv7pmu;
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
perf_max_events = armv7pmu.num_events;
break;
default:
pr_info("no hardware support available\n");
perf_max_events = -1;
}
}
if (armpmu)
pr_info("enabled with %s PMU driver, %d counters available\n",
armpmu->name, armpmu->num_events);
return 0;
}
arch_initcall(init_hw_perf_events);
/*
* Callchain handling code.
*/
static inline void
callchain_store(struct perf_callchain_entry *entry,
u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
/*
* The registers we're interested in are at the end of the variable
* length saved register structure. The fp points at the end of this
* structure so the address of this struct is:
* (struct frame_tail *)(xxx->fp)-1
*
* This code has been adapted from the ARM OProfile support.
*/
struct frame_tail {
struct frame_tail *fp;
unsigned long sp;
unsigned long lr;
} __attribute__((packed));
/*
* Get the return address for a single stackframe and return a pointer to the
* next frame tail.
*/
static struct frame_tail *
user_backtrace(struct frame_tail *tail,
struct perf_callchain_entry *entry)
{
struct frame_tail buftail;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
return NULL;
if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
return NULL;
callchain_store(entry, buftail.lr);
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail >= buftail.fp)
return NULL;
return buftail.fp - 1;
}
static void
perf_callchain_user(struct pt_regs *regs,
struct perf_callchain_entry *entry)
{
struct frame_tail *tail;
callchain_store(entry, PERF_CONTEXT_USER);
if (!user_mode(regs))
regs = task_pt_regs(current);
tail = (struct frame_tail *)regs->ARM_fp - 1;
while (tail && !((unsigned long)tail & 0x3))
tail = user_backtrace(tail, entry);
}
/*
* Gets called by walk_stackframe() for every stackframe. This will be called
* whist unwinding the stackframe and is like a subroutine return so we use
* the PC.
*/
static int
callchain_trace(struct stackframe *fr,
void *data)
{
struct perf_callchain_entry *entry = data;
callchain_store(entry, fr->pc);
return 0;
}
static void
perf_callchain_kernel(struct pt_regs *regs,
struct perf_callchain_entry *entry)
{
struct stackframe fr;
callchain_store(entry, PERF_CONTEXT_KERNEL);
fr.fp = regs->ARM_fp;
fr.sp = regs->ARM_sp;
fr.lr = regs->ARM_lr;
fr.pc = regs->ARM_pc;
walk_stackframe(&fr, callchain_trace, entry);
}
static void
perf_do_callchain(struct pt_regs *regs,
struct perf_callchain_entry *entry)
{
int is_user;
if (!regs)
return;
is_user = user_mode(regs);
if (!current || !current->pid)
return;
if (is_user && current->state != TASK_RUNNING)
return;
if (!is_user)
perf_callchain_kernel(regs, entry);
if (current->mm)
perf_callchain_user(regs, entry);
}
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
struct perf_callchain_entry *
perf_callchain(struct pt_regs *regs)
{
struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
entry->nr = 0;
perf_do_callchain(regs, entry);
return entry;
}