perf: add hotplug support
The change is a squash of the following four commits taken from msm-4.14:

1) commit <b02d7648fbd1> ("enable perf to continue across hotplug")

   Currently, perf hardware, software and tracepoint events are deleted
   when a CPU is hotplugged out. This change restarts the events after
   hotplug. In arm_pmu.c, most of the code for handling power collapse
   is reused for hotplug. This change supersedes commit 1f0f95c5fe9e
   ("perf: add hotplug support so that perf continues after hotplug")
   and uses the new hotplug notification method.

2) commit <9768e7af40d6> ("disable perf_event_read during hotplug")

   core.c should not allow perf_event_read access during hotplug. DCVS
   may try to read events during hotplug startup or shutdown. Set a flag
   to disallow access during hotplug.

3) commit <1fc690b7b8c6> ("perf: core: Avoid race condition when releasing perf-events")

   Before calling perf_event_release_kernel(), take the perf pmus_lock
   mutex to prevent the CPU from going offline during the operation.

4) commit <77257e46efea> ("perf: Manage CPU hotplug events at core level")

   The approach here is to handle hotplug management at the perf-core
   level. The idea is to detach the event from its context
   (perf_remove_from_context()) when the CPU is about to go down, and
   re-attach it (perf_install_in_context()) when the CPU comes back
   online. This removes the logic for maintaining zombie events
   (PERF_EVENT_STATE_ZOMBIE) and lets the dormant list itself carry the
   events whose CPUs are offline.

Change-Id: I3738bdcaae2d2199ba3fb68ce77f637d867c5c14
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
parent 7a0a9e676c
commit 3d3eb5fb85
5 changed files with 174 additions and 51 deletions
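The four squashed commits all build on the same scheme: an event whose CPU is offline is not destroyed but parked on a dormant list (PERF_EVENT_STATE_DORMANT) and re-installed once the CPU returns. Below is a minimal, self-contained userspace C sketch of that deferred-install idea, not kernel code: install_event(), cpu_online(), cpu_hotplugging[] and the hand-rolled list are illustrative stand-ins for perf_install_in_context(), perf_event_restart_events(), the per-CPU is_hotplugging flag and dormant_event_list, and the locking the real patch needs is omitted.

/*
 * Userspace sketch of the dormant-event scheme used by this patch.
 * All names here are illustrative, not the kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct event {
        int cpu;                /* CPU the event is bound to */
        bool installed;         /* stands in for a live context install */
        struct event *next;     /* dormant-list linkage */
};

static bool cpu_hotplugging[NR_CPUS];   /* mirrors per-CPU is_hotplugging */
static struct event *dormant_list;      /* mirrors dormant_event_list */

/* perf_install_in_context() analogue: defer if the CPU is offline */
static void install_event(struct event *ev)
{
        if (cpu_hotplugging[ev->cpu]) {
                ev->next = dormant_list;        /* park on the dormant list */
                dormant_list = ev;
                return;
        }
        ev->installed = true;
        printf("event installed on cpu %d\n", ev->cpu);
}

/* perf_event_restart_events() analogue: reinstall events parked for @cpu */
static void cpu_online(int cpu)
{
        struct event **pp = &dormant_list;

        cpu_hotplugging[cpu] = false;
        while (*pp) {
                struct event *ev = *pp;

                if (ev->cpu == cpu) {
                        *pp = ev->next;         /* unlink, then install */
                        install_event(ev);
                } else {
                        pp = &ev->next;
                }
        }
}

int main(void)
{
        struct event ev = { .cpu = 2 };

        cpu_hotplugging[2] = true;      /* CPU 2 is going offline */
        install_event(&ev);             /* deferred onto the dormant list */
        cpu_online(2);                  /* CPU 2 returns: event is installed */
        return ev.installed ? 0 : 1;
}

The point of the scheme is that a deferred event keeps its configuration and context; only its installation is postponed, which is what lets a perf session survive a hotplug cycle instead of losing its events.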
arch/arm64/kernel/perf_event.c
@@ -30,6 +30,8 @@
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 
+static DEFINE_PER_CPU(bool, is_hotplugging);
+
 /*
  * ARMv8 PMUv3 Performance Events handling code.
  * Common event types (some are defined in asm/perf_event.h).
@@ -870,8 +872,8 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
                 struct perf_event *event = cpuc->events[idx];
                 struct hw_perf_event *hwc;
 
-                /* Ignore if we don't have an event. */
-                if (!event)
+                /* Ignore if we don't have an event */
+                if (!event || event->state != PERF_EVENT_STATE_ACTIVE)
                         continue;
 
                 /*
@@ -1146,6 +1148,9 @@ static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
         if (!cpu_pmu)
                 return;
 
+        if (__this_cpu_read(is_hotplugging))
+                return;
+
         hw_events = this_cpu_ptr(cpu_pmu->hw_events);
 
         if (!hw_events)
@@ -1377,6 +1382,37 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
         {},
 };
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int perf_event_hotplug_coming_up(unsigned int cpu)
+{
+        per_cpu(is_hotplugging, cpu) = false;
+        return 0;
+}
+
+static int perf_event_hotplug_going_down(unsigned int cpu)
+{
+        per_cpu(is_hotplugging, cpu) = true;
+        return 0;
+}
+
+static int perf_event_cpu_hp_init(void)
+{
+        int ret;
+
+        ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
+                                "PERF_EVENT/CPUHP_AP_PERF_ONLINE",
+                                perf_event_hotplug_coming_up,
+                                perf_event_hotplug_going_down);
+        if (ret)
+                pr_err("CPU hotplug notifier for perf_event.c could not be registered: %d\n",
+                       ret);
+
+        return ret;
+}
+#else
+static int perf_event_cpu_hp_init(void) { return 0; }
+#endif
+
 /*
  * Non DT systems have their micro/arch events probed at run-time.
  * A fairly complete list of generic events are provided and ones that
@@ -1389,7 +1425,14 @@ static const struct pmu_probe_info armv8_pmu_probe_table[] = {
 
 static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-        int ret;
+        int ret, cpu;
+
+        for_each_possible_cpu(cpu)
+                per_cpu(is_hotplugging, cpu) = false;
+
+        ret = perf_event_cpu_hp_init();
+        if (ret)
+                return ret;
 
         /* set to true so armv8pmu_idle_update doesn't try to load
          * hw_events before arm_pmu_device_probe has initialized it.
drivers/perf/arm_pmu.c
@@ -675,6 +675,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
                 if (!event)
                         continue;
 
+                if (event->state != PERF_EVENT_STATE_ACTIVE)
+                        continue;
+
                 switch (cmd) {
                 case CPU_PM_ENTER:
                         /*
@@ -821,6 +824,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
                  * validation).
                  */
                 .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
+                .events_across_hotplug = 1,
         };
 
         pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
include/linux/perf_event.h
@@ -267,6 +267,8 @@ struct pmu {
         atomic_t                exclusive_cnt; /* < 0: cpu; > 0: tsk */
         int                     task_ctx_nr;
         int                     hrtimer_interval_ms;
+        u32                     events_across_hotplug:1,
+                                reserved:31;
 
         /* number of address filters this PMU can do */
         unsigned int            nr_addr_filters;
@@ -493,6 +495,7 @@ struct perf_addr_filters_head {
  * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
+        PERF_EVENT_STATE_DORMANT        = -5,
         PERF_EVENT_STATE_DEAD           = -4,
         PERF_EVENT_STATE_EXIT           = -3,
         PERF_EVENT_STATE_ERROR          = -2,
@@ -703,6 +706,13 @@ struct perf_event {
         struct list_head        sb_list;
         /* Is this event shared with other events */
         bool                    shared;
+
+        /*
+         * Entry into the list that holds the events whose CPUs
+         * are offline. These events will be installed once the
+         * CPU wakes up and will be removed from the list after that
+         */
+        struct list_head        dormant_event_entry;
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -1419,9 +1429,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 #ifdef CONFIG_PERF_EVENTS
 int perf_event_init_cpu(unsigned int cpu);
 int perf_event_exit_cpu(unsigned int cpu);
+int perf_event_restart_events(unsigned int cpu);
 #else
 #define perf_event_init_cpu     NULL
 #define perf_event_exit_cpu     NULL
+#define perf_event_restart_events NULL
 #endif
 
 #endif /* _LINUX_PERF_EVENT_H */
kernel/cpu.c
@@ -1431,7 +1431,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
         },
         [CPUHP_AP_PERF_ONLINE] = {
                 .name                   = "perf:online",
-                .startup.single         = perf_event_init_cpu,
+                .startup.single         = perf_event_restart_events,
                 .teardown.single        = perf_event_exit_cpu,
         },
         [CPUHP_AP_WATCHDOG_ONLINE] = {
kernel/events/core.c
@@ -405,6 +405,7 @@ static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 static DEFINE_PER_CPU(bool, is_idle);
+static DEFINE_PER_CPU(bool, is_hotplugging);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2511,6 +2512,23 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
         perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
+static LIST_HEAD(dormant_event_list);
+static DEFINE_SPINLOCK(dormant_event_list_lock);
+
+static void perf_prepare_install_in_context(struct perf_event *event)
+{
+        spin_lock(&dormant_event_list_lock);
+        if (event->state == PERF_EVENT_STATE_DORMANT)
+                goto out;
+
+        event->state = PERF_EVENT_STATE_DORMANT;
+        list_add_tail(&event->dormant_event_entry, &dormant_event_list);
+out:
+        spin_unlock(&dormant_event_list_lock);
+}
+#endif
+
 /*
  * Cross CPU call to install and enable a performance event
  *
@@ -2599,6 +2617,13 @@ perf_install_in_context(struct perf_event_context *ctx,
          */
         smp_store_release(&event->ctx, ctx);
 
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
+        if (per_cpu(is_hotplugging, cpu)) {
+                perf_prepare_install_in_context(event);
+                return;
+        }
+#endif
+
         if (!task) {
                 cpu_function_call(cpu, __perf_install_in_context, event);
                 return;
@@ -2668,6 +2693,34 @@ perf_install_in_context(struct perf_event_context *ctx,
         raw_spin_unlock_irq(&ctx->lock);
 }
 
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
+static void perf_deferred_install_in_context(int cpu)
+{
+        struct perf_event *event, *tmp;
+        struct perf_event_context *ctx;
+
+        spin_lock(&dormant_event_list_lock);
+        list_for_each_entry_safe(event, tmp, &dormant_event_list,
+                                dormant_event_entry) {
+                if (cpu != event->cpu)
+                        continue;
+
+                list_del(&event->dormant_event_entry);
+                event->state = PERF_EVENT_STATE_INACTIVE;
+                spin_unlock(&dormant_event_list_lock);
+
+                ctx = event->ctx;
+
+                mutex_lock(&ctx->mutex);
+                perf_install_in_context(ctx, event, cpu);
+                mutex_unlock(&ctx->mutex);
+
+                spin_lock(&dormant_event_list_lock);
+        }
+        spin_unlock(&dormant_event_list_lock);
+}
+#endif
+
 /*
  * Cross CPU call to enable a performance event
  */
@@ -3874,6 +3927,9 @@ static void __perf_event_read(void *info)
         struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
         struct pmu *pmu = event->pmu;
 
+        if (__this_cpu_read(is_hotplugging))
+                return;
+
         /*
          * If this is a task context, we need to check whether it is
          * the current task context of this cpu. If not it has been
@@ -4028,7 +4084,8 @@ static int perf_event_read(struct perf_event *event, bool group)
                         return 0;
                 if (cpu_isolated(event_cpu) ||
                         (event->attr.exclude_idle &&
-                                per_cpu(is_idle, event_cpu)))
+                                per_cpu(is_idle, event_cpu)) ||
+                                per_cpu(is_hotplugging, event_cpu))
                         active_event_skip_read = true;
         }
         if (state == PERF_EVENT_STATE_ACTIVE &&
@@ -4057,7 +4114,8 @@ static int perf_event_read(struct perf_event *event, bool group)
                 preempt_enable();
                 ret = data.ret;
         } else if (state == PERF_EVENT_STATE_INACTIVE ||
-                        active_event_skip_read) {
+                        (active_event_skip_read &&
+                        !per_cpu(is_hotplugging, event_cpu))) {
                 struct perf_event_context *ctx = event->ctx;
                 unsigned long flags;
 
@@ -4609,12 +4667,21 @@ static void put_event(struct perf_event *event)
  * object, it will not preserve its functionality. Once the last 'user'
  * gives up the object, we'll destroy the thing.
  */
-int perf_event_release_kernel(struct perf_event *event)
+static int __perf_event_release_kernel(struct perf_event *event)
 {
         struct perf_event_context *ctx = event->ctx;
         struct perf_event *child, *tmp;
         LIST_HEAD(free_list);
 
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
+        if (event->cpu != -1) {
+                spin_lock(&dormant_event_list_lock);
+                if (event->state == PERF_EVENT_STATE_DORMANT)
+                        list_del(&event->dormant_event_entry);
+                spin_unlock(&dormant_event_list_lock);
+        }
+#endif
+
         /*
          * If we got here through err_file: fput(event_file); we will not have
          * attached to a context yet.
@@ -4721,6 +4788,17 @@ int perf_event_release_kernel(struct perf_event *event)
         put_event(event); /* Must be the 'last' reference */
         return 0;
 }
+
+int perf_event_release_kernel(struct perf_event *event)
+{
+        int ret;
+
+        mutex_lock(&pmus_lock);
+        ret = __perf_event_release_kernel(event);
+        mutex_unlock(&pmus_lock);
+
+        return ret;
+}
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
@@ -4937,6 +5015,15 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
         struct perf_event_context *ctx;
         int ret;
 
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
+        spin_lock(&dormant_event_list_lock);
+        if (event->state == PERF_EVENT_STATE_DORMANT) {
+                spin_unlock(&dormant_event_list_lock);
+                return 0;
+        }
+        spin_unlock(&dormant_event_list_lock);
+#endif
+
         ctx = perf_event_ctx_lock(event);
         ret = __perf_read(event, buf, count);
         perf_event_ctx_unlock(event, ctx);
@@ -8306,6 +8393,7 @@ static struct pmu perf_swevent = {
         .start          = perf_swevent_start,
         .stop           = perf_swevent_stop,
         .read           = perf_swevent_read,
+        .events_across_hotplug = 1,
 };
 
 #ifdef CONFIG_EVENT_TRACING
@@ -8450,6 +8538,7 @@ static struct pmu perf_tracepoint = {
         .start          = perf_swevent_start,
         .stop           = perf_swevent_stop,
         .read           = perf_swevent_read,
+        .events_across_hotplug = 1,
 };
 
 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
@@ -9349,6 +9438,7 @@ static struct pmu perf_cpu_clock = {
         .start          = cpu_clock_event_start,
         .stop           = cpu_clock_event_stop,
         .read           = cpu_clock_event_read,
+        .events_across_hotplug = 1,
 };
 
 /*
@@ -9430,6 +9520,7 @@ static struct pmu perf_task_clock = {
         .start          = task_clock_event_start,
         .stop           = task_clock_event_stop,
         .read           = task_clock_event_read,
+        .events_across_hotplug = 1,
 };
 
 static void perf_pmu_nop_void(struct pmu *pmu)
@@ -10140,6 +10231,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
         mutex_init(&event->child_mutex);
         INIT_LIST_HEAD(&event->child_list);
 
+        INIT_LIST_HEAD(&event->dormant_event_entry);
         INIT_LIST_HEAD(&event->event_entry);
         INIT_LIST_HEAD(&event->sibling_list);
         INIT_LIST_HEAD(&event->active_list);
@@ -10911,23 +11003,6 @@ SYSCALL_DEFINE5(perf_event_open,
                 goto err_locked;
         }
 
-        if (!task) {
-                /*
-                 * Check if the @cpu we're creating an event for is online.
-                 *
-                 * We use the perf_cpu_context::ctx::mutex to serialize against
-                 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
-                 */
-                struct perf_cpu_context *cpuctx =
-                        container_of(ctx, struct perf_cpu_context, ctx);
-
-                if (!cpuctx->online) {
-                        err = -ENODEV;
-                        goto err_locked;
-                }
-        }
-
         /*
          * Must be under the same ctx::mutex as perf_install_in_context(),
          * because we need to serialize with concurrent event creation.
@@ -11130,21 +11205,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                 goto err_unlock;
         }
 
-        if (!task) {
-                /*
-                 * Check if the @cpu we're creating an event for is online.
-                 *
-                 * We use the perf_cpu_context::ctx::mutex to serialize against
-                 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
-                 */
-                struct perf_cpu_context *cpuctx =
-                        container_of(ctx, struct perf_cpu_context, ctx);
-                if (!cpuctx->online) {
-                        err = -ENODEV;
-                        goto err_unlock;
-                }
-        }
-
         if (!exclusive_event_installable(event, ctx)) {
                 err = -EBUSY;
                 goto err_unlock;
@@ -11853,6 +11913,8 @@ static void __init perf_event_init_all_cpus(void)
                 INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
 #endif
                 INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
+                per_cpu(is_hotplugging, cpu) = false;
+                per_cpu(is_idle, cpu) = false;
         }
 }
 
@@ -11872,32 +11934,35 @@ void perf_swevent_init_cpu(unsigned int cpu)
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-static void __perf_event_exit_context(void *__info)
+int perf_event_restart_events(unsigned int cpu)
 {
-        struct perf_event_context *ctx = __info;
-        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-        struct perf_event *event;
+        mutex_lock(&pmus_lock);
+        per_cpu(is_hotplugging, cpu) = false;
+        perf_deferred_install_in_context(cpu);
+        mutex_unlock(&pmus_lock);
 
-        raw_spin_lock(&ctx->lock);
-        ctx_sched_out(ctx, cpuctx, EVENT_TIME);
-        list_for_each_entry(event, &ctx->event_list, event_entry)
-                __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
-        raw_spin_unlock(&ctx->lock);
+        return 0;
 }
 
 static void perf_event_exit_cpu_context(int cpu)
 {
         struct perf_cpu_context *cpuctx;
         struct perf_event_context *ctx;
+        struct perf_event *event, *event_tmp;
         struct pmu *pmu;
 
         mutex_lock(&pmus_lock);
+        per_cpu(is_hotplugging, cpu) = true;
         list_for_each_entry(pmu, &pmus, entry) {
                 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                 ctx = &cpuctx->ctx;
 
                 mutex_lock(&ctx->mutex);
-                smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+                list_for_each_entry_safe(event, event_tmp, &ctx->event_list,
+                                         event_entry) {
+                        perf_remove_from_context(event, DETACH_GROUP);
+                        if (event->pmu->events_across_hotplug)
+                                perf_prepare_install_in_context(event);
+                }
                 cpuctx->online = 0;
                 mutex_unlock(&ctx->mutex);
         }
@@ -11978,7 +12043,6 @@ static struct notifier_block perf_event_idle_nb = {
         .notifier_call = event_idle_notif,
 };
 
-
 void __init perf_event_init(void)
 {
         int ret, cpu;