perf: add hotplug support

The change is a squash of the following four commits taken
from msm-4.14:

1) commit <b02d7648fbd1> ("enable perf to continue across
hotplug")
Currently, perf hardware, software and tracepoint events are
deleted when a CPU is hotplugged out. This change restarts the
events after hotplug. In arm_pmu.c, most of the code for handling
power collapse is reused for hotplug.
This change supersedes commit 1f0f95c5fe9e ("perf: add hotplug
support so that perf continues after hotplug") and uses the
new hotplug notification method.
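
A condensed sketch of the per-CPU tracking added to the arm64 PMU
driver (full hunks below):

	static DEFINE_PER_CPU(bool, is_hotplugging);

	static int perf_event_hotplug_coming_up(unsigned int cpu)
	{
		per_cpu(is_hotplugging, cpu) = false;	/* CPU is back; resume normal PMU access */
		return 0;
	}

	static int perf_event_hotplug_going_down(unsigned int cpu)
	{
		per_cpu(is_hotplugging, cpu) = true;	/* gate the idle/IRQ paths while the CPU is down */
		return 0;
	}

The callbacks are registered with cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE, ...)
from the PMU probe path.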

2) commit <9768e7af40d6> ("disable perf_event_read during hotplug")
core.c should not allow perf_event_read() access during hotplug.
DCVS may try to read events while a CPU is starting up or shutting
down, so set a per-CPU flag that blocks such reads during hotplug.
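
In sketch form (condensed from the core.c hunks below), the flag
gates both read paths:

	/* Cross-CPU read handler bails out while the CPU is in hotplug. */
	if (__this_cpu_read(is_hotplugging))
		return;

	/* perf_event_read() likewise skips active reads for a hotplugging event_cpu. */
	if (cpu_isolated(event_cpu) ||
	    (event->attr.exclude_idle &&
	     per_cpu(is_idle, event_cpu)) ||
	    per_cpu(is_hotplugging, event_cpu))
		active_event_skip_read = true;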

3) commit <1fc690b7b8c6> ("perf: core: Avoid race condition when
releasing perf-events")
Hold perf's pmus_lock mutex across perf_event_release_kernel()
to prevent the CPU from going offline during the operation.
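
The resulting shape in core.c (shown in full in the hunks below):

	int perf_event_release_kernel(struct perf_event *event)
	{
		int ret;

		/* Hold pmus_lock so no CPU can go offline while the event is released. */
		mutex_lock(&pmus_lock);
		ret = __perf_event_release_kernel(event);
		mutex_unlock(&pmus_lock);

		return ret;
	}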

4) commit <77257e46efea> ("perf: Manage CPU hotplug events at
core level")
The approach here is to achieve the hotplug management
at the perf-core level. The idea is to detach the event from the
context (perf_remove_from_context()) when the CPU is about to go
down and re-attach it (perf_install_in_context()) when the CPU
comes back online. This involves removing the logic for
maintaining zombie events (PERF_EVENT_STATE_ZOMBIE) and letting the
dormant list itself carry the events whose CPUs are offline.
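
A condensed sketch of the resulting flow (identifiers as in the
core.c hunks below):

	/* CPU about to go offline: detach each event and park it on the dormant list. */
	list_for_each_entry_safe(event, event_tmp, &ctx->event_list, event_entry) {
		perf_remove_from_context(event, DETACH_GROUP);
		if (event->pmu->events_across_hotplug)
			perf_prepare_install_in_context(event);	/* -> PERF_EVENT_STATE_DORMANT */
	}

	/* CPU back online (CPUHP_AP_PERF_ONLINE startup): re-install the parked events. */
	int perf_event_restart_events(unsigned int cpu)
	{
		mutex_lock(&pmus_lock);
		per_cpu(is_hotplugging, cpu) = false;
		perf_deferred_install_in_context(cpu);	/* perf_install_in_context() per dormant event */
		mutex_unlock(&pmus_lock);

		return 0;
	}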

Change-Id: I3738bdcaae2d2199ba3fb68ce77f637d867c5c14
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>

arch/arm64/kernel/perf_event.c

@@ -30,6 +30,8 @@
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
static DEFINE_PER_CPU(bool, is_hotplugging);
/*
* ARMv8 PMUv3 Performance Events handling code.
* Common event types (some are defined in asm/perf_event.h).
@@ -870,8 +872,8 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
/* Ignore if we don't have an event. */
if (!event)
/* Ignore if we don't have an event */
if (!event || event->state != PERF_EVENT_STATE_ACTIVE)
continue;
/*
@@ -1146,6 +1148,9 @@ static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
if (!cpu_pmu)
return;
if (__this_cpu_read(is_hotplugging))
return;
hw_events = this_cpu_ptr(cpu_pmu->hw_events);
if (!hw_events)
@@ -1377,6 +1382,37 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{},
};
#ifdef CONFIG_HOTPLUG_CPU
static int perf_event_hotplug_coming_up(unsigned int cpu)
{
per_cpu(is_hotplugging, cpu) = false;
return 0;
}
static int perf_event_hotplug_going_down(unsigned int cpu)
{
per_cpu(is_hotplugging, cpu) = true;
return 0;
}
static int perf_event_cpu_hp_init(void)
{
int ret;
ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
"PERF_EVENT/CPUHP_AP_PERF_ONLINE",
perf_event_hotplug_coming_up,
perf_event_hotplug_going_down);
if (ret)
pr_err("CPU hotplug notifier for perf_event.c could not be registered: %d\n",
ret);
return ret;
}
#else
static int perf_event_cpu_hp_init(void) { return 0; }
#endif
/*
* Non DT systems have their micro/arch events probed at run-time.
* A fairly complete list of generic events are provided and ones that
@@ -1389,7 +1425,14 @@ static const struct pmu_probe_info armv8_pmu_probe_table[] = {
static int armv8_pmu_device_probe(struct platform_device *pdev)
{
int ret;
int ret, cpu;
for_each_possible_cpu(cpu)
per_cpu(is_hotplugging, cpu) = false;
ret = perf_event_cpu_hp_init();
if (ret)
return ret;
/* set to true so armv8pmu_idle_update doesn't try to load
* hw_events before arm_pmu_device_probe has initialized it.

drivers/perf/arm_pmu.c

@@ -675,6 +675,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
if (!event)
continue;
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
switch (cmd) {
case CPU_PM_ENTER:
/*
@@ -821,6 +824,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
* validation).
*/
.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
.events_across_hotplug = 1,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =

include/linux/perf_event.h

@@ -267,6 +267,8 @@ struct pmu {
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
u32 events_across_hotplug:1,
reserved:31;
/* number of address filters this PMU can do */
unsigned int nr_addr_filters;
@@ -493,6 +495,7 @@ struct perf_addr_filters_head {
* enum perf_event_state - the states of an event:
*/
enum perf_event_state {
PERF_EVENT_STATE_DORMANT = -5,
PERF_EVENT_STATE_DEAD = -4,
PERF_EVENT_STATE_EXIT = -3,
PERF_EVENT_STATE_ERROR = -2,
@@ -703,6 +706,13 @@ struct perf_event {
struct list_head sb_list;
/* Is this event shared with other events */
bool shared;
/*
* Entry into the list that holds the events whose CPUs
* are offline. These events will be installed once the
* CPU wakes up and will be removed from the list after that
*/
struct list_head dormant_event_entry;
#endif /* CONFIG_PERF_EVENTS */
};
@@ -1419,9 +1429,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
#ifdef CONFIG_PERF_EVENTS
int perf_event_init_cpu(unsigned int cpu);
int perf_event_exit_cpu(unsigned int cpu);
int perf_event_restart_events(unsigned int cpu);
#else
#define perf_event_init_cpu NULL
#define perf_event_exit_cpu NULL
#define perf_event_restart_events NULL
#endif
#endif /* _LINUX_PERF_EVENT_H */

kernel/cpu.c

@@ -1431,7 +1431,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
},
[CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online",
.startup.single = perf_event_init_cpu,
.startup.single = perf_event_restart_events,
.teardown.single = perf_event_exit_cpu,
},
[CPUHP_AP_WATCHDOG_ONLINE] = {

kernel/events/core.c

@@ -405,6 +405,7 @@ static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static DEFINE_PER_CPU(bool, is_idle);
static DEFINE_PER_CPU(bool, is_hotplugging);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -2511,6 +2512,23 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
perf_pmu_enable(cpuctx->ctx.pmu);
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static LIST_HEAD(dormant_event_list);
static DEFINE_SPINLOCK(dormant_event_list_lock);
static void perf_prepare_install_in_context(struct perf_event *event)
{
spin_lock(&dormant_event_list_lock);
if (event->state == PERF_EVENT_STATE_DORMANT)
goto out;
event->state = PERF_EVENT_STATE_DORMANT;
list_add_tail(&event->dormant_event_entry, &dormant_event_list);
out:
spin_unlock(&dormant_event_list_lock);
}
#endif
/*
* Cross CPU call to install and enable a performance event
*
@@ -2599,6 +2617,13 @@ perf_install_in_context(struct perf_event_context *ctx,
*/
smp_store_release(&event->ctx, ctx);
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
if (per_cpu(is_hotplugging, cpu)) {
perf_prepare_install_in_context(event);
return;
}
#endif
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
@@ -2668,6 +2693,34 @@ perf_install_in_context(struct perf_event_context *ctx,
raw_spin_unlock_irq(&ctx->lock);
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void perf_deferred_install_in_context(int cpu)
{
struct perf_event *event, *tmp;
struct perf_event_context *ctx;
spin_lock(&dormant_event_list_lock);
list_for_each_entry_safe(event, tmp, &dormant_event_list,
dormant_event_entry) {
if (cpu != event->cpu)
continue;
list_del(&event->dormant_event_entry);
event->state = PERF_EVENT_STATE_INACTIVE;
spin_unlock(&dormant_event_list_lock);
ctx = event->ctx;
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
mutex_unlock(&ctx->mutex);
spin_lock(&dormant_event_list_lock);
}
spin_unlock(&dormant_event_list_lock);
}
#endif
/*
* Cross CPU call to enable a performance event
*/
@@ -3874,6 +3927,9 @@ static void __perf_event_read(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct pmu *pmu = event->pmu;
if (__this_cpu_read(is_hotplugging))
return;
/*
* If this is a task context, we need to check whether it is
* the current task context of this cpu. If not it has been
@@ -4028,7 +4084,8 @@ static int perf_event_read(struct perf_event *event, bool group)
return 0;
if (cpu_isolated(event_cpu) ||
(event->attr.exclude_idle &&
per_cpu(is_idle, event_cpu)))
per_cpu(is_idle, event_cpu)) ||
per_cpu(is_hotplugging, event_cpu))
active_event_skip_read = true;
}
if (state == PERF_EVENT_STATE_ACTIVE &&
@@ -4057,7 +4114,8 @@ static int perf_event_read(struct perf_event *event, bool group)
preempt_enable();
ret = data.ret;
} else if (state == PERF_EVENT_STATE_INACTIVE ||
active_event_skip_read) {
(active_event_skip_read &&
!per_cpu(is_hotplugging, event_cpu))) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@@ -4609,12 +4667,21 @@ static void put_event(struct perf_event *event)
* object, it will not preserve its functionality. Once the last 'user'
* gives up the object, we'll destroy the thing.
*/
int perf_event_release_kernel(struct perf_event *event)
static int __perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct perf_event *child, *tmp;
LIST_HEAD(free_list);
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
if (event->cpu != -1) {
spin_lock(&dormant_event_list_lock);
if (event->state == PERF_EVENT_STATE_DORMANT)
list_del(&event->dormant_event_entry);
spin_unlock(&dormant_event_list_lock);
}
#endif
/*
* If we got here through err_file: fput(event_file); we will not have
* attached to a context yet.
@@ -4721,6 +4788,17 @@ int perf_event_release_kernel(struct perf_event *event)
put_event(event); /* Must be the 'last' reference */
return 0;
}
int perf_event_release_kernel(struct perf_event *event)
{
int ret;
mutex_lock(&pmus_lock);
ret = __perf_event_release_kernel(event);
mutex_unlock(&pmus_lock);
return ret;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
/*
@@ -4937,6 +5015,15 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
struct perf_event_context *ctx;
int ret;
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
spin_lock(&dormant_event_list_lock);
if (event->state == PERF_EVENT_STATE_DORMANT) {
spin_unlock(&dormant_event_list_lock);
return 0;
}
spin_unlock(&dormant_event_list_lock);
#endif
ctx = perf_event_ctx_lock(event);
ret = __perf_read(event, buf, count);
perf_event_ctx_unlock(event, ctx);
@@ -8306,6 +8393,7 @@ static struct pmu perf_swevent = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.events_across_hotplug = 1,
};
#ifdef CONFIG_EVENT_TRACING
@@ -8450,6 +8538,7 @@ static struct pmu perf_tracepoint = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
.events_across_hotplug = 1,
};
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
@@ -9349,6 +9438,7 @@ static struct pmu perf_cpu_clock = {
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
.events_across_hotplug = 1,
};
/*
@@ -9430,6 +9520,7 @@ static struct pmu perf_task_clock = {
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
.events_across_hotplug = 1,
};
static void perf_pmu_nop_void(struct pmu *pmu)
@@ -10140,6 +10231,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
mutex_init(&event->child_mutex);
INIT_LIST_HEAD(&event->child_list);
INIT_LIST_HEAD(&event->dormant_event_entry);
INIT_LIST_HEAD(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
INIT_LIST_HEAD(&event->active_list);
@@ -10911,23 +11003,6 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_locked;
}
if (!task) {
/*
* Check if the @cpu we're creating an event for is online.
*
* We use the perf_cpu_context::ctx::mutex to serialize against
* the hotplug notifiers. See perf_event_{init,exit}_cpu().
*/
struct perf_cpu_context *cpuctx =
container_of(ctx, struct perf_cpu_context, ctx);
if (!cpuctx->online) {
err = -ENODEV;
goto err_locked;
}
}
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
@@ -11130,21 +11205,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err_unlock;
}
if (!task) {
/*
* Check if the @cpu we're creating an event for is online.
*
* We use the perf_cpu_context::ctx::mutex to serialize against
* the hotplug notifiers. See perf_event_{init,exit}_cpu().
*/
struct perf_cpu_context *cpuctx =
container_of(ctx, struct perf_cpu_context, ctx);
if (!cpuctx->online) {
err = -ENODEV;
goto err_unlock;
}
}
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
@@ -11853,6 +11913,8 @@ static void __init perf_event_init_all_cpus(void)
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
per_cpu(is_hotplugging, cpu) = false;
per_cpu(is_idle, cpu) = false;
}
}
@@ -11872,32 +11934,35 @@ void perf_swevent_init_cpu(unsigned int cpu)
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void __perf_event_exit_context(void *__info)
int perf_event_restart_events(unsigned int cpu)
{
struct perf_event_context *ctx = __info;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event *event;
mutex_lock(&pmus_lock);
per_cpu(is_hotplugging, cpu) = false;
perf_deferred_install_in_context(cpu);
mutex_unlock(&pmus_lock);
raw_spin_lock(&ctx->lock);
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
list_for_each_entry(event, &ctx->event_list, event_entry)
__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
raw_spin_unlock(&ctx->lock);
return 0;
}
static void perf_event_exit_cpu_context(int cpu)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct perf_event *event, *event_tmp;
struct pmu *pmu;
mutex_lock(&pmus_lock);
per_cpu(is_hotplugging, cpu) = true;
list_for_each_entry(pmu, &pmus, entry) {
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
list_for_each_entry_safe(event, event_tmp, &ctx->event_list,
event_entry) {
perf_remove_from_context(event, DETACH_GROUP);
if (event->pmu->events_across_hotplug)
perf_prepare_install_in_context(event);
}
cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
@@ -11978,7 +12043,6 @@ static struct notifier_block perf_event_idle_nb = {
.notifier_call = event_idle_notif,
};
void __init perf_event_init(void)
{
int ret, cpu;