From 272325c4821f052092c41feac21f4a1a46f0ad48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 15 Apr 2015 11:41:58 +0200 Subject: [PATCH] perf: Fix mux_interval hrtimer wreckage Thomas stumbled over the hrtimer_forward_now() in perf_event_mux_interval_ms_store() and noticed its broken-ness. You cannot just change the expiry time of an active timer, it will destroy the red-black tree order and cause havoc. Change it to (re)start the timer instead, (re)starting a timer will dequeue and enqueue a timer and therefore preserve rb-tree order. Since we cannot enqueue remotely, wrap the thing in cpu_function_call(), this however mandates that we restrict ourselves to online cpus. Also serialize the entire setting so we don't get multiple concurrent threads trying to update to different values. Also fix a problem in perf_mux_hrtimer_restart(), checking against hrtimer_active() can actually loose us the timer when timer->state == HRTIMER_STATE_CALLBACK and the callback has already decided NORESTART. Furthermore it doesn't make any sense to test hrtimer_callback_running() when we already tested hrtimer_active(), but with the above change, we explicitly must call it when callback_running. Lastly, rename a few functions: s/perf_cpu_hrtimer_/perf_mux_hrtimer_/ -- because I could not find the mux timer function s/\/timer/ -- because that's the normal way of calling things. Fixes: 62b856397927 ("perf: Add sysfs entry to adjust multiplexing interval per PMU") Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: http://lkml.kernel.org/r/20150415095011.863052571@infradead.org Signed-off-by: Thomas Gleixner --- kernel/events/core.c | 63 +++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 05309fdba2a4..e7ed00b4a6ed 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -51,9 +51,11 @@ static struct workqueue_struct *perf_wq; +typedef int (*remote_function_f)(void *); + struct remote_function_call { struct task_struct *p; - int (*func)(void *info); + remote_function_f func; void *info; int ret; }; @@ -86,7 +88,7 @@ static void remote_function(void *data) * -EAGAIN - when the process moved away */ static int -task_function_call(struct task_struct *p, int (*func) (void *info), void *info) +task_function_call(struct task_struct *p, remote_function_f func, void *info) { struct remote_function_call data = { .p = p, @@ -110,7 +112,7 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info) * * returns: @func return value or -ENXIO when the cpu is offline */ -static int cpu_function_call(int cpu, int (*func) (void *info), void *info) +static int cpu_function_call(int cpu, remote_function_f func, void *info) { struct remote_function_call data = { .p = NULL, @@ -747,7 +749,7 @@ perf_cgroup_mark_enabled(struct perf_event *event, /* * function must be called with interrupts disbled */ -static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr) +static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { struct perf_cpu_context *cpuctx; enum hrtimer_restart ret = HRTIMER_NORESTART; @@ -771,7 +773,7 @@ static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr) } /* CPU is going down */ -void perf_cpu_hrtimer_cancel(int cpu) +void perf_mux_hrtimer_cancel(int cpu) { struct perf_cpu_context *cpuctx; struct pmu *pmu; @@ -798,11 +800,11 @@ void perf_cpu_hrtimer_cancel(int cpu) local_irq_restore(flags); } -static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) +static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) { - struct hrtimer *hr = &cpuctx->hrtimer; + struct hrtimer *timer = &cpuctx->hrtimer; struct pmu *pmu = cpuctx->ctx.pmu; - int timer; + u64 interval; /* no multiplexing needed for SW PMU */ if (pmu->task_ctx_nr == perf_sw_context) @@ -812,29 +814,30 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) * check default is sane, if not set then force to * default interval (1/tick) */ - timer = pmu->hrtimer_interval_ms; - if (timer < 1) - timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; + interval = pmu->hrtimer_interval_ms; + if (interval < 1) + interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; - cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); + cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - hr->function = perf_cpu_hrtimer_handler; + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + timer->function = perf_mux_hrtimer_handler; } -static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx) +static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) { - struct hrtimer *hr = &cpuctx->hrtimer; + struct hrtimer *timer = &cpuctx->hrtimer; struct pmu *pmu = cpuctx->ctx.pmu; /* not for SW PMU */ if (pmu->task_ctx_nr == perf_sw_context) - return; + return 0; - if (hrtimer_active(hr)) - return; + if (hrtimer_is_queued(timer)) + return 0; - hrtimer_start(hr, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED); + hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED); + return 0; } void perf_pmu_disable(struct pmu *pmu) @@ -1913,7 +1916,7 @@ group_sched_in(struct perf_event *group_event, if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); - perf_cpu_hrtimer_restart(cpuctx); + perf_mux_hrtimer_restart(cpuctx); return -EAGAIN; } @@ -1960,7 +1963,7 @@ group_sched_in(struct perf_event *group_event, pmu->cancel_txn(pmu); - perf_cpu_hrtimer_restart(cpuctx); + perf_mux_hrtimer_restart(cpuctx); return -EAGAIN; } @@ -2233,7 +2236,7 @@ static int __perf_event_enable(void *info) */ if (leader != event) { group_sched_out(leader, cpuctx, ctx); - perf_cpu_hrtimer_restart(cpuctx); + perf_mux_hrtimer_restart(cpuctx); } if (leader->attr.pinned) { update_group_times(leader); @@ -7143,6 +7146,8 @@ perf_event_mux_interval_ms_show(struct device *dev, return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms); } +static DEFINE_MUTEX(mux_interval_mutex); + static ssize_t perf_event_mux_interval_ms_store(struct device *dev, struct device_attribute *attr, @@ -7162,17 +7167,21 @@ perf_event_mux_interval_ms_store(struct device *dev, if (timer == pmu->hrtimer_interval_ms) return count; + mutex_lock(&mux_interval_mutex); pmu->hrtimer_interval_ms = timer; /* update all cpuctx for this PMU */ - for_each_possible_cpu(cpu) { + get_online_cpus(); + for_each_online_cpu(cpu) { struct perf_cpu_context *cpuctx; cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); - if (hrtimer_active(&cpuctx->hrtimer)) - hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval); + cpu_function_call(cpu, + (remote_function_f)perf_mux_hrtimer_restart, cpuctx); } + put_online_cpus(); + mutex_unlock(&mux_interval_mutex); return count; } @@ -7277,7 +7286,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); cpuctx->ctx.pmu = pmu; - __perf_cpu_hrtimer_init(cpuctx, cpu); + __perf_mux_hrtimer_init(cpuctx, cpu); cpuctx->unique_pmu = pmu; }