kernel-fxtec-pro1x/kernel/events/callchain.c
Andrew Vagin e6dab5ffab perf/trace: Add ability to set a target task for events
A few events are interesting not only for a current task.
For example, sched_stat_* events are interesting for a task
which wakes up. For this reason, it will be good if such
events will be delivered to a target task too.

Now a target task can be set by using __perf_task().

The original idea and a draft patch belongs to Peter Zijlstra.

I need these events for profiling sleep times. sched_switch is used for
getting callchains and sched_stat_* is used for getting time periods.
These events are combined in user space, then it can be analyzed by
perf tools.

Inspired-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arun Sharma <asharma@fb.com>
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342016098-213063-1-git-send-email-avagin@openvz.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2012-07-31 17:02:05 +02:00

196 lines
4.1 KiB
C

/*
* Performance events callchain code, extracted from core.c:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>
#include <linux/slab.h>
#include "internal.h"
struct callchain_cpus_entries {
struct rcu_head rcu_head;
struct perf_callchain_entry *cpu_entries[0];
};
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;
__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
__weak void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
static void release_callchain_buffers_rcu(struct rcu_head *head)
{
struct callchain_cpus_entries *entries;
int cpu;
entries = container_of(head, struct callchain_cpus_entries, rcu_head);
for_each_possible_cpu(cpu)
kfree(entries->cpu_entries[cpu]);
kfree(entries);
}
static void release_callchain_buffers(void)
{
struct callchain_cpus_entries *entries;
entries = callchain_cpus_entries;
rcu_assign_pointer(callchain_cpus_entries, NULL);
call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}
static int alloc_callchain_buffers(void)
{
int cpu;
int size;
struct callchain_cpus_entries *entries;
/*
* We can't use the percpu allocation API for data that can be
* accessed from NMI. Use a temporary manual per cpu allocation
* until that gets sorted out.
*/
size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
entries = kzalloc(size, GFP_KERNEL);
if (!entries)
return -ENOMEM;
size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
for_each_possible_cpu(cpu) {
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
cpu_to_node(cpu));
if (!entries->cpu_entries[cpu])
goto fail;
}
rcu_assign_pointer(callchain_cpus_entries, entries);
return 0;
fail:
for_each_possible_cpu(cpu)
kfree(entries->cpu_entries[cpu]);
kfree(entries);
return -ENOMEM;
}
int get_callchain_buffers(void)
{
int err = 0;
int count;
mutex_lock(&callchain_mutex);
count = atomic_inc_return(&nr_callchain_events);
if (WARN_ON_ONCE(count < 1)) {
err = -EINVAL;
goto exit;
}
if (count > 1) {
/* If the allocation failed, give up */
if (!callchain_cpus_entries)
err = -ENOMEM;
goto exit;
}
err = alloc_callchain_buffers();
exit:
mutex_unlock(&callchain_mutex);
return err;
}
void put_callchain_buffers(void)
{
if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
release_callchain_buffers();
mutex_unlock(&callchain_mutex);
}
}
static struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
int cpu;
struct callchain_cpus_entries *entries;
*rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
if (*rctx == -1)
return NULL;
entries = rcu_dereference(callchain_cpus_entries);
if (!entries)
return NULL;
cpu = smp_processor_id();
return &entries->cpu_entries[cpu][*rctx];
}
static void
put_callchain_entry(int rctx)
{
put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
}
struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
{
int rctx;
struct perf_callchain_entry *entry;
entry = get_callchain_entry(&rctx);
if (rctx == -1)
return NULL;
if (!entry)
goto exit_put;
entry->nr = 0;
if (!user_mode(regs)) {
perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
perf_callchain_kernel(entry, regs);
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}
if (regs) {
/*
* Disallow cross-task user callchains.
*/
if (event->ctx->task && event->ctx->task != current)
goto exit_put;
perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_user(entry, regs);
}
exit_put:
put_callchain_entry(rctx);
return entry;
}