2239291aeb
This patch removes the register functions of TRACE_EVENT() to enable and disable tracepoints. The registering of an event is now done directly in the trace_events.c file. The tracepoint_probe_register() is now called directly. The prototypes are no longer type checked, but this should not be an issue since the tracepoints are created automatically by the macros. If a prototype is incorrect in the TRACE_EVENT() macro, then other macros will catch it. The trace_event_class structure now holds the probes to be called by the callbacks. This removes needing to have each event have a separate pointer for the probe. To handle kprobes and syscalls, since they register probes in a different manner, a "reg" field is added to the ftrace_event_class structure. If the "reg" field is assigned, then it will be called for enabling and disabling of the probe for either ftrace or perf. To let the reg function know what is happening, a new enum (trace_reg) is created that has the type of control that is needed. With this new rework, the 82 kernel events and 618 syscall events have their footprint dramatically lowered: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint 4900252 1057412 861512 6819176 680d68 vmlinux.regs The size went from 6863829 to 6819176, that's a total of 44K in savings. With tracepoints being continuously added, it is critical that the footprint becomes minimal. v5: Added #ifdef CONFIG_PERF_EVENTS around a reference to perf specific structure in trace_events.c. v4: Fixed trace self tests to check probe because regfunc no longer exists. v3: Updated to handle void *data in beginning of probe parameters. Also added the tracepoint: check_trace_callback_type_##call(). v2: Changed the callback probes to pass void * and typecast the value within the function. 
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Masami Hiramatsu <mhiramat@redhat.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
184 lines
4 KiB
C
184 lines
4 KiB
C
/*
|
|
* trace event based perf event profiling/tracing
|
|
*
|
|
* Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
|
|
* Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
#include "trace.h"
|
|
|
|
/*
 * Per-cpu struct pt_regs storage, exported (GPL-only) for users outside
 * this file.
 * NOTE(review): presumably scratch space for probes that need to capture
 * caller registers -- confirm against the users of perf_trace_regs.
 */
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);

/* Only the GPL export is here; the definition is not in this part of the file. */
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);

/*
 * Per-cpu trace buffers handed out by perf_trace_buf_prepare().
 *
 * Both are allocated with alloc_percpu() when the first event is enabled
 * (perf_trace_event_enable()) and released once the last one is disabled
 * (perf_trace_event_disable()). They are published with
 * rcu_assign_pointer() and read with rcu_dereference_sched().
 *
 * perf_trace_buf_nmi is the buffer used when in_nmi() is true;
 * perf_trace_buf is used in every other context.
 */
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
|
|
|
|
/*
 * Type of one per-cpu trace buffer: PERF_MAX_TRACE_SIZE bytes expressed as
 * an array of unsigned long. Using unsigned long as the element type forces
 * the buffer to be aligned to unsigned long, which avoids misaligned access
 * surprises. This is the type passed to alloc_percpu() for both buffers.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/*
 * Count the events in use (per event id, not per instance).
 *
 * Incremented when an event's probe is successfully registered and
 * decremented when it is unregistered; the per-cpu buffers exist only
 * while this is non-zero.
 */
static int total_ref_count;
|
|
|
|
static int perf_trace_event_enable(struct ftrace_event_call *event)
|
|
{
|
|
char *buf;
|
|
int ret = -ENOMEM;
|
|
|
|
if (event->perf_refcount++ > 0)
|
|
return 0;
|
|
|
|
if (!total_ref_count) {
|
|
buf = (char *)alloc_percpu(perf_trace_t);
|
|
if (!buf)
|
|
goto fail_buf;
|
|
|
|
rcu_assign_pointer(perf_trace_buf, buf);
|
|
|
|
buf = (char *)alloc_percpu(perf_trace_t);
|
|
if (!buf)
|
|
goto fail_buf_nmi;
|
|
|
|
rcu_assign_pointer(perf_trace_buf_nmi, buf);
|
|
}
|
|
|
|
if (event->class->reg)
|
|
ret = event->class->reg(event, TRACE_REG_PERF_REGISTER);
|
|
else
|
|
ret = tracepoint_probe_register(event->name,
|
|
event->class->perf_probe,
|
|
event);
|
|
if (!ret) {
|
|
total_ref_count++;
|
|
return 0;
|
|
}
|
|
|
|
fail_buf_nmi:
|
|
if (!total_ref_count) {
|
|
free_percpu(perf_trace_buf_nmi);
|
|
free_percpu(perf_trace_buf);
|
|
perf_trace_buf_nmi = NULL;
|
|
perf_trace_buf = NULL;
|
|
}
|
|
fail_buf:
|
|
event->perf_refcount--;
|
|
|
|
return ret;
|
|
}
|
|
|
|
int perf_trace_enable(int event_id)
|
|
{
|
|
struct ftrace_event_call *event;
|
|
int ret = -EINVAL;
|
|
|
|
mutex_lock(&event_mutex);
|
|
list_for_each_entry(event, &ftrace_events, list) {
|
|
if (event->id == event_id &&
|
|
event->class && event->class->perf_probe &&
|
|
try_module_get(event->mod)) {
|
|
ret = perf_trace_event_enable(event);
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&event_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void perf_trace_event_disable(struct ftrace_event_call *event)
|
|
{
|
|
char *buf, *nmi_buf;
|
|
|
|
if (--event->perf_refcount > 0)
|
|
return;
|
|
|
|
if (event->class->reg)
|
|
event->class->reg(event, TRACE_REG_PERF_UNREGISTER);
|
|
else
|
|
tracepoint_probe_unregister(event->name, event->class->perf_probe, event);
|
|
|
|
if (!--total_ref_count) {
|
|
buf = perf_trace_buf;
|
|
rcu_assign_pointer(perf_trace_buf, NULL);
|
|
|
|
nmi_buf = perf_trace_buf_nmi;
|
|
rcu_assign_pointer(perf_trace_buf_nmi, NULL);
|
|
|
|
/*
|
|
* Ensure every events in profiling have finished before
|
|
* releasing the buffers
|
|
*/
|
|
synchronize_sched();
|
|
|
|
free_percpu(buf);
|
|
free_percpu(nmi_buf);
|
|
}
|
|
}
|
|
|
|
void perf_trace_disable(int event_id)
|
|
{
|
|
struct ftrace_event_call *event;
|
|
|
|
mutex_lock(&event_mutex);
|
|
list_for_each_entry(event, &ftrace_events, list) {
|
|
if (event->id == event_id) {
|
|
perf_trace_event_disable(event);
|
|
module_put(event->mod);
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&event_mutex);
|
|
}
|
|
|
|
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
|
|
int *rctxp, unsigned long *irq_flags)
|
|
{
|
|
struct trace_entry *entry;
|
|
char *trace_buf, *raw_data;
|
|
int pc, cpu;
|
|
|
|
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
|
|
|
|
pc = preempt_count();
|
|
|
|
/* Protect the per cpu buffer, begin the rcu read side */
|
|
local_irq_save(*irq_flags);
|
|
|
|
*rctxp = perf_swevent_get_recursion_context();
|
|
if (*rctxp < 0)
|
|
goto err_recursion;
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
if (in_nmi())
|
|
trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
|
|
else
|
|
trace_buf = rcu_dereference_sched(perf_trace_buf);
|
|
|
|
if (!trace_buf)
|
|
goto err;
|
|
|
|
raw_data = per_cpu_ptr(trace_buf, cpu);
|
|
|
|
/* zero the dead bytes from align to not leak stack to user */
|
|
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
|
|
|
|
entry = (struct trace_entry *)raw_data;
|
|
tracing_generic_entry_update(entry, *irq_flags, pc);
|
|
entry->type = type;
|
|
|
|
return raw_data;
|
|
err:
|
|
perf_swevent_put_recursion_context(*rctxp);
|
|
err_recursion:
|
|
local_irq_restore(*irq_flags);
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
|