tracing: move function profiler data out of function struct

Impact: reduce size of memory in function profiler

The function profiler originally introduces its counters into the
function records itself. There is 20 thousand different functions on
a normal system, and that is adding 20 thousand counters for profiling
event when not needed.

A normal run of the profiler yields only a couple of thousand functions
executed, depending on what is being profiled. This means we have around
18 thousand useless counters.

This patch rectifies this by moving the data out of the function
records used by dynamic ftrace. Data is preallocated to hold the functions
when the profiling begins. Checks are made during profiling to see if
more recorcds should be allocated, and they are allocated if it is safe
to do so.

This also removes the dependency from using dynamic ftrace, and also
removes the overhead by having it enabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
This commit is contained in:
Steven Rostedt 2009-03-23 17:12:36 -04:00
parent bac429f037
commit 493762fc53
3 changed files with 393 additions and 321 deletions

View file

@ -153,10 +153,6 @@ struct dyn_ftrace {
unsigned long flags;
struct dyn_ftrace *newlist;
};
#ifdef CONFIG_FUNCTION_PROFILER
unsigned long counter;
struct hlist_node node;
#endif
struct dyn_arch_ftrace arch;
};

View file

@ -379,20 +379,16 @@ config DYNAMIC_FTRACE
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on DYNAMIC_FTRACE
depends on FUNCTION_TRACER
default n
help
This option enables the kernel function profiler. When the dynamic
function tracing is enabled, a counter is added into the function
records used by the dynamic function tracer. A file is created in
debugfs called function_profile_enabled which defaults to zero.
This option enables the kernel function profiler. A file is created
in debugfs called function_profile_enabled which defaults to zero.
When a 1 is echoed into this file profiling begins, and when a
zero is entered, profiling stops. A file in the trace_stats
directory called functions, that show the list of functions that
have been hit and their counters.
This takes up around 320K more memory.
If in doubt, say N
config FTRACE_MCOUNT_RECORD

View file

@ -241,6 +241,392 @@ static void ftrace_update_pid_func(void)
#endif
}
#ifdef CONFIG_FUNCTION_PROFILER
struct ftrace_profile {
struct hlist_node node;
unsigned long ip;
unsigned long counter;
};
struct ftrace_profile_page {
struct ftrace_profile_page *next;
unsigned long index;
struct ftrace_profile records[];
};
#define PROFILE_RECORDS_SIZE \
(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
#define PROFILES_PER_PAGE \
(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
/* TODO: make these percpu, to prevent cache line bouncing */
static struct ftrace_profile_page *profile_pages_start;
static struct ftrace_profile_page *profile_pages;
static struct hlist_head *ftrace_profile_hash;
static int ftrace_profile_bits;
static int ftrace_profile_enabled;
static DEFINE_MUTEX(ftrace_profile_lock);
static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable);
#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
static raw_spinlock_t ftrace_profile_rec_lock =
(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
static void *
function_stat_next(void *v, int idx)
{
struct ftrace_profile *rec = v;
struct ftrace_profile_page *pg;
pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
again:
rec++;
if ((void *)rec >= (void *)&pg->records[pg->index]) {
pg = pg->next;
if (!pg)
return NULL;
rec = &pg->records[0];
if (!rec->counter)
goto again;
}
return rec;
}
static void *function_stat_start(struct tracer_stat *trace)
{
return function_stat_next(&profile_pages_start->records[0], 0);
}
static int function_stat_cmp(void *p1, void *p2)
{
struct ftrace_profile *a = p1;
struct ftrace_profile *b = p2;
if (a->counter < b->counter)
return -1;
if (a->counter > b->counter)
return 1;
else
return 0;
}
static int function_stat_headers(struct seq_file *m)
{
seq_printf(m, " Function Hit\n"
" -------- ---\n");
return 0;
}
static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu\n", str, rec->counter);
return 0;
}
static struct tracer_stat function_stats = {
.name = "functions",
.stat_start = function_stat_start,
.stat_next = function_stat_next,
.stat_cmp = function_stat_cmp,
.stat_headers = function_stat_headers,
.stat_show = function_stat_show
};
static void ftrace_profile_reset(void)
{
struct ftrace_profile_page *pg;
pg = profile_pages = profile_pages_start;
while (pg) {
memset(pg->records, 0, PROFILE_RECORDS_SIZE);
pg->index = 0;
pg = pg->next;
}
memset(ftrace_profile_hash, 0,
FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
}
int ftrace_profile_pages_init(void)
{
struct ftrace_profile_page *pg;
int i;
/* If we already allocated, do nothing */
if (profile_pages)
return 0;
profile_pages = (void *)get_zeroed_page(GFP_KERNEL);
if (!profile_pages)
return -ENOMEM;
pg = profile_pages_start = profile_pages;
/* allocate 10 more pages to start */
for (i = 0; i < 10; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
/*
* We only care about allocating profile_pages, if
* we failed to allocate here, hopefully we will allocate
* later.
*/
if (!pg->next)
break;
pg = pg->next;
}
return 0;
}
static int ftrace_profile_init(void)
{
int size;
if (ftrace_profile_hash) {
/* If the profile is already created, simply reset it */
ftrace_profile_reset();
return 0;
}
/*
* We are profiling all functions, but usually only a few thousand
* functions are hit. We'll make a hash of 1024 items.
*/
size = FTRACE_PROFILE_HASH_SIZE;
ftrace_profile_hash =
kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
if (!ftrace_profile_hash)
return -ENOMEM;
size--;
for (; size; size >>= 1)
ftrace_profile_bits++;
/* Preallocate a few pages */
if (ftrace_profile_pages_init() < 0) {
kfree(ftrace_profile_hash);
ftrace_profile_hash = NULL;
return -ENOMEM;
}
return 0;
}
/* interrupts must be disabled */
static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
{
struct ftrace_profile *rec;
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long key;
key = hash_long(ip, ftrace_profile_bits);
hhd = &ftrace_profile_hash[key];
if (hlist_empty(hhd))
return NULL;
hlist_for_each_entry_rcu(rec, n, hhd, node) {
if (rec->ip == ip)
return rec;
}
return NULL;
}
static void ftrace_add_profile(struct ftrace_profile *rec)
{
unsigned long key;
key = hash_long(rec->ip, ftrace_profile_bits);
hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
}
/* Interrupts must be disabled calling this */
static struct ftrace_profile *
ftrace_profile_alloc(unsigned long ip, bool alloc_safe)
{
struct ftrace_profile *rec = NULL;
/* prevent recursion */
if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1)
goto out;
__raw_spin_lock(&ftrace_profile_rec_lock);
/* Try to always keep another page available */
if (!profile_pages->next && alloc_safe)
profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
/*
* Try to find the function again since another
* task on another CPU could have added it
*/
rec = ftrace_find_profiled_func(ip);
if (rec)
goto out_unlock;
if (profile_pages->index == PROFILES_PER_PAGE) {
if (!profile_pages->next)
goto out_unlock;
profile_pages = profile_pages->next;
}
rec = &profile_pages->records[profile_pages->index++];
rec->ip = ip;
ftrace_add_profile(rec);
out_unlock:
__raw_spin_unlock(&ftrace_profile_rec_lock);
out:
atomic_dec(&__get_cpu_var(ftrace_profile_disable));
return rec;
}
/*
* If we are not in an interrupt, or softirq and
* and interrupts are disabled and preemption is not enabled
* (not in a spinlock) then it should be safe to allocate memory.
*/
static bool ftrace_safe_to_allocate(void)
{
return !in_interrupt() && irqs_disabled() && !preempt_count();
}
static void
function_profile_call(unsigned long ip, unsigned long parent_ip)
{
struct ftrace_profile *rec;
unsigned long flags;
bool alloc_safe;
if (!ftrace_profile_enabled)
return;
alloc_safe = ftrace_safe_to_allocate();
local_irq_save(flags);
rec = ftrace_find_profiled_func(ip);
if (!rec) {
rec = ftrace_profile_alloc(ip, alloc_safe);
if (!rec)
goto out;
}
rec->counter++;
out:
local_irq_restore(flags);
}
static struct ftrace_ops ftrace_profile_ops __read_mostly =
{
.func = function_profile_call,
};
static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long val;
char buf[64];
int ret;
if (cnt >= sizeof(buf))
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
ret = strict_strtoul(buf, 10, &val);
if (ret < 0)
return ret;
val = !!val;
mutex_lock(&ftrace_profile_lock);
if (ftrace_profile_enabled ^ val) {
if (val) {
ret = ftrace_profile_init();
if (ret < 0) {
cnt = ret;
goto out;
}
register_ftrace_function(&ftrace_profile_ops);
ftrace_profile_enabled = 1;
} else {
ftrace_profile_enabled = 0;
unregister_ftrace_function(&ftrace_profile_ops);
}
}
out:
mutex_unlock(&ftrace_profile_lock);
filp->f_pos += cnt;
return cnt;
}
static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
int r;
r = sprintf(buf, "%u\n", ftrace_profile_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static const struct file_operations ftrace_profile_fops = {
.open = tracing_open_generic,
.read = ftrace_profile_read,
.write = ftrace_profile_write,
};
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct dentry *entry;
int ret;
ret = register_stat_tracer(&function_stats);
if (ret) {
pr_warning("Warning: could not register "
"function stats\n");
return;
}
entry = debugfs_create_file("function_profile_enabled", 0644,
d_tracer, NULL, &ftrace_profile_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'function_profile_enabled' entry\n");
}
#else /* CONFIG_FUNCTION_PROFILER */
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
/* set when tracing only a pid */
struct pid *ftrace_pid_trace;
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@ -309,307 +695,6 @@ static struct dyn_ftrace *ftrace_free_records;
} \
}
#ifdef CONFIG_FUNCTION_PROFILER
static struct hlist_head *ftrace_profile_hash;
static int ftrace_profile_bits;
static int ftrace_profile_enabled;
static DEFINE_MUTEX(ftrace_profile_lock);
static void *
function_stat_next(void *v, int idx)
{
struct dyn_ftrace *rec = v;
struct ftrace_page *pg;
pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK);
again:
rec++;
if ((void *)rec >= (void *)&pg->records[pg->index]) {
pg = pg->next;
if (!pg)
return NULL;
rec = &pg->records[0];
}
if (rec->flags & FTRACE_FL_FREE ||
rec->flags & FTRACE_FL_FAILED ||
!(rec->flags & FTRACE_FL_CONVERTED) ||
/* ignore non hit functions */
!rec->counter)
goto again;
return rec;
}
static void *function_stat_start(struct tracer_stat *trace)
{
return function_stat_next(&ftrace_pages_start->records[0], 0);
}
static int function_stat_cmp(void *p1, void *p2)
{
struct dyn_ftrace *a = p1;
struct dyn_ftrace *b = p2;
if (a->counter < b->counter)
return -1;
if (a->counter > b->counter)
return 1;
else
return 0;
}
static int function_stat_headers(struct seq_file *m)
{
seq_printf(m, " Function Hit\n"
" -------- ---\n");
return 0;
}
static int function_stat_show(struct seq_file *m, void *v)
{
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu\n", str, rec->counter);
return 0;
}
static struct tracer_stat function_stats = {
.name = "functions",
.stat_start = function_stat_start,
.stat_next = function_stat_next,
.stat_cmp = function_stat_cmp,
.stat_headers = function_stat_headers,
.stat_show = function_stat_show
};
static void ftrace_profile_init(int nr_funcs)
{
unsigned long addr;
int order;
int size;
/*
* We are profiling all functions, lets make it 1/4th of the
* number of functions that are in core kernel. So we have to
* iterate 4 times.
*/
order = (sizeof(struct hlist_head) * nr_funcs) / 4;
order = get_order(order);
size = 1 << (PAGE_SHIFT + order);
pr_info("Allocating %d KB for profiler hash\n", size >> 10);
addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!addr) {
pr_warning("Could not allocate function profiler hash\n");
return;
}
ftrace_profile_hash = (void *)addr;
/*
* struct hlist_head should be a pointer of 4 or 8 bytes.
* And a simple bit manipulation can be done, but if for
* some reason struct hlist_head is not a mulitple of 2,
* then we play it safe, and simply count. This function
* is done once at boot up, so it is not that critical in
* performance.
*/
size--;
size /= sizeof(struct hlist_head);
for (; size; size >>= 1)
ftrace_profile_bits++;
pr_info("Function profiler has %d hash buckets\n",
1 << ftrace_profile_bits);
return;
}
static ssize_t
ftrace_profile_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
int r;
r = sprintf(buf, "%u\n", ftrace_profile_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static void ftrace_profile_reset(void)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
do_for_each_ftrace_rec(pg, rec) {
rec->counter = 0;
} while_for_each_ftrace_rec();
}
static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip)
{
struct dyn_ftrace *rec;
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long flags;
unsigned long key;
if (!ftrace_profile_hash)
return NULL;
key = hash_long(ip, ftrace_profile_bits);
hhd = &ftrace_profile_hash[key];
if (hlist_empty(hhd))
return NULL;
local_irq_save(flags);
hlist_for_each_entry_rcu(rec, n, hhd, node) {
if (rec->ip == ip)
goto out;
}
rec = NULL;
out:
local_irq_restore(flags);
return rec;
}
static void
function_profile_call(unsigned long ip, unsigned long parent_ip)
{
struct dyn_ftrace *rec;
unsigned long flags;
if (!ftrace_profile_enabled)
return;
local_irq_save(flags);
rec = ftrace_find_profiled_func(ip);
if (!rec)
goto out;
rec->counter++;
out:
local_irq_restore(flags);
}
static struct ftrace_ops ftrace_profile_ops __read_mostly =
{
.func = function_profile_call,
};
static ssize_t
ftrace_profile_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
unsigned long val;
char buf[64];
int ret;
if (!ftrace_profile_hash) {
pr_info("Can not enable hash due to earlier problems\n");
return -ENODEV;
}
if (cnt >= sizeof(buf))
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
ret = strict_strtoul(buf, 10, &val);
if (ret < 0)
return ret;
val = !!val;
mutex_lock(&ftrace_profile_lock);
if (ftrace_profile_enabled ^ val) {
if (val) {
ftrace_profile_reset();
register_ftrace_function(&ftrace_profile_ops);
ftrace_profile_enabled = 1;
} else {
ftrace_profile_enabled = 0;
unregister_ftrace_function(&ftrace_profile_ops);
}
}
mutex_unlock(&ftrace_profile_lock);
filp->f_pos += cnt;
return cnt;
}
static const struct file_operations ftrace_profile_fops = {
.open = tracing_open_generic,
.read = ftrace_profile_read,
.write = ftrace_profile_write,
};
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct dentry *entry;
int ret;
ret = register_stat_tracer(&function_stats);
if (ret) {
pr_warning("Warning: could not register "
"function stats\n");
return;
}
entry = debugfs_create_file("function_profile_enabled", 0644,
d_tracer, NULL, &ftrace_profile_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'function_profile_enabled' entry\n");
}
static void ftrace_add_profile(struct dyn_ftrace *rec)
{
unsigned long key;
if (!ftrace_profile_hash)
return;
key = hash_long(rec->ip, ftrace_profile_bits);
hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
}
static void ftrace_profile_release(struct dyn_ftrace *rec)
{
mutex_lock(&ftrace_profile_lock);
hlist_del(&rec->node);
mutex_unlock(&ftrace_profile_lock);
}
#else /* CONFIG_FUNCTION_PROFILER */
static void ftrace_profile_init(int nr_funcs)
{
}
static void ftrace_add_profile(struct dyn_ftrace *rec)
{
}
static void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
static void ftrace_profile_release(struct dyn_ftrace *rec)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
#ifdef CONFIG_KPROBES
static int frozen_record_count;
@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size)
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
if ((rec->ip >= s) && (rec->ip < e) &&
!(rec->flags & FTRACE_FL_FREE)) {
!(rec->flags & FTRACE_FL_FREE))
ftrace_free_rec(rec);
ftrace_profile_release(rec);
}
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
}
@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip)
rec->newlist = ftrace_new_addrs;
ftrace_new_addrs = rec;
ftrace_add_profile(rec);
return rec;
}
@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
"'set_graph_function' entry\n");
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
ftrace_profile_debugfs(d_tracer);
return 0;
}
@ -2532,8 +2611,6 @@ void __init ftrace_init(void)
if (ret)
goto failed;
ftrace_profile_init(count);
last_ftrace_enabled = ftrace_enabled = 1;
ret = ftrace_convert_nops(NULL,
@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void)
if (!entry)
pr_warning("Could not create debugfs "
"'set_ftrace_pid' entry\n");
ftrace_profile_debugfs(d_tracer);
return 0;
}
fs_initcall(ftrace_init_debugfs);