From a7c3196c79051f9e1a498f5be8fe6870bde5e55d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org> Date: Mon, 2 Jun 2014 23:20:13 -0400 Subject: [PATCH 01/35] tools lib traceevent: Add flag to not load event plugins Add a flag to pevent that will let the callers be able to set it and keep the system, and perhaps even normal plugins from being loaded. This is useful when plugins might hide certain information and seeing the raw events shows what may be going on. Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Namhyung Kim <namhyung@kernel.org> Link: http://lkml.kernel.org/r/20140603032223.678098063@goodmis.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/lib/traceevent/event-parse.h | 2 ++ tools/lib/traceevent/event-plugin.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index feab94281634..a68ec3d8289f 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -354,6 +354,8 @@ enum pevent_func_arg_type { enum pevent_flag { PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ + PEVENT_DISABLE_SYS_PLUGINS = 1 << 1, + PEVENT_DISABLE_PLUGINS = 1 << 2, }; #define PEVENT_ERRORS \ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index 0c8bf6780e4d..317466bd1a37 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -148,12 +148,17 @@ load_plugins(struct pevent *pevent, const char *suffix, char *path; char *envdir; + if (pevent->flags & PEVENT_DISABLE_PLUGINS) + return; + /* * If a system plugin directory was defined, * check that first. */ #ifdef PLUGIN_DIR - load_plugins_dir(pevent, suffix, PLUGIN_DIR, load_plugin, data); + if (!(pevent->flags & PEVENT_DISABLE_SYS_PLUGINS)) + load_plugins_dir(pevent, suffix, PLUGIN_DIR, + load_plugin, data); #endif /* From 5827f2faabe40cc285cc67b697277547a19b6c9a Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt@goodmis.org> Date: Tue, 3 Jun 2014 18:41:54 -0400 Subject: [PATCH 02/35] tools lib traceevent: Add options to plugins The traceevent plugins allows developers to have their events print out information that is more advanced than what can be achieved by the trace event format files. As these plugins are used on the userspace side of the tracing tools, it is only logical that the tools should be able to produce different types of output for the events. The types of events still need to be defined by the plugins thus we need a way to pass information from the tool to the plugin to specify what type of information to be shown. Not only does the information need to be passed by the tool to plugin, but the plugin also requires a way to notify the tool of what options it can provide. This builds the plugin option infrastructure that is taken from trace-cmd that is used to allow plugins to produce different output based on the options specified by the tool. Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Namhyung Kim <namhyung@kernel.org> Link: http://lkml.kernel.org/r/20140603184154.0a4c031c@gandalf.local.home Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/lib/traceevent/event-parse.h | 16 ++- tools/lib/traceevent/event-plugin.c | 196 ++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a68ec3d8289f..56e0e6c12411 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -107,8 +107,8 @@ typedef int (*pevent_event_handler_func)(struct trace_seq *s, typedef int (*pevent_plugin_load_func)(struct pevent *pevent); typedef int (*pevent_plugin_unload_func)(struct pevent *pevent); -struct plugin_option { - struct plugin_option *next; +struct pevent_plugin_option { + struct pevent_plugin_option *next; void *handle; char *file; char *name; @@ -135,7 +135,7 @@ struct plugin_option { * PEVENT_PLUGIN_OPTIONS: (optional) * Plugin options that can be set before loading * - * struct plugin_option PEVENT_PLUGIN_OPTIONS[] = { + * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = { * { * .name = "option-name", * .plugin_alias = "overide-file-name", (optional) @@ -412,9 +412,19 @@ enum pevent_errno { struct plugin_list; +#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) + struct plugin_list *traceevent_load_plugins(struct pevent *pevent); void traceevent_unload_plugins(struct plugin_list *plugin_list, struct pevent *pevent); +char **traceevent_plugin_list_options(void); +void traceevent_plugin_free_options_list(char **list); +int traceevent_plugin_add_options(const char *name, + struct pevent_plugin_option *options); +void traceevent_plugin_remove_options(struct pevent_plugin_option *options); +void traceevent_print_plugins(struct trace_seq *s, + const char *prefix, const char *suffix, + const struct plugin_list *list); struct cmdline; struct cmdline_list; diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index 317466bd1a37..136162c03af1 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -18,6 +18,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include <stdio.h> #include <string.h> #include <dlfcn.h> #include <stdlib.h> @@ -30,12 +31,207 @@ #define LOCAL_PLUGIN_DIR ".traceevent/plugins" +static struct registered_plugin_options { + struct registered_plugin_options *next; + struct pevent_plugin_option *options; +} *registered_options; + +static struct trace_plugin_options { + struct trace_plugin_options *next; + char *plugin; + char *option; + char *value; +} *trace_plugin_options; + struct plugin_list { struct plugin_list *next; char *name; void *handle; }; +/** + * traceevent_plugin_list_options - get list of plugin options + * + * Returns an array of char strings that list the currently registered + * plugin options in the format of <plugin>:<option>. This list can be + * used by toggling the option. + * + * Returns NULL if there's no options registered. On error it returns + * INVALID_PLUGIN_LIST_OPTION + * + * Must be freed with traceevent_plugin_free_options_list(). + */ +char **traceevent_plugin_list_options(void) +{ + struct registered_plugin_options *reg; + struct pevent_plugin_option *op; + char **list = NULL; + char *name; + int count = 0; + + for (reg = registered_options; reg; reg = reg->next) { + for (op = reg->options; op->name; op++) { + char *alias = op->plugin_alias ? op->plugin_alias : op->file; + char **temp = list; + + name = malloc(strlen(op->name) + strlen(alias) + 2); + if (!name) + goto err; + + sprintf(name, "%s:%s", alias, op->name); + list = realloc(list, count + 2); + if (!list) { + list = temp; + free(name); + goto err; + } + list[count++] = name; + list[count] = NULL; + } + } + return list; + + err: + while (--count >= 0) + free(list[count]); + free(list); + + return INVALID_PLUGIN_LIST_OPTION; +} + +void traceevent_plugin_free_options_list(char **list) +{ + int i; + + if (!list) + return; + + if (list == INVALID_PLUGIN_LIST_OPTION) + return; + + for (i = 0; list[i]; i++) + free(list[i]); + + free(list); +} + +static int +update_option(const char *file, struct pevent_plugin_option *option) +{ + struct trace_plugin_options *op; + char *plugin; + + if (option->plugin_alias) { + plugin = strdup(option->plugin_alias); + if (!plugin) + return -1; + } else { + char *p; + plugin = strdup(file); + if (!plugin) + return -1; + p = strstr(plugin, "."); + if (p) + *p = '\0'; + } + + /* first look for named options */ + for (op = trace_plugin_options; op; op = op->next) { + if (!op->plugin) + continue; + if (strcmp(op->plugin, plugin) != 0) + continue; + if (strcmp(op->option, option->name) != 0) + continue; + + option->value = op->value; + option->set ^= 1; + goto out; + } + + /* first look for unnamed options */ + for (op = trace_plugin_options; op; op = op->next) { + if (op->plugin) + continue; + if (strcmp(op->option, option->name) != 0) + continue; + + option->value = op->value; + option->set ^= 1; + break; + } + + out: + free(plugin); + return 0; +} + +/** + * traceevent_plugin_add_options - Add a set of options by a plugin + * @name: The name of the plugin adding the options + * @options: The set of options being loaded + * + * Sets the options with the values that have been added by user. + */ +int traceevent_plugin_add_options(const char *name, + struct pevent_plugin_option *options) +{ + struct registered_plugin_options *reg; + + reg = malloc(sizeof(*reg)); + if (!reg) + return -1; + reg->next = registered_options; + reg->options = options; + registered_options = reg; + + while (options->name) { + update_option(name, options); + options++; + } + return 0; +} + +/** + * traceevent_plugin_remove_options - remove plugin options that were registered + * @options: Options to removed that were registered with traceevent_plugin_add_options + */ +void traceevent_plugin_remove_options(struct pevent_plugin_option *options) +{ + struct registered_plugin_options **last; + struct registered_plugin_options *reg; + + for (last = ®istered_options; *last; last = &(*last)->next) { + if ((*last)->options == options) { + reg = *last; + *last = reg->next; + free(reg); + return; + } + } +} + +/** + * traceevent_print_plugins - print out the list of plugins loaded + * @s: the trace_seq descripter to write to + * @prefix: The prefix string to add before listing the option name + * @suffix: The suffix string ot append after the option name + * @list: The list of plugins (usually returned by traceevent_load_plugins() + * + * Writes to the trace_seq @s the list of plugins (files) that is + * returned by traceevent_load_plugins(). Use @prefix and @suffix for formating: + * @prefix = " ", @suffix = "\n". + */ +void traceevent_print_plugins(struct trace_seq *s, + const char *prefix, const char *suffix, + const struct plugin_list *list) +{ + while (list) { + trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix); + list = list->next; + } +} + static void load_plugin(struct pevent *pevent, const char *path, const char *file, void *data) From 49440828ad7b809e9d31f6108875e3b1e974690c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org> Date: Mon, 2 Jun 2014 23:20:15 -0400 Subject: [PATCH 03/35] tools lib traceevent: Add options to function plugin Add the options "parent" and "indent" to the function plugin. When parent is set, the output looks like this: function: fsnotify_modify <-- vfs_write function: zone_statistics <-- get_page_from_freelist function: __inc_zone_state <-- zone_statistics function: inotify_inode_queue_event <-- fsnotify_modify function: fsnotify_parent <-- fsnotify_modify function: __inc_zone_state <-- zone_statistics function: __fsnotify_parent <-- fsnotify_parent function: inotify_dentry_parent_queue_event <-- fsnotify_parent function: add_to_page_cache_lru <-- do_read_cache_page When it's not set, it looks like: function: fsnotify_modify function: zone_statistics function: __inc_zone_state function: inotify_inode_queue_event function: fsnotify_parent function: __inc_zone_state function: __fsnotify_parent function: inotify_dentry_parent_queue_event function: add_to_page_cache_lru When the otpion "indent" is not set, it looks like this: function: fsnotify_modify <-- vfs_write function: zone_statistics <-- get_page_from_freelist function: __inc_zone_state <-- zone_statistics function: inotify_inode_queue_event <-- fsnotify_modify function: fsnotify_parent <-- fsnotify_modify function: __inc_zone_state <-- zone_statistics function: __fsnotify_parent <-- fsnotify_parent function: inotify_dentry_parent_queue_event <-- fsnotify_parent function: add_to_page_cache_lru <-- do_read_cache_page Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Namhyung Kim <namhyung@kernel.org> Link: http://lkml.kernel.org/r/20140603032224.056940410@goodmis.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/lib/traceevent/plugin_function.c | 43 ++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index 80ba4ff1fe84..a00ec190821a 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -33,6 +33,29 @@ static int cpus = -1; #define STK_BLK 10 +struct pevent_plugin_option plugin_options[] = +{ + { + .name = "parent", + .plugin_alias = "ftrace", + .description = + "Print parent of functions for function events", + }, + { + .name = "indent", + .plugin_alias = "ftrace", + .description = + "Try to show function call indents, based on parents", + .set = 1, + }, + { + .name = NULL, + } +}; + +static struct pevent_plugin_option *ftrace_parent = &plugin_options[0]; +static struct pevent_plugin_option *ftrace_indent = &plugin_options[1]; + static void add_child(struct func_stack *stack, const char *child, int pos) { int i; @@ -119,7 +142,8 @@ static int function_handler(struct trace_seq *s, struct pevent_record *record, parent = pevent_find_function(pevent, pfunction); - index = add_and_get_index(parent, func, record->cpu); + if (parent && ftrace_indent->set) + index = add_and_get_index(parent, func, record->cpu); trace_seq_printf(s, "%*s", index*3, ""); @@ -128,11 +152,13 @@ static int function_handler(struct trace_seq *s, struct pevent_record *record, else trace_seq_printf(s, "0x%llx", function); - trace_seq_printf(s, " <-- "); - if (parent) - trace_seq_printf(s, "%s", parent); - else - trace_seq_printf(s, "0x%llx", pfunction); + if (ftrace_parent->set) { + trace_seq_printf(s, " <-- "); + if (parent) + trace_seq_printf(s, "%s", parent); + else + trace_seq_printf(s, "0x%llx", pfunction); + } return 0; } @@ -141,6 +167,9 @@ int PEVENT_PLUGIN_LOADER(struct pevent *pevent) { pevent_register_event_handler(pevent, -1, "ftrace", "function", function_handler, NULL); + + traceevent_plugin_add_options("ftrace", plugin_options); + return 0; } @@ -157,6 +186,8 @@ void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent) free(fstack[i].stack); } + traceevent_plugin_remove_options(plugin_options); + free(fstack); fstack = NULL; cpus = -1; From 473a778a2f2949972b52ad7fc61577f381f2d05e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org> Date: Mon, 2 Jun 2014 23:20:16 -0400 Subject: [PATCH 04/35] tools lib traceevent: Added support for __get_bitmask() macro Coming in v3.16, trace events will be able to save bitmasks in raw format in the ring buffer and output it with the __get_bitmask() macro. In order for userspace tools to parse this, it must be able to handle the __get_bitmask() call and be able to convert the data that's in the ring buffer into a nice bitmask format. The output is similar to what the kernel uses to print bitmasks, with a comma separator every 4 bytes (8 characters). This allows for cpumasks to also be saved efficiently. The first user is the thermal:thermal_power_limit event which has the following output: thermal_power_limit: cpus=0000000f freq=1900000 cdev_state=0 power=5252 Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home Suggested-by: Javi Merino <javi.merino@arm.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Acked-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Javi Merino <javi.merino@arm.com> Link: http://lkml.kernel.org/r/20140603032224.229186537@goodmis.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/lib/traceevent/event-parse.c | 113 ++++++++++++++++++ tools/lib/traceevent/event-parse.h | 7 ++ .../util/scripting-engines/trace-event-perl.c | 1 + .../scripting-engines/trace-event-python.c | 1 + 4 files changed, 122 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b83184f2d484..93825a17dcce 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -765,6 +765,9 @@ static void free_arg(struct print_arg *arg) case PRINT_BSTRING: free(arg->string.string); break; + case PRINT_BITMASK: + free(arg->bitmask.bitmask); + break; case PRINT_DYNAMIC_ARRAY: free(arg->dynarray.index); break; @@ -2268,6 +2271,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val) case PRINT_FIELD ... PRINT_SYMBOL: case PRINT_STRING: case PRINT_BSTRING: + case PRINT_BITMASK: default: do_warning("invalid eval type %d", arg->type); ret = 0; @@ -2296,6 +2300,7 @@ static char *arg_eval (struct print_arg *arg) case PRINT_FIELD ... PRINT_SYMBOL: case PRINT_STRING: case PRINT_BSTRING: + case PRINT_BITMASK: default: do_warning("invalid eval type %d", arg->type); break; @@ -2683,6 +2688,35 @@ process_str(struct event_format *event __maybe_unused, struct print_arg *arg, return EVENT_ERROR; } +static enum event_type +process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_BITMASK; + arg->bitmask.bitmask = token; + arg->bitmask.offset = -1; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + static struct pevent_function_handler * find_func_handler(struct pevent *pevent, char *func_name) { @@ -2797,6 +2831,10 @@ process_function(struct event_format *event, struct print_arg *arg, free_token(token); return process_str(event, arg, tok); } + if (strcmp(token, "__get_bitmask") == 0) { + free_token(token); + return process_bitmask(event, arg, tok); + } if (strcmp(token, "__get_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); @@ -3324,6 +3362,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg return eval_type(val, arg, 0); case PRINT_STRING: case PRINT_BSTRING: + case PRINT_BITMASK: return 0; case PRINT_FUNC: { struct trace_seq s; @@ -3556,6 +3595,60 @@ static void print_str_to_seq(struct trace_seq *s, const char *format, trace_seq_printf(s, format, str); } +static void print_bitmask_to_seq(struct pevent *pevent, + struct trace_seq *s, const char *format, + int len_arg, const void *data, int size) +{ + int nr_bits = size * 8; + int str_size = (nr_bits + 3) / 4; + int len = 0; + char buf[3]; + char *str; + int index; + int i; + + /* + * The kernel likes to put in commas every 32 bits, we + * can do the same. + */ + str_size += (nr_bits - 1) / 32; + + str = malloc(str_size + 1); + if (!str) { + do_warning("%s: not enough memory!", __func__); + return; + } + str[str_size] = 0; + + /* Start out with -2 for the two chars per byte */ + for (i = str_size - 2; i >= 0; i -= 2) { + /* + * data points to a bit mask of size bytes. + * In the kernel, this is an array of long words, thus + * endianess is very important. + */ + if (pevent->file_bigendian) + index = size - (len + 1); + else + index = len; + + snprintf(buf, 3, "%02x", *((unsigned char *)data + index)); + memcpy(str + i, buf, 2); + len++; + if (!(len & 3) && i > 0) { + i--; + str[i] = ','; + } + } + + if (len_arg >= 0) + trace_seq_printf(s, format, len_arg, str); + else + trace_seq_printf(s, format, str); + + free(str); +} + static void print_str_arg(struct trace_seq *s, void *data, int size, struct event_format *event, const char *format, int len_arg, struct print_arg *arg) @@ -3691,6 +3784,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case PRINT_BSTRING: print_str_to_seq(s, format, len_arg, arg->string.string); break; + case PRINT_BITMASK: { + int bitmask_offset; + int bitmask_size; + + if (arg->bitmask.offset == -1) { + struct format_field *f; + + f = pevent_find_any_field(event, arg->bitmask.bitmask); + arg->bitmask.offset = f->offset; + } + bitmask_offset = data2host4(pevent, data + arg->bitmask.offset); + bitmask_size = bitmask_offset >> 16; + bitmask_offset &= 0xffff; + print_bitmask_to_seq(pevent, s, format, len_arg, + data + bitmask_offset, bitmask_size); + break; + } case PRINT_OP: /* * The only op for string should be ? : @@ -4822,6 +4932,9 @@ static void print_args(struct print_arg *args) case PRINT_BSTRING: printf("__get_str(%s)", args->string.string); break; + case PRINT_BITMASK: + printf("__get_bitmask(%s)", args->bitmask.bitmask); + break; case PRINT_TYPE: printf("(%s)", args->typecast.type); print_args(args->typecast.item); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 56e0e6c12411..7a3873ff9a4f 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -208,6 +208,11 @@ struct print_arg_string { int offset; }; +struct print_arg_bitmask { + char *bitmask; + int offset; +}; + struct print_arg_field { char *name; struct format_field *field; @@ -274,6 +279,7 @@ enum print_arg_type { PRINT_DYNAMIC_ARRAY, PRINT_OP, PRINT_FUNC, + PRINT_BITMASK, }; struct print_arg { @@ -288,6 +294,7 @@ struct print_arg { struct print_arg_hex hex; struct print_arg_func func; struct print_arg_string string; + struct print_arg_bitmask bitmask; struct print_arg_op op; struct print_arg_dynarray dynarray; }; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e108207c5de0..af7da565a750 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -215,6 +215,7 @@ static void define_event_symbols(struct event_format *event, case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: + case PRINT_BITMASK: break; case PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index cd9774df3750..c3de0971bfdd 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -197,6 +197,7 @@ static void define_event_symbols(struct event_format *event, case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_FUNC: + case PRINT_BITMASK: /* we should warn... */ return; } From a261e4a09a073451057e9dbe17783255ea94598d Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Thu, 5 Jun 2014 18:51:44 +0200 Subject: [PATCH 05/35] perf tools: Fix pipe check regression in attr event callback The file factoring in builtin-inject.c object introduced regression in attr event callback. The commit is: 3406912 perf inject: Handle output file via perf_data_file object Following hunk reversed the logic: - if (!inject->pipe_output) + if (&inject->output.is_pipe) putting it back, following example now works: $ perf record -o - kill | perf inject -b | perf report -i - Plus removing extra '&' (kudos to Arnaldo) Reported-by: Stephane Eranian <eranian@google.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20140605204117.GA1771@krava.redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/builtin-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6a3af0013d68..16c7c11ad06e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -72,7 +72,7 @@ static int perf_event__repipe_attr(struct perf_tool *tool, if (ret) return ret; - if (&inject->output.is_pipe) + if (!inject->output.is_pipe) return 0; return perf_event__repipe_synth(tool, event); From b0815d07ec12959da2104822561e0df26dc4bf3d Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 4 Jun 2014 14:43:58 +0200 Subject: [PATCH 06/35] perf tools: Prettify the tags/TAGS/cscope targets output Add tags/TAGS/cscope targets to the quiet family. $ make tags cscope BUILD: Doing 'make -j4' parallel build GEN tags $ make cscope BUILD: Doing 'make -j4' parallel build GEN cscope Reviewed-by: David Ahern <dsahern@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401893676-32205-1-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/Makefile.perf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ae20edfcc3f7..9670a16fa577 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -819,15 +819,15 @@ TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol TAG_FILES= ../../include/uapi/linux/perf_event.h TAGS: - $(RM) TAGS + $(QUIET_GEN)$(RM) TAGS; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES) tags: - $(RM) tags + $(QUIET_GEN)$(RM) tags; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES) cscope: - $(RM) cscope* + $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) ### Detect prefix changes From e646fe730a324098a718f1c9b2f349efb99d5457 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Thu, 29 May 2014 13:44:55 +0900 Subject: [PATCH 07/35] perf script/python: Print array argument as string With the Sebastian's change of handling num array argument (of raw syscall enter), the script still failed to work like this: $ perf record -e raw_syscalls:* sleep 1 $ perf script -g python $ perf script -s perf-script.py ... Traceback (most recent call last): File "perf-script.py", line 42, in raw_syscalls__sys_enter (id, args), TypeError: %u format: a number is required, not list Fatal Python error: problem in Python trace event handler Aborted (core dumped) This is because the generated script tries to print the array arg as unsigned integer (%u). Since the python seems to convert arguments to strings by default, just using %s solved the problem for me. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: http://lkml.kernel.org/r/1401338695-18837-1-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/scripting-engines/trace-event-python.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c3de0971bfdd..1c419321f707 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -623,6 +623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s=", f->name); if (f->flags & FIELD_IS_STRING || f->flags & FIELD_IS_FLAG || + f->flags & FIELD_IS_ARRAY || f->flags & FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); else if (f->flags & FIELD_IS_SIGNED) From f972eb63b1003fae68d7b7e9b674d4ba5db681c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Mon, 19 May 2014 15:13:47 -0400 Subject: [PATCH 08/35] perf: Pass protection and flags bits through mmap2 interface The mmap2 interface was missing the protection and flags bits needed to accurately determine if a mmap memory area was shared or private and if it was readable or not. Signed-off-by: Peter Zijlstra <peterz@infradead.org> [tweaked patch to compile and wrote changelog] Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1400526833-141779-2-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- include/uapi/linux/perf_event.h | 1 + kernel/events/core.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 5312fae47218..9269de254874 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -705,6 +705,7 @@ enum perf_event_type { * u32 min; * u64 ino; * u64 ino_generation; + * u32 prot, flags; * char filename[]; * struct sample_id sample_id; * }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 7da5e561e89a..eea1955c7868 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -40,6 +40,7 @@ #include <linux/mm_types.h> #include <linux/cgroup.h> #include <linux/module.h> +#include <linux/mman.h> #include "internal.h" @@ -5127,6 +5128,7 @@ struct perf_mmap_event { int maj, min; u64 ino; u64 ino_generation; + u32 prot, flags; struct { struct perf_event_header header; @@ -5168,6 +5170,8 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.header.size += sizeof(mmap_event->min); mmap_event->event_id.header.size += sizeof(mmap_event->ino); mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); + mmap_event->event_id.header.size += sizeof(mmap_event->prot); + mmap_event->event_id.header.size += sizeof(mmap_event->flags); } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); @@ -5186,6 +5190,8 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_put(&handle, mmap_event->min); perf_output_put(&handle, mmap_event->ino); perf_output_put(&handle, mmap_event->ino_generation); + perf_output_put(&handle, mmap_event->prot); + perf_output_put(&handle, mmap_event->flags); } __output_copy(&handle, mmap_event->file_name, @@ -5204,6 +5210,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) struct file *file = vma->vm_file; int maj = 0, min = 0; u64 ino = 0, gen = 0; + u32 prot = 0, flags = 0; unsigned int size; char tmp[16]; char *buf = NULL; @@ -5234,6 +5241,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) gen = inode->i_generation; maj = MAJOR(dev); min = MINOR(dev); + + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + if (vma->vm_flags & VM_MAYSHARE) + flags = MAP_SHARED; + else + flags = MAP_PRIVATE; + + if (vma->vm_flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if (vma->vm_flags & VM_MAYEXEC) + flags |= MAP_EXECUTABLE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + if (vma->vm_flags & VM_HUGETLB) + flags |= MAP_HUGETLB; + goto got_name; } else { name = (char *)arch_vma_name(vma); @@ -5274,6 +5303,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) mmap_event->min = min; mmap_event->ino = ino; mmap_event->ino_generation = gen; + mmap_event->prot = prot; + mmap_event->flags = flags; if (!(vma->vm_flags & VM_EXEC)) mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; @@ -5314,6 +5345,8 @@ void perf_event_mmap(struct vm_area_struct *vma) /* .min (attr_mmap2 only) */ /* .ino (attr_mmap2 only) */ /* .ino_generation (attr_mmap2 only) */ + /* .prot (attr_mmap2 only) */ + /* .flags (attr_mmap2 only) */ }; perf_event_mmap_event(&mmap_event); From 7ef807034ef33f8afe33fa7957c73954e8e4f89c Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Mon, 19 May 2014 15:13:49 -0400 Subject: [PATCH 09/35] perf tools: Update mmap2 interface with protection and flag bits The kernel piece passes more info now. Update the perf tool to reflect that and adjust the synthesized maps to play along. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1400526833-141779-4-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/event.c | 23 +++++++++++++++++++++-- tools/perf/util/event.h | 2 ++ tools/perf/util/machine.c | 4 +++- tools/perf/util/map.c | 4 +++- tools/perf/util/map.h | 4 +++- 5 files changed, 32 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 65795b835b39..ce43cbae3bd5 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,4 +1,5 @@ #include <linux/types.h> +#include <sys/mman.h> #include "event.h" #include "debug.h" #include "hist.h" @@ -212,6 +213,21 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_USER; + /* map protection and flags bits */ + event->mmap2.prot = 0; + event->mmap2.flags = 0; + if (prot[0] == 'r') + event->mmap2.prot |= PROT_READ; + if (prot[1] == 'w') + event->mmap2.prot |= PROT_WRITE; + if (prot[2] == 'x') + event->mmap2.prot |= PROT_EXEC; + + if (prot[3] == 's') + event->mmap2.flags |= MAP_SHARED; + else + event->mmap2.flags |= MAP_PRIVATE; + if (prot[2] != 'x') { if (!mmap_data || prot[0] != 'r') continue; @@ -612,12 +628,15 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 - " %02x:%02x %"PRIu64" %"PRIu64"]: %c %s\n", + " %02x:%02x %"PRIu64" %"PRIu64"]: %c%c%c%c %s\n", event->mmap2.pid, event->mmap2.tid, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, event->mmap2.min, event->mmap2.ino, event->mmap2.ino_generation, - (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x', + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', event->mmap2.filename); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d970232cb270..9ba2eb3bdcfd 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -27,6 +27,8 @@ struct mmap2_event { u32 min; u64 ino; u64 ino_generation; + u32 prot; + u32 flags; char filename[PATH_MAX]; }; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7409ac8de51c..0e5fea95d596 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1060,6 +1060,8 @@ int machine__process_mmap2_event(struct machine *machine, event->mmap2.pid, event->mmap2.maj, event->mmap2.min, event->mmap2.ino, event->mmap2.ino_generation, + event->mmap2.prot, + event->mmap2.flags, event->mmap2.filename, type); if (map == NULL) @@ -1105,7 +1107,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(&machine->user_dsos, event->mmap.start, event->mmap.len, event->mmap.pgoff, - event->mmap.pid, 0, 0, 0, 0, + event->mmap.pid, 0, 0, 0, 0, 0, 0, event->mmap.filename, type); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8ccbb32eda25..25c571f4cba6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -138,7 +138,7 @@ void map__init(struct map *map, enum map_type type, struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, char *filename, + u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type) { struct map *map = malloc(sizeof(*map)); @@ -157,6 +157,8 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, map->min = d_min; map->ino = ino; map->ino_generation = ino_gen; + map->prot = prot; + map->flags = flags; if ((anon || no_dso) && type == MAP__FUNCTION) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index ae2d45110588..7758c72522ef 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -35,6 +35,8 @@ struct map { bool referenced; bool erange_warned; u32 priv; + u32 prot; + u32 flags; u64 pgoff; u64 reloc; u32 maj, min; /* only valid for MMAP2 record */ @@ -118,7 +120,7 @@ void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, + u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *map); From a5a5ba72843dd05f991184d6cb9a4471acce1005 Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Fri, 30 May 2014 10:49:42 -0400 Subject: [PATCH 10/35] Revert "perf: Disable PERF_RECORD_MMAP2 support" This reverts commit 3090ffb5a2515990182f3f55b0688a7817325488. Re-enable the mmap2 interface as we will have a user soon. Since things have changed since perf disabled mmap2, small tweaks to the revert had to be done: o commit 9d4ecc88 forced (n!=8) to become (n<7) o a new libunwind test needed updating to use mmap2 interface Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1401461382-209586-1-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- kernel/events/core.c | 4 ---- tools/perf/tests/dwarf-unwind.c | 2 +- tools/perf/util/event.c | 34 +++++++++++++++++++-------------- tools/perf/util/evsel.c | 1 + 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index eea1955c7868..cd28335b3d28 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6929,10 +6929,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (ret) return -EFAULT; - /* disabled for now */ - if (attr->mmap2) - return -EINVAL; - if (attr->__reserved_1) return -EINVAL; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 108f0cd49f4e..96adb730b744 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -15,7 +15,7 @@ static int mmap_handler(struct perf_tool *tool __maybe_unused, struct perf_sample *sample __maybe_unused, struct machine *machine) { - return machine__process_mmap_event(machine, event, NULL); + return machine__process_mmap2_event(machine, event, NULL); } static int init_live_machine(struct machine *machine) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ce43cbae3bd5..d0281bdfa582 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -179,13 +179,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP; + event->header.type = PERF_RECORD_MMAP2; while (1) { char bf[BUFSIZ]; char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; + unsigned int ino; size_t size; ssize_t n; @@ -196,15 +197,20 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", - &event->mmap.start, &event->mmap.len, prot, - &event->mmap.pgoff, - execname); + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + /* * Anon maps don't have the execname. */ - if (n < 4) + if (n < 7) continue; + + event->mmap2.ino = (u64)ino; + /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -239,15 +245,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap.filename, execname, size); + memcpy(event->mmap2.filename, execname, size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.len -= event->mmap.start; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.pid = tgid; - event->mmap.tid = pid; + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5c28d82b76c4..21154dabc5fa 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -659,6 +659,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) perf_evsel__set_sample_bit(evsel, WEIGHT); attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; if (opts->sample_transaction) From 75e906c9601aee73b88d6e6dc02371f8c3ca24d7 Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Fri, 23 May 2014 18:41:23 +0200 Subject: [PATCH 11/35] perf report: Add mem-mode documentation to report command Add mem-mode sorting types and mem-mode itself to perf-report documentation. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1400526833-141779-5-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/Documentation/perf-report.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index cefdf430d1b4..00fbfb6d7f14 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -117,6 +117,21 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. + If --mem-mode option is used, following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop. + + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of sample + - locked: whether the bus was locked at the time of sample + - tlb: type of tlb access for the data at the time of sample + - mem: type of memory access for the data at the time of sample + - snoop: type of snoop (if any) for the data at the time of sample + + And default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this @@ -260,6 +275,13 @@ OPTIONS Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. +--mem-mode:: + Use the data addresses of samples in addition to instruction addresses + to build the histograms. To generate meaningful output, the perf.data + file must have been obtained using perf record -d -W and using a + special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See + 'perf mem' for simpler access. + --percent-limit:: Do not show entries which have an overhead under that percent. (Default: 0). From 7365be55eee37ddb4f487263b4ba5bc8beb9638f Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Tue, 27 May 2014 12:28:05 -0400 Subject: [PATCH 12/35] perf tools: Add cpumode to struct hist_entry The next patch needs to sort on cpumode, so add it to hist_entry to be tracked. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1401208087-181977-6-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/hist.c | 7 ++++--- tools/perf/util/sort.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 5a0a4b2cadc4..d5f47a47c4bf 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -439,9 +439,10 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, + .cpu = al->cpu, + .cpumode = al->cpumode, + .ip = al->addr, + .level = al->level, .stat = { .nr_events = 1, .period = period, diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 5bf0098d6b06..6de22f838854 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -89,6 +89,7 @@ struct hist_entry { u64 ip; u64 transaction; s32 cpu; + u8 cpumode; struct hist_entry_diff diff; From 2b1b71003ea809e619bd73e74dfc2a73069de66f Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Fri, 30 May 2014 16:10:05 -0400 Subject: [PATCH 13/35] perf tools: Add support to dynamically get cacheline size Different arches may have different cacheline sizes. Look it up and set a global variable for reference. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1401480605-97442-1-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/perf.c | 1 + tools/perf/util/util.c | 1 + tools/perf/util/util.h | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 78f7b920e548..95c58fc15284 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -458,6 +458,7 @@ int main(int argc, const char **argv) /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); + cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); cmd = perf_extract_argv0_path(argv[0]); if (!cmd) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7fff6be07f07..95aefa78bb07 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,6 +17,7 @@ * XXX We need to find a better place for these things... */ unsigned int page_size; +int cacheline_size; bool test_attr__enabled; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b03da44e94e4..66864364ccb4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -304,6 +304,7 @@ char *rtrim(char *s); void dump_stack(void); extern unsigned int page_size; +extern int cacheline_size; void get_term_dimensions(struct winsize *ws); From 9b32ba71ba905b90610fc2aad77cb98a373c5624 Mon Sep 17 00:00:00 2001 From: Don Zickus <dzickus@redhat.com> Date: Sun, 1 Jun 2014 15:38:29 +0200 Subject: [PATCH 14/35] perf tools: Add dcacheline sort In perf's 'mem-mode', one can get access to a whole bunch of details specific to a particular sample instruction. A bunch of those details relate to the data address. One interesting thing you can do with data addresses is to convert them into a unique cacheline they belong too. Organizing these data cachelines into similar groups and sorting them can reveal cache contention. This patch creates an alogorithm based on various sample details that can help group entries together into data cachelines and allows 'perf report' to sort on it. The algorithm relies on having proper mmap2 support in the kernel to help determine if the memory map the data address belongs to is private to a pid or globally shared. The alogortithm is as follows: o group cpumodes together o group entries with discovered maps together o sort on major, minor, inode and inode generation numbers o if userspace anon, then sort on pid o sort on cachelines based on data addresses The 'dcacheline' sort option in 'perf report' only works in 'mem-mode'. Sample output: # # Samples: 206 of event 'cpu/mem-loads/pp' # Total weight : 2534 # Sort order : dcacheline,pid # # Overhead Samples Data Cacheline Command: Pid # ........ ............ ...................................................................... .................. # 13.22% 1 [k] 0xffff88042f08ebc0 swapper: 0 9.27% 1 [k] 0xffff88082e8cea80 swapper: 0 3.59% 2 [k] 0xffffffff819ba180 swapper: 0 0.32% 1 [k] arch_trigger_all_cpu_backtrace_handler_na.23901+0xffffffffffffffe0 swapper: 0 0.32% 1 [k] timekeeper_seq+0xfffffffffffffff8 swapper: 0 Note: Added a '+1' to symlen size in hists__calc_col_len to prevent the next column from prematurely tabbing over and mis-aligning. Not sure what the problem is. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1401208087-181977-8-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/Documentation/perf-report.txt | 3 +- tools/perf/util/hist.c | 2 + tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 107 +++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 5 files changed, 113 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 00fbfb6d7f14..d2b59af62bc0 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -119,7 +119,7 @@ OPTIONS If --mem-mode option is used, following sort keys are also available (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop. + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. - symbol_daddr: name of data symbol being executed on at the time of sample - dso_daddr: name of library or module containing the data being executed @@ -128,6 +128,7 @@ OPTIONS - tlb: type of tlb access for the data at the time of sample - mem: type of memory access for the data at the time of sample - snoop: type of snoop (if any) for the data at the time of sample + - dcacheline: the cacheline the data address is on at the time of sample And default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d5f47a47c4bf..30df6187ee02 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -128,6 +128,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_DCACHELINE, + symlen + 1); } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d2bf03575d5f..742f49a85725 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -72,6 +72,7 @@ enum hist_column { HISTC_MEM_TLB, HISTC_MEM_LVL, HISTC_MEM_SNOOP, + HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 45512baaab67..1ec57dd82284 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,3 +1,4 @@ +#include <sys/mman.h> #include "sort.h" #include "hist.h" #include "comm.h" @@ -784,6 +785,104 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*s", width, out); } +static inline u64 cl_address(u64 address) +{ + /* return the cacheline of the address */ + return (address & ~(cacheline_size - 1)); +} + +static int64_t +sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) +{ + u64 l, r; + struct map *l_map, *r_map; + + if (!left->mem_info) return -1; + if (!right->mem_info) return 1; + + /* group event types together */ + if (left->cpumode > right->cpumode) return -1; + if (left->cpumode < right->cpumode) return 1; + + l_map = left->mem_info->daddr.map; + r_map = right->mem_info->daddr.map; + + /* if both are NULL, jump to sort on al_addr instead */ + if (!l_map && !r_map) + goto addr; + + if (!l_map) return -1; + if (!r_map) return 1; + + if (l_map->maj > r_map->maj) return -1; + if (l_map->maj < r_map->maj) return 1; + + if (l_map->min > r_map->min) return -1; + if (l_map->min < r_map->min) return 1; + + if (l_map->ino > r_map->ino) return -1; + if (l_map->ino < r_map->ino) return 1; + + if (l_map->ino_generation > r_map->ino_generation) return -1; + if (l_map->ino_generation < r_map->ino_generation) return 1; + + /* + * Addresses with no major/minor numbers are assumed to be + * anonymous in userspace. Sort those on pid then address. + * + * The kernel and non-zero major/minor mapped areas are + * assumed to be unity mapped. Sort those on address. + */ + + if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && + (!(l_map->flags & MAP_SHARED)) && + !l_map->maj && !l_map->min && !l_map->ino && + !l_map->ino_generation) { + /* userspace anonymous */ + + if (left->thread->pid_ > right->thread->pid_) return -1; + if (left->thread->pid_ < right->thread->pid_) return 1; + } + +addr: + /* al_addr does all the right addr - start + offset calculations */ + l = cl_address(left->mem_info->daddr.al_addr); + r = cl_address(right->mem_info->daddr.al_addr); + + if (l > r) return -1; + if (l < r) return 1; + + return 0; +} + +static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + + uint64_t addr = 0; + struct map *map = NULL; + struct symbol *sym = NULL; + char level = he->level; + + if (he->mem_info) { + addr = cl_address(he->mem_info->daddr.al_addr); + map = he->mem_info->daddr.map; + sym = he->mem_info->daddr.sym; + + /* print [s] for shared data mmaps */ + if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && + map && (map->type == MAP__VARIABLE) && + (map->flags & MAP_SHARED) && + (map->maj || map->min || map->ino || + map->ino_generation)) + level = 's'; + else if (!map) + level = 'X'; + } + return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size, + width); +} + struct sort_entry sort_mispredict = { .se_header = "Branch Mispredicted", .se_cmp = sort__mispredict_cmp, @@ -876,6 +975,13 @@ struct sort_entry sort_mem_snoop = { .se_width_idx = HISTC_MEM_SNOOP, }; +struct sort_entry sort_mem_dcacheline = { + .se_header = "Data Cacheline", + .se_cmp = sort__dcacheline_cmp, + .se_snprintf = hist_entry__dcacheline_snprintf, + .se_width_idx = HISTC_MEM_DCACHELINE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1043,6 +1149,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), + DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 6de22f838854..041f0c9cea2b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -186,6 +186,7 @@ enum sort_type { SORT_MEM_TLB, SORT_MEM_LVL, SORT_MEM_SNOOP, + SORT_MEM_DCACHELINE, }; /* From f9ca2d8918979e8ef806a8b6b4635a79b4f40d1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 5 Jun 2014 12:46:04 -0300 Subject: [PATCH 15/35] perf tools: Emit more precise message for missing glibc static library When the user does: make -C tools/perf LDFLAGS=-static asking for a static build, and the glibc-static (or equivalent) is not found, the message wasn't clear, stating that one of glibc-devel or glibc-static wasn't installed, clarify it checking if -static is present in LDFLAGS. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/n/tip-7e0sfobbzgeydzi9gsz8ss3m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/config/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 4f100b54ba8b..f30ac5e5d271 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -299,7 +299,11 @@ else NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 else - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); + ifneq ($(filter s% -static%,$(LDFLAGS),),) + msg := $(error No static glibc found, please install glibc-static); + else + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); + endif endif else ifndef NO_LIBDW_DWARF_UNWIND From a5c5009f72ced4b90f8715137784c0cd8b244544 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 5 Jun 2014 17:15:47 -0300 Subject: [PATCH 16/35] perf tests: Show the inner make output when an error happens Before: [acme@zoo linux]$ make -C tools/perf -f tests/make make_static make: Entering directory `/home/git/linux/tools/perf' - make_static: cd . && make -f Makefile DESTDIR=/tmp/tmp.JcWuM4Zu9f LDFLAGS=-static make: *** [make_static] Error 1 make: Leaving directory `/home/git/linux/tools/perf' [acme@zoo linux]$ After: [acme@zoo linux]$ make -C tools/perf -f tests/make make_static make: Entering directory `/home/git/linux/tools/perf' - make_static: cd . && make -f Makefile DESTDIR=/tmp/tmp.X3su83i14u LDFLAGS=-static cd . && make -f Makefile DESTDIR=/tmp/tmp.X3su83i14u LDFLAGS=-static BUILD: Doing 'make -j4' parallel build config/Makefile:303: *** No static glibc found, please install glibc-static. Stop. make[1]: *** [all] Error 2 test: test -x ./perf make: Leaving directory `/home/git/linux/tools/perf' [acme@zoo linux]$ Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/n/tip-h4kby5wyp6nfev3882rzm3r9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/tests/make | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 2f92d6e7ee00..69a71ff84e01 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -205,8 +205,7 @@ $(run): ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ - rm -f $@ \ - rm -rf $$TMP_DEST + rm -rf $@ $$TMP_DEST || (cat $@ ; false) $(run_O): $(call clean) @@ -217,9 +216,7 @@ $(run_O): ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ - rm -f $@ && \ - rm -rf $$TMP_O \ - rm -rf $$TMP_DEST + rm -rf $@ $$TMP_O $$TMP_DEST || (cat $@ ; false) tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ From 36d789a4d75f3826faa6e75b018942b63ffed1a0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Date: Fri, 6 Jun 2014 07:13:45 +0000 Subject: [PATCH 17/35] perf probe: Improve error message for unknown member of data structure Improve the error message if we can not find given member in the given structure. Currently perf probe shows a wrong error message as below. ----- # perf probe getname_flags:65 "result->BOGUS" result(type:filename) has no member BOGUS. Failed to find 'result' in this function. Error: Failed to add events. (-22) ----- The first message is correct, but the second one is not, since we didn't fail to find a variable but fails to find the member of given variable. ----- # perf probe getname_flags:65 "result->BOGUS" result(type:filename) has no member BOGUS. Error: Failed to add events. (-22) ----- With this patch, the error message shows only the first one. And if we really failed to find given variable, it tells us so. ----- # perf probe getname_flags:65 "BOGUS" Failed to find 'BOGUS' in this function. Error: Failed to add events. (-2) ----- Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org> Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20140606071345.6788.23744.stgit@kbuild-fedora.novalocal Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/probe-finder.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 9d8eb26f0533..ce8faf47691a 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -573,14 +573,13 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { /* Search again in global variables */ if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + pr_warning("Failed to find '%s' in this function.\n", + pf->pvar->var); ret = -ENOENT; } if (ret >= 0) ret = convert_variable(&vr_die, pf); - if (ret < 0) - pr_warning("Failed to find '%s' in this function.\n", - pf->pvar->var); return ret; } From b4bf1130cdee7d5247bd3171530869809f5aca54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Date: Fri, 6 Jun 2014 07:13:52 +0000 Subject: [PATCH 18/35] perf probe: Show error code and description in verbose mode Show error code and description only in verbose mode if 'perf probe' command failed. Current 'perf probe' shows error code with final error message, and that is meaningless for many users. This changes error messages to show the error code and its description only in verbose mode (-v option). Without this patch: ----- # perf probe -a do_execve@hoge Probe point 'do_execve@hoge' not found. Error: Failed to add events. (-2) ----- With this patch, normally the message doesn't show the misterious error number: ----- # perf probe -a do_execve@hoge Probe point 'do_execve@hoge' not found. Error: Failed to add events. ----- And in verbose mode, it also shows additional error messages as below: ----- # perf probe -va do_execve@hoge probe-definition(0): do_execve@hoge symbol:do_execve file:hoge line:0 offset:0 return:0 lazy:(null) 0 arguments Looking at the vmlinux_path (6 entries long) Using /lib/modules/3.15.0-rc8+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/3.15.0-rc8+/build/vmlinux Try to find probe point from debuginfo. Probe point 'do_execve@hoge' not found. Error: Failed to add events. Reason: No such file or directory (Code: -2) ----- Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org> Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20140606071352.6788.76943.stgit@kbuild-fedora.novalocal Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-probe.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index cdcd4eb3a57d..c63fa2925075 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -288,6 +288,13 @@ static void cleanup_params(void) memset(¶ms, 0, sizeof(params)); } +static void pr_err_with_code(const char *msg, int err) +{ + pr_err("%s", msg); + pr_debug(" Reason: %s (Code: %d)", strerror(-err), err); + pr_err("\n"); +} + static int __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -379,7 +386,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } ret = parse_probe_event_argv(argc, argv); if (ret < 0) { - pr_err(" Error: Parse Error. (%d)\n", ret); + pr_err_with_code(" Error: Command Parse Error.", ret); return ret; } } @@ -419,8 +426,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } ret = show_perf_probe_events(); if (ret < 0) - pr_err(" Error: Failed to show event list. (%d)\n", - ret); + pr_err_with_code(" Error: Failed to show event list.", ret); return ret; } if (params.show_funcs) { @@ -445,8 +451,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) strfilter__delete(params.filter); params.filter = NULL; if (ret < 0) - pr_err(" Error: Failed to show functions." - " (%d)\n", ret); + pr_err_with_code(" Error: Failed to show functions.", ret); return ret; } @@ -464,7 +469,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) ret = show_line_range(¶ms.line_range, params.target); if (ret < 0) - pr_err(" Error: Failed to show lines. (%d)\n", ret); + pr_err_with_code(" Error: Failed to show lines.", ret); return ret; } if (params.show_vars) { @@ -485,7 +490,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) strfilter__delete(params.filter); params.filter = NULL; if (ret < 0) - pr_err(" Error: Failed to show vars. (%d)\n", ret); + pr_err_with_code(" Error: Failed to show vars.", ret); return ret; } #endif @@ -493,7 +498,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) if (params.dellist) { ret = del_perf_probe_events(params.dellist); if (ret < 0) { - pr_err(" Error: Failed to delete events. (%d)\n", ret); + pr_err_with_code(" Error: Failed to delete events.", ret); return ret; } } @@ -504,7 +509,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) params.target, params.force_add); if (ret < 0) { - pr_err(" Error: Failed to add events. (%d)\n", ret); + pr_err_with_code(" Error: Failed to add events.", ret); return ret; } } From 69e96eaa4fef04ad543eda3eab787dbae99d8912 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Date: Fri, 6 Jun 2014 07:13:59 +0000 Subject: [PATCH 19/35] perf probe: Improve an error message of perf probe --vars mode Fix an error message when failed to find given address in --vars mode. Without this fix, perf probe -V doesn't show the final "Error" message if it fails to find given source line. Moreover, it tells it fails to find "variables" instead of the source line. ----- # perf probe -V foo@bar Failed to find variables at foo@bar (0) ----- The result also shows mysterious error code. Actually the error returns 0 or -ENOENT means that it just fails to find the address of given source line. (0 means there is no matching address, and -ENOENT means there is an entry(DIE) but it has no instance, e.g. an empty inlined function) This fixes it to show what happened and the final error message as below. ----- # perf probe -V foo@bar Failed to find the address of foo@bar Error: Failed to show vars. ----- Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20140606071359.6788.84716.stgit@kbuild-fedora.novalocal Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/probe-event.c | 7 ++++++- tools/perf/util/probe-finder.c | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0d1542f33d87..44c71417262f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -721,9 +721,14 @@ static int show_available_vars_at(struct debuginfo *dinfo, ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, max_vls, externs); if (ret <= 0) { - pr_err("Failed to find variables at %s (%d)\n", buf, ret); + if (ret == 0 || ret == -ENOENT) { + pr_err("Failed to find the address of %s\n", buf); + ret = -ENOENT; + } else + pr_warning("Debuginfo analysis failed.\n"); goto end; } + /* Some variables are found */ fprintf(stdout, "Available variables at %s\n", buf); for (i = 0; i < ret; i++) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index ce8faf47691a..98e304766416 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1280,7 +1280,11 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) return ret; } -/* Find available variables at given probe point */ +/* + * Find available variables at given probe point + * Return the number of found probe points. Return 0 if there is no + * matched probe point. Return <0 if an error occurs. + */ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, struct variable_list **vls, From 5ee05b8801892ecc5df44e03429008dfa89aa361 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Date: Fri, 6 Jun 2014 07:14:06 +0000 Subject: [PATCH 20/35] perf probe: Improve error messages in --line option Improve error messages of 'perf probe --line' mode. Currently 'perf probe' shows the "Debuginfo analysis failed" message with an error code when the given symbol is not found: ----- # perf probe -L page_cgroup_init_flatmem Debuginfo analysis failed. (-2) Error: Failed to show lines. ----- But -2 (-ENOENT) means that the given source line or function was not found. With this patch, 'perf probe' shows the correct error message: ----- # perf probe -L page_cgroup_init_flatmem Specified source line is not found. Error: Failed to show lines. ----- There is also another debug error code is shown in the same function after get_real_path(). This removes that too. Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20140606071406.6788.47850.stgit@kbuild-fedora.novalocal Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/probe-event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 44c71417262f..9a0a1839a377 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -628,11 +628,11 @@ static int __show_line_range(struct line_range *lr, const char *module) ret = debuginfo__find_line_range(dinfo, lr); debuginfo__delete(dinfo); - if (ret == 0) { + if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); return -ENOENT; } else if (ret < 0) { - pr_warning("Debuginfo analysis failed. (%d)\n", ret); + pr_warning("Debuginfo analysis failed.\n"); return ret; } @@ -641,7 +641,7 @@ static int __show_line_range(struct line_range *lr, const char *module) ret = get_real_path(tmp, lr->comp_dir, &lr->path); free(tmp); /* Free old path */ if (ret < 0) { - pr_warning("Failed to find source file. (%d)\n", ret); + pr_warning("Failed to find source file path.\n"); return ret; } From f48e00cead1f7574147e6bd0d203c8331714d35b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev <stfomichev@yandex-team.ru> Date: Tue, 10 Jun 2014 13:52:16 +0400 Subject: [PATCH 21/35] perf timechart: Reflow documentation Move options away from examples. Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@gmail.com> Cc: Ramkumar Ramachandra <artagnon@gmail.com> Link: http://lkml.kernel.org/r/20140610095216.GO26511@stfomichev-desktop.yandex.net Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-timechart.txt | 41 ++++++++++----------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index bc5990c33dc0..5e0f986dff38 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -43,27 +43,6 @@ TIMECHART OPTIONS --symfs=<directory>:: Look for files with symbols relative to this directory. - -EXAMPLES --------- - -$ perf timechart record git pull - - [ perf record: Woken up 13 times to write data ] - [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] - -$ perf timechart - - Written 10.2 seconds of trace to output.svg. - -Record system-wide timechart: - - $ perf timechart record - - then generate timechart and highlight 'gcc' tasks: - - $ perf timechart --highlight gcc - -n:: --proc-num:: Print task info for at least given number of tasks. @@ -88,6 +67,26 @@ RECORD OPTIONS --callchain:: Do call-graph (stack chain/backtrace) recording +EXAMPLES +-------- + +$ perf timechart record git pull + + [ perf record: Woken up 13 times to write data ] + [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] + +$ perf timechart + + Written 10.2 seconds of trace to output.svg. + +Record system-wide timechart: + + $ perf timechart record + + then generate timechart and highlight 'gcc' tasks: + + $ perf timechart --highlight gcc + SEE ALSO -------- linkperf:perf-record[1] From 17314e2385c6627fcab4b8f97bd6668bb63495c0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Mon, 9 Jun 2014 14:43:37 +0900 Subject: [PATCH 22/35] perf record: Fix to honor user freq/interval properly When configuring event perf checked a wrong condition that user specified both of freq (-F) and period (-c) or the event has no default value. This worked because most of events don't have default value and only tracepoint events have default of 1 (and it's not desirable to change it for those events). However, Andi's downloadable event patch changes the situation so it cannot change the value for those events. Fix it by allowing override the default value if user gives one of the options. $ perf record -a -e uops_retired.all -F 4000 sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.325 MB perf.data (~14185 samples) ] $ perf evlist -F cpu/uops_retired.all/: sample_freq=4000 Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Link: http://lkml.kernel.org/r/1402292617-26278-1-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/evsel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 21154dabc5fa..8606175fe1e8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -589,10 +589,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) } /* - * We default some events to a 1 default interval. But keep + * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period || (opts->user_freq != UINT_MAX && + if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { perf_evsel__set_sample_bit(evsel, PERIOD); From 0c4e774fad0202b91dea8d99c04e9bdf2c2c6647 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Thu, 17 Apr 2014 19:39:10 +0200 Subject: [PATCH 23/35] perf tools: Cache register accesses for unwind processing Caching registers value into an array. Got about 4% speed up of perf_reg_value function for report command processing dwarf unwind stacks. Output from report over 1.5 GB data with DWARF unwind stacks: (TODO fix perf diff) current code: 5.84% perf perf [.] perf_reg_value change: 1.94% perf perf [.] perf_reg_value And little bit of overall speed up: (perf stat -r 5 -e '{cycles,instructions}:u' ...) current code: 310,298,611,754 cycles ( +- 0.33% ) 439,669,689,341 instructions ( +- 0.03% ) 188.656753166 seconds time elapsed ( +- 0.82% ) change: 291,315,329,878 cycles ( +- 0.22% ) 391,763,485,304 instructions ( +- 0.03% ) 180.742249687 seconds time elapsed ( +- 0.64% ) Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-2-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/event.h | 5 +++++ tools/perf/util/perf_regs.c | 10 +++++++++- tools/perf/util/perf_regs.h | 4 +++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9ba2eb3bdcfd..e5dd40addb30 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -7,6 +7,7 @@ #include "../perf.h" #include "map.h" #include "build-id.h" +#include "perf_regs.h" struct mmap_event { struct perf_event_header header; @@ -89,6 +90,10 @@ struct regs_dump { u64 abi; u64 mask; u64 *regs; + + /* Cached values/mask filled by first register access. */ + u64 cache_regs[PERF_REGS_MAX]; + u64 cache_mask; }; struct stack_dump { diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a3539ef30b15..43168fb0d9a2 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,11 +1,15 @@ #include <errno.h> #include "perf_regs.h" +#include "event.h" int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; u64 mask = regs->mask; + if (regs->cache_mask & (1 << id)) + goto out; + if (!(mask & (1 << id))) return -EINVAL; @@ -14,6 +18,10 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) idx++; } - *valp = regs->regs[idx]; + regs->cache_mask |= (1 << id); + regs->cache_regs[id] = regs->regs[idx]; + +out: + *valp = regs->cache_regs[id]; return 0; } diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 79c78f74e0cf..980dbf76bc98 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -2,7 +2,8 @@ #define __PERF_REGS_H #include <linux/types.h> -#include "event.h" + +struct regs_dump; #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> @@ -11,6 +12,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); #else #define PERF_REGS_MASK 0 +#define PERF_REGS_MAX 0 static inline const char *perf_reg_name(int id __maybe_unused) { From ca40e2af1f75eddf7eb2b93fde6391ea185d8fc8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 7 May 2014 18:30:45 +0200 Subject: [PATCH 24/35] perf tools: Separate dso data related variables Add separated structure/namespace for data related variables. We are going to add mode of them, so this way they will be clearly separated. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-3-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 8 ++++---- tools/perf/util/dso.h | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 64453d63b971..1c3cdaf228c1 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -292,7 +292,7 @@ dso_cache__read(struct dso *dso, struct machine *machine, cache->offset = cache_offset; cache->size = ret; - dso_cache__insert(&dso->cache, cache); + dso_cache__insert(&dso->data.cache, cache); ret = dso_cache__memcpy(cache, offset, data, size); @@ -310,7 +310,7 @@ static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, { struct dso_cache *cache; - cache = dso_cache__find(&dso->cache, offset); + cache = dso_cache__find(&dso->data.cache, offset); if (cache) return dso_cache__memcpy(cache, offset, data, size); else @@ -473,7 +473,7 @@ struct dso *dso__new(const char *name) dso__set_short_name(dso, dso->name, false); for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; - dso->cache = RB_ROOT; + dso->data.cache = RB_ROOT; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; @@ -506,7 +506,7 @@ void dso__delete(struct dso *dso) dso->long_name_allocated = false; } - dso_cache__free(&dso->cache); + dso_cache__free(&dso->data.cache); dso__free_a2l(dso); zfree(&dso->symsrc_filename); free(dso); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 38efe95a7fdd..7637fdd680b2 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -76,7 +76,6 @@ struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; - struct rb_root cache; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -99,6 +98,12 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + + /* dso data file */ + struct { + struct rb_root cache; + } data; + char name[0]; }; From 53fa8eaa093ad87eb59379de059e76d735a5de45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Mon, 28 Apr 2014 16:43:43 +0200 Subject: [PATCH 25/35] perf tools: Add data_fd into dso object Adding data_fd into dso object so we could handle caching of opened dso file data descriptors coming int next patches. Adding dso__data_close interface to keep the data_fd updated when the descriptor is closed. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-4-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 23 +++++++++++++++++++---- tools/perf/util/dso.h | 3 +++ tools/perf/util/unwind-libunwind.c | 4 ++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 1c3cdaf228c1..5acb4b8b35d7 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -159,6 +159,14 @@ static int open_dso(struct dso *dso, struct machine *machine) return fd; } +void dso__data_close(struct dso *dso) +{ + if (dso->data.fd >= 0) { + close(dso->data.fd); + dso->data.fd = -1; + } +} + int dso__data_fd(struct dso *dso, struct machine *machine) { enum dso_binary_type binary_type_data[] = { @@ -168,8 +176,13 @@ int dso__data_fd(struct dso *dso, struct machine *machine) }; int i = 0; - if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) - return open_dso(dso, machine); + if (dso->data.fd >= 0) + return dso->data.fd; + + if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) { + dso->data.fd = open_dso(dso, machine); + return dso->data.fd; + } do { int fd; @@ -178,7 +191,7 @@ int dso__data_fd(struct dso *dso, struct machine *machine) fd = open_dso(dso, machine); if (fd >= 0) - return fd; + return dso->data.fd = fd; } while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND); @@ -301,7 +314,7 @@ dso_cache__read(struct dso *dso, struct machine *machine, if (ret <= 0) free(cache); - close(fd); + dso__data_close(dso); return ret; } @@ -474,6 +487,7 @@ struct dso *dso__new(const char *name) for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->data.cache = RB_ROOT; + dso->data.fd = -1; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; @@ -506,6 +520,7 @@ void dso__delete(struct dso *dso) dso->long_name_allocated = false; } + dso__data_close(dso); dso_cache__free(&dso->data.cache); dso__free_a2l(dso); zfree(&dso->symsrc_filename); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 7637fdd680b2..e48dcf59b570 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -102,6 +102,7 @@ struct dso { /* dso data file */ struct { struct rb_root cache; + int fd; } data; char name[0]; @@ -147,6 +148,8 @@ int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type t char *root_dir, char *filename, size_t size); int dso__data_fd(struct dso *dso, struct machine *machine); +void dso__data_close(struct dso *dso); + ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); ssize_t dso__data_read_addr(struct dso *dso, struct map *map, diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index bd5768d74f01..4f8dd9ee3899 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -250,7 +250,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, /* Check the .eh_frame section for unwinding info */ offset = elf_section_offset(fd, ".eh_frame_hdr"); - close(fd); + dso__data_close(dso); if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -271,7 +271,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, /* Check the .debug_frame section for unwinding info */ *offset = elf_section_offset(fd, ".debug_frame"); - close(fd); + dso__data_close(dso); if (*offset) return 0; From eba5102d2f0b4117edd089f2d882d9386025c829 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 30 Apr 2014 15:00:59 +0200 Subject: [PATCH 26/35] perf tools: Add global list of opened dso objects Adding global list of opened dso objects, so we can track them and use the list for caching dso data file descriptors. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-5-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 41 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/dso.h | 1 + 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5acb4b8b35d7..5d7c7bcc6276 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -136,7 +136,22 @@ int dso__read_binary_type_filename(const struct dso *dso, return ret; } -static int open_dso(struct dso *dso, struct machine *machine) +/* + * Global list of open DSOs. + */ +static LIST_HEAD(dso__data_open); + +static void dso__list_add(struct dso *dso) +{ + list_add_tail(&dso->data.open_entry, &dso__data_open); +} + +static void dso__list_del(struct dso *dso) +{ + list_del(&dso->data.open_entry); +} + +static int __open_dso(struct dso *dso, struct machine *machine) { int fd; char *root_dir = (char *)""; @@ -159,14 +174,35 @@ static int open_dso(struct dso *dso, struct machine *machine) return fd; } -void dso__data_close(struct dso *dso) +static int open_dso(struct dso *dso, struct machine *machine) +{ + int fd = __open_dso(dso, machine); + + if (fd > 0) + dso__list_add(dso); + + return fd; +} + +static void close_data_fd(struct dso *dso) { if (dso->data.fd >= 0) { close(dso->data.fd); dso->data.fd = -1; + dso__list_del(dso); } } +static void close_dso(struct dso *dso) +{ + close_data_fd(dso); +} + +void dso__data_close(struct dso *dso) +{ + close_dso(dso); +} + int dso__data_fd(struct dso *dso, struct machine *machine) { enum dso_binary_type binary_type_data[] = { @@ -499,6 +535,7 @@ struct dso *dso__new(const char *name) dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); + INIT_LIST_HEAD(&dso->data.open_entry); } return dso; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index e48dcf59b570..90988bf06641 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -103,6 +103,7 @@ struct dso { struct { struct rb_root cache; int fd; + struct list_head open_entry; } data; char name[0]; From bda6ee4a94d1e1be0c1428d37bc0d3da2e5793ad Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 30 Apr 2014 15:25:10 +0200 Subject: [PATCH 27/35] perf tools: Add global count of opened dso objects Adding global count of opened dso objects so we could properly limit the number of opened dso data file descriptors. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-6-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5d7c7bcc6276..76e5c13afc8f 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,3 +1,4 @@ +#include <asm/bug.h> #include "symbol.h" #include "dso.h" #include "machine.h" @@ -137,18 +138,23 @@ int dso__read_binary_type_filename(const struct dso *dso, } /* - * Global list of open DSOs. + * Global list of open DSOs and the counter. */ static LIST_HEAD(dso__data_open); +static long dso__data_open_cnt; static void dso__list_add(struct dso *dso) { list_add_tail(&dso->data.open_entry, &dso__data_open); + dso__data_open_cnt++; } static void dso__list_del(struct dso *dso) { list_del(&dso->data.open_entry); + WARN_ONCE(dso__data_open_cnt <= 0, + "DSO data fd counter out of bounds."); + dso__data_open_cnt--; } static int __open_dso(struct dso *dso, struct machine *machine) From c658045197814b7d762662f9aa9f652379121a03 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 30 Apr 2014 15:47:27 +0200 Subject: [PATCH 28/35] perf tools: Cache dso data file descriptor Caching dso data file descriptors to avoid expensive re-opens especially during DWARF unwind. We keep dsos data file descriptors open until their count reaches the half of the current fd open limit (RLIMIT_NOFILE). In this case we close file descriptor of the first opened dso object. We've got overall speedup (~27% for my workload) of report: 'perf report --stdio -i perf-test.data' (3 runs) (perf-test.data size was around 12GB) current code: 545,640,944,228 cycles ( +- 0.53% ) 785,255,798,320 instructions ( +- 0.03% ) 366.340910010 seconds time elapsed ( +- 3.65% ) after change: 435,895,036,114 cycles ( +- 0.26% ) 636,790,271,176 instructions ( +- 0.04% ) 266.481463387 seconds time elapsed ( +- 0.13% ) Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-7-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 61 +++++++++++++++++++++++++++++- tools/perf/util/unwind-libunwind.c | 2 - 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 76e5c13afc8f..fbf6cc98b8a9 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,4 +1,6 @@ #include <asm/bug.h> +#include <sys/time.h> +#include <sys/resource.h> #include "symbol.h" #include "dso.h" #include "machine.h" @@ -180,12 +182,20 @@ static int __open_dso(struct dso *dso, struct machine *machine) return fd; } +static void check_data_close(void); + static int open_dso(struct dso *dso, struct machine *machine) { int fd = __open_dso(dso, machine); - if (fd > 0) + if (fd > 0) { dso__list_add(dso); + /* + * Check if we crossed the allowed number + * of opened DSOs and close one if needed. + */ + check_data_close(); + } return fd; } @@ -204,6 +214,54 @@ static void close_dso(struct dso *dso) close_data_fd(dso); } +static void close_first_dso(void) +{ + struct dso *dso; + + dso = list_first_entry(&dso__data_open, struct dso, data.open_entry); + close_dso(dso); +} + +static rlim_t get_fd_limit(void) +{ + struct rlimit l; + rlim_t limit = 0; + + /* Allow half of the current open fd limit. */ + if (getrlimit(RLIMIT_NOFILE, &l) == 0) { + if (l.rlim_cur == RLIM_INFINITY) + limit = l.rlim_cur; + else + limit = l.rlim_cur / 2; + } else { + pr_err("failed to get fd limit\n"); + limit = 1; + } + + return limit; +} + +static bool may_cache_fd(void) +{ + static rlim_t limit; + + if (!limit) + limit = get_fd_limit(); + + if (limit == RLIM_INFINITY) + return true; + + return limit > (rlim_t) dso__data_open_cnt; +} + +static void check_data_close(void) +{ + bool cache_fd = may_cache_fd(); + + if (!cache_fd) + close_first_dso(); +} + void dso__data_close(struct dso *dso) { close_dso(dso); @@ -356,7 +414,6 @@ dso_cache__read(struct dso *dso, struct machine *machine, if (ret <= 0) free(cache); - dso__data_close(dso); return ret; } diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 4f8dd9ee3899..25578b98f5c5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -250,7 +250,6 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, /* Check the .eh_frame section for unwinding info */ offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso__data_close(dso); if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -271,7 +270,6 @@ static int read_unwind_spec_debug_frame(struct dso *dso, /* Check the .debug_frame section for unwinding info */ *offset = elf_section_offset(fd, ".debug_frame"); - dso__data_close(dso); if (*offset) return 0; From c3fbd2a606c5f88de0079b027727a1fb0ae27b65 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 7 May 2014 18:51:41 +0200 Subject: [PATCH 29/35] perf tools: Add file size check and factor dso__data_read_offset Adding file size check, because the lseek will succeed for any offset behind file size and thus succeed when it was expected to fail. Factoring the code to check the offset against file size earlier in the flow. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-8-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 64 +++++++++++++++++++++++++++++++++---------- tools/perf/util/dso.h | 1 + 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index fbf6cc98b8a9..db634383156c 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -205,6 +205,7 @@ static void close_data_fd(struct dso *dso) if (dso->data.fd >= 0) { close(dso->data.fd); dso->data.fd = -1; + dso->data.file_size = 0; dso__list_del(dso); } } @@ -373,16 +374,10 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset, } static ssize_t -dso_cache__read(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) { struct dso_cache *cache; ssize_t ret; - int fd; - - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -1; do { u64 cache_offset; @@ -396,10 +391,10 @@ dso_cache__read(struct dso *dso, struct machine *machine, cache_offset = offset & DSO__DATA_CACHE_MASK; ret = -EINVAL; - if (-1 == lseek(fd, cache_offset, SEEK_SET)) + if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET)) break; - ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE); + ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE); if (ret <= 0) break; @@ -417,8 +412,8 @@ dso_cache__read(struct dso *dso, struct machine *machine, return ret; } -static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +static ssize_t dso_cache_read(struct dso *dso, u64 offset, + u8 *data, ssize_t size) { struct dso_cache *cache; @@ -426,11 +421,10 @@ static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, if (cache) return dso_cache__memcpy(cache, offset, data, size); else - return dso_cache__read(dso, machine, offset, data, size); + return dso_cache__read(dso, offset, data, size); } -ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) { ssize_t r = 0; u8 *p = data; @@ -438,7 +432,7 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, do { ssize_t ret; - ret = dso_cache_read(dso, machine, offset, p, size); + ret = dso_cache_read(dso, offset, p, size); if (ret < 0) return ret; @@ -458,6 +452,46 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, return r; } +static int data_file_size(struct dso *dso) +{ + struct stat st; + + if (!dso->data.file_size) { + if (fstat(dso->data.fd, &st)) { + pr_err("dso mmap failed, fstat: %s\n", strerror(errno)); + return -1; + } + dso->data.file_size = st.st_size; + } + + return 0; +} + +static ssize_t data_read_offset(struct dso *dso, u64 offset, + u8 *data, ssize_t size) +{ + if (data_file_size(dso)) + return -1; + + /* Check the offset sanity. */ + if (offset > dso->data.file_size) + return -1; + + if (offset + size < offset) + return -1; + + return cached_read(dso, offset, data, size); +} + +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + if (dso__data_fd(dso, machine) < 0) + return -1; + + return data_read_offset(dso, offset, data, size); +} + ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 90988bf06641..da47b13595f3 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -103,6 +103,7 @@ struct dso { struct { struct rb_root cache; int fd; + size_t file_size; struct list_head open_entry; } data; From a08cae03f430b971afa508a32662dc476d42d8cb Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 7 May 2014 21:35:02 +0200 Subject: [PATCH 30/35] perf tools: Allow to close dso fd in case of open failure Adding do_open function that tries to close opened dso objects in case we fail to open the dso due to to crossing the allowed RLIMIT_NOFILE limit. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-9-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index db634383156c..c30752c7eebb 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -159,6 +159,27 @@ static void dso__list_del(struct dso *dso) dso__data_open_cnt--; } +static void close_first_dso(void); + +static int do_open(char *name) +{ + int fd; + + do { + fd = open(name, O_RDONLY); + if (fd >= 0) + return fd; + + pr_debug("dso open failed, mmap: %s\n", strerror(errno)); + if (!dso__data_open_cnt || errno != EMFILE) + break; + + close_first_dso(); + } while (1); + + return -1; +} + static int __open_dso(struct dso *dso, struct machine *machine) { int fd; @@ -177,7 +198,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) return -EINVAL; } - fd = open(name, O_RDONLY); + fd = do_open(name); free(name); return fd; } From c1f9aa0a61bde512a68060883d1c3c1955a546ea Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 7 May 2014 21:09:59 +0200 Subject: [PATCH 31/35] perf tools: Add dso__data_* interface descriptons Adding descriptions/explanations for dso__data_* interface functions. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-10-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/util/dso.c | 59 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dso.h | 38 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c30752c7eebb..819f10414f08 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -205,6 +205,13 @@ static int __open_dso(struct dso *dso, struct machine *machine) static void check_data_close(void); +/** + * dso_close - Open DSO data file + * @dso: dso object + * + * Open @dso's data file descriptor and updates + * list/count of open DSO objects. + */ static int open_dso(struct dso *dso, struct machine *machine) { int fd = __open_dso(dso, machine); @@ -231,6 +238,13 @@ static void close_data_fd(struct dso *dso) } } +/** + * dso_close - Close DSO data file + * @dso: dso object + * + * Close @dso's data file descriptor and updates + * list/count of open DSO objects. + */ static void close_dso(struct dso *dso) { close_data_fd(dso); @@ -276,6 +290,11 @@ static bool may_cache_fd(void) return limit > (rlim_t) dso__data_open_cnt; } +/* + * Check and close LRU dso if we crossed allowed limit + * for opened dso file descriptors. The limit is half + * of the RLIMIT_NOFILE files opened. +*/ static void check_data_close(void) { bool cache_fd = may_cache_fd(); @@ -284,11 +303,25 @@ static void check_data_close(void) close_first_dso(); } +/** + * dso__data_close - Close DSO data file + * @dso: dso object + * + * External interface to close @dso's data file descriptor. + */ void dso__data_close(struct dso *dso) { close_dso(dso); } +/** + * dso__data_fd - Get dso's data file descriptor + * @dso: dso object + * @machine: machine object + * + * External interface to find dso's file, open it and + * returns file descriptor. + */ int dso__data_fd(struct dso *dso, struct machine *machine) { enum dso_binary_type binary_type_data[] = { @@ -445,6 +478,11 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset, return dso_cache__read(dso, offset, data, size); } +/* + * Reads and caches dso data DSO__DATA_CACHE_SIZE size chunks + * in the rb_tree. Any read to already cached data is served + * by cached data. + */ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) { ssize_t r = 0; @@ -504,6 +542,17 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset, return cached_read(dso, offset, data, size); } +/** + * dso__data_read_offset - Read data from dso file offset + * @dso: dso object + * @machine: machine object + * @offset: file offset + * @data: buffer to store data + * @size: size of the @data buffer + * + * External interface to read data from dso file offset. Open + * dso data file and use cached_read to get the data. + */ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size) { @@ -513,6 +562,16 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, return data_read_offset(dso, offset, data, size); } +/** + * dso__data_read_addr - Read data from dso address + * @dso: dso object + * @machine: machine object + * @add: virtual memory address + * @data: buffer to store data + * @size: size of the @data buffer + * + * External interface to read data from dso address. + */ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index da47b13595f3..ad553ba257bf 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -149,6 +149,44 @@ char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); +/* + * The dso__data_* external interface provides following functions: + * dso__data_fd + * dso__data_close + * dso__data_read_offset + * dso__data_read_addr + * + * Please refer to the dso.c object code for each function and + * arguments documentation. Following text tries to explain the + * dso file descriptor caching. + * + * The dso__data* interface allows caching of opened file descriptors + * to speed up the dso data accesses. The idea is to leave the file + * descriptor opened ideally for the whole life of the dso object. + * + * The current usage of the dso__data_* interface is as follows: + * + * Get DSO's fd: + * int fd = dso__data_fd(dso, machine); + * USE 'fd' SOMEHOW + * + * Read DSO's data: + * n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE); + * n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE); + * + * Eventually close DSO's fd: + * dso__data_close(dso); + * + * It is not necessary to close the DSO object data file. Each time new + * DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked. Once + * it is crossed, the oldest opened DSO object is closed. + * + * The dso__delete function calls close_dso function to ensure the + * data file descriptor gets closed/unmapped before the dso object + * is freed. + * + * TODO +*/ int dso__data_fd(struct dso *dso, struct machine *machine); void dso__data_close(struct dso *dso); From 0d8a5faaf5a1087c7212a6f0d81920a93396414a Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Sat, 10 May 2014 17:22:30 +0200 Subject: [PATCH 32/35] perf tests: Spawn child for each test In upcoming tests we will setup process limits, which might affect other tests. Spawning child for each test to prevent this. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Reviewed-by: David Ahern <dsahern@gmail.com> Link: http://lkml.kernel.org/r/1401892622-30848-11-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/tests/builtin-test.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 802e3cd50f6f..9677a5c6a366 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,6 +3,8 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include <unistd.h> +#include <string.h> #include "builtin.h" #include "intlist.h" #include "tests.h" @@ -172,6 +174,34 @@ static bool perf_test__matches(int curr, int argc, const char *argv[]) return false; } +static int run_test(struct test *test) +{ + int status, err = -1, child = fork(); + + if (child < 0) { + pr_err("failed to fork test: %s\n", strerror(errno)); + return -1; + } + + if (!child) { + pr_debug("test child forked, pid %d\n", getpid()); + err = test->func(); + exit(err); + } + + wait(&status); + + if (WIFEXITED(status)) { + err = WEXITSTATUS(status); + pr_debug("test child finished with %d\n", err); + } else if (WIFSIGNALED(status)) { + err = -1; + pr_debug("test child interrupted\n"); + } + + return err; +} + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { int i = 0; @@ -200,7 +230,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } pr_debug("\n--- start ---\n"); - err = tests[curr].func(); + err = run_test(&tests[curr]); pr_debug("---- end ----\n%s:", tests[curr].desc); switch (err) { From 822c45db6398a69879b0539f0819de02b813493c Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Sun, 4 May 2014 13:51:46 +0200 Subject: [PATCH 33/35] perf tests: Allow reuse of test_file function Making the test_file function to be reusable for new tests coming in following patches. Also changing the template name of temp files to "/tmp/perf-test-XXXXXX" to easily identify & blame. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-12-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/tests/dso-data.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3e6cb171e3d3..738438196d9a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -12,11 +12,15 @@ static char *test_file(int size) { - static char buf_templ[] = "/tmp/test-XXXXXX"; +#define TEMPL "/tmp/perf-test-XXXXXX" + static char buf_templ[sizeof(TEMPL)]; char *templ = buf_templ; int fd, i; unsigned char *buf; + strcpy(buf_templ, TEMPL); +#undef TEMPL + fd = mkstemp(templ); if (fd < 0) { perror("mkstemp failed"); From 4ebbcb84b19b8472fb5b9c8be89b3d0ea17c902e Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Mon, 12 May 2014 14:43:53 +0200 Subject: [PATCH 34/35] perf tests: Add test for caching dso file descriptors Adding test that setup test_dso_data__fd_limit and test dso data file descriptors are cached appropriately. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1401892622-30848-13-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/tests/builtin-test.c | 6 +- tools/perf/tests/dso-data.c | 135 +++++++++++++++++++++++++++++++- tools/perf/tests/tests.h | 1 + 3 files changed, 138 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9677a5c6a366..b8a63583566f 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -52,9 +52,13 @@ static struct test { .func = test__pmu, }, { - .desc = "Test dso data interface", + .desc = "Test dso data read", .func = test__dso_data, }, + { + .desc = "Test dso data cache", + .func = test__dso_data_cache, + }, { .desc = "roundtrip evsel->name check", .func = test__perf_evsel__roundtrip_name_test, diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 738438196d9a..2d30014d716b 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -1,11 +1,12 @@ -#include "util.h" - #include <stdlib.h> #include <linux/types.h> #include <sys/stat.h> #include <fcntl.h> #include <string.h> - +#include <sys/time.h> +#include <sys/resource.h> +#include <api/fs/fs.h> +#include "util.h" #include "machine.h" #include "symbol.h" #include "tests.h" @@ -154,3 +155,131 @@ int test__dso_data(void) unlink(file); return 0; } + +static long open_files_cnt(void) +{ + char path[PATH_MAX]; + struct dirent *dent; + DIR *dir; + long nr = 0; + + scnprintf(path, PATH_MAX, "%s/self/fd", procfs__mountpoint()); + pr_debug("fd path: %s\n", path); + + dir = opendir(path); + TEST_ASSERT_VAL("failed to open fd directory", dir); + + while ((dent = readdir(dir)) != NULL) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + nr++; + } + + closedir(dir); + return nr - 1; +} + +static struct dso **dsos; + +static int dsos__create(int cnt, int size) +{ + int i; + + dsos = malloc(sizeof(dsos) * cnt); + TEST_ASSERT_VAL("failed to alloc dsos array", dsos); + + for (i = 0; i < cnt; i++) { + char *file; + + file = test_file(size); + TEST_ASSERT_VAL("failed to get dso file", file); + + dsos[i] = dso__new(file); + TEST_ASSERT_VAL("failed to get dso", dsos[i]); + } + + return 0; +} + +static void dsos__delete(int cnt) +{ + int i; + + for (i = 0; i < cnt; i++) { + struct dso *dso = dsos[i]; + + unlink(dso->name); + dso__delete(dso); + } + + free(dsos); +} + +static int set_fd_limit(int n) +{ + struct rlimit rlim; + + if (getrlimit(RLIMIT_NOFILE, &rlim)) + return -1; + + pr_debug("file limit %ld, new %d\n", (long) rlim.rlim_cur, n); + + rlim.rlim_cur = n; + return setrlimit(RLIMIT_NOFILE, &rlim); +} + +int test__dso_data_cache(void) +{ + struct machine machine; + long nr_end, nr = open_files_cnt(); + int dso_cnt, limit, i, fd; + + memset(&machine, 0, sizeof(machine)); + + /* set as system limit */ + limit = nr * 4; + TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); + + /* and this is now our dso open FDs limit + 1 extra */ + dso_cnt = limit / 2 + 1; + TEST_ASSERT_VAL("failed to create dsos\n", + !dsos__create(dso_cnt, TEST_FILE_SIZE)); + + for (i = 0; i < (dso_cnt - 1); i++) { + struct dso *dso = dsos[i]; + + /* + * Open dsos via dso__data_fd or dso__data_read_offset. + * Both opens the data file and keep it open. + */ + if (i % 2) { + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + } else { + #define BUFSIZE 10 + u8 buf[BUFSIZE]; + ssize_t n; + + n = dso__data_read_offset(dso, &machine, 0, buf, BUFSIZE); + TEST_ASSERT_VAL("failed to read dso", n == BUFSIZE); + } + } + + /* open +1 dso over the allowed limit */ + fd = dso__data_fd(dsos[i], &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* should force the first one to be closed */ + TEST_ASSERT_VAL("failed to close dsos[0]", dsos[0]->data.fd == -1); + + /* cleanup everything */ + dsos__delete(dso_cnt); + + /* Make sure we did not leak any file descriptor. */ + nr_end = open_files_cnt(); + pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); + TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 022bb68fd9c7..ccc4deb2d963 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -28,6 +28,7 @@ int test__syscall_open_tp_fields(void); int test__pmu(void); int test__attr(void); int test__dso_data(void); +int test__dso_data_cache(void); int test__parse_events(void); int test__hists_link(void); int test__python_use(void); From 45dc1bb5c1d47f9519e2101f6b073bb4bb1d1f99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Mon, 12 May 2014 14:50:03 +0200 Subject: [PATCH 35/35] perf tests: Add test for closing dso objects on EMFILE error Testing that perf properly closes opened dso objects and tries to reopen in case we run out of allowed file descriptors for dso data. Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jean Pihet <jean.pihet@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Reviewed by: David Ahern <dsahern@gmail.com> Link: http://lkml.kernel.org/r/1401892622-30848-14-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- tools/perf/tests/builtin-test.c | 4 ++ tools/perf/tests/dso-data.c | 73 +++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 3 files changed, 78 insertions(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b8a63583566f..6f8b01bc6033 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -59,6 +59,10 @@ static struct test { .desc = "Test dso data cache", .func = test__dso_data_cache, }, + { + .desc = "Test dso data reopen", + .func = test__dso_data_reopen, + }, { .desc = "roundtrip evsel->name check", .func = test__perf_evsel__roundtrip_name_test, diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 2d30014d716b..630808cd7cc2 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -283,3 +283,76 @@ int test__dso_data_cache(void) TEST_ASSERT_VAL("failed leadking files", nr == nr_end); return 0; } + +int test__dso_data_reopen(void) +{ + struct machine machine; + long nr_end, nr = open_files_cnt(); + int fd, fd_extra; + +#define dso_0 (dsos[0]) +#define dso_1 (dsos[1]) +#define dso_2 (dsos[2]) + + memset(&machine, 0, sizeof(machine)); + + /* + * Test scenario: + * - create 3 dso objects + * - set process file descriptor limit to current + * files count + 3 + * - test that the first dso gets closed when we + * reach the files count limit + */ + + /* Make sure we are able to open 3 fds anyway */ + TEST_ASSERT_VAL("failed to set file limit", + !set_fd_limit((nr + 3))); + + TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE)); + + /* open dso_0 */ + fd = dso__data_fd(dso_0, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* open dso_1 */ + fd = dso__data_fd(dso_1, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * open extra file descriptor and we just + * reached the files count limit + */ + fd_extra = open("/dev/null", O_RDONLY); + TEST_ASSERT_VAL("failed to open extra fd", fd_extra > 0); + + /* open dso_2 */ + fd = dso__data_fd(dso_2, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * dso_0 should get closed, because we reached + * the file descriptor limit + */ + TEST_ASSERT_VAL("failed to close dso_0", dso_0->data.fd == -1); + + /* open dso_0 */ + fd = dso__data_fd(dso_0, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * dso_1 should get closed, because we reached + * the file descriptor limit + */ + TEST_ASSERT_VAL("failed to close dso_1", dso_1->data.fd == -1); + + /* cleanup everything */ + close(fd_extra); + dsos__delete(3); + + /* Make sure we did not leak any file descriptor. */ + nr_end = open_files_cnt(); + pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); + TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index ccc4deb2d963..ed64790a395f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -29,6 +29,7 @@ int test__pmu(void); int test__attr(void); int test__dso_data(void); int test__dso_data_cache(void); +int test__dso_data_reopen(void); int test__parse_events(void); int test__hists_link(void); int test__python_use(void);