From b736f48bda54ec75b7dc9306884c3843f1a78a0a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 18 Nov 2012 21:27:45 -0800 Subject: tracing: Mark tracing_dentry_percpu() static Nothing outside of kernel/trace/trace.c references tracing_dentry_percpu(). Link: http://lkml.kernel.org/r/1353302917-13995-7-git-send-email-josh@joshtriplett.org Signed-off-by: Josh Triplett Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d2a658349ca1..ca9b7dfed8ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4506,7 +4506,7 @@ struct dentry *tracing_init_dentry(void) static struct dentry *d_percpu; -struct dentry *tracing_dentry_percpu(void) +static struct dentry *tracing_dentry_percpu(void) { static int once; struct dentry *d_tracer; -- cgit v1.2.3-55-g7522 From d75f717e19fe595e7efbf67de195ada8d89dfbbe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 25 Jan 2013 09:46:36 -0500 Subject: tracing: Remove tracepoint sample code The tracepoint sample code was used to teach developers how to create their own tracepoints. But now the trace_events have been added as a higher level that is used directly by developers today. Only the trace_event code should use the tracepoint interface directly and no new tracepoints should be added. Besides, the example had a race condition with the use of the ->d_name.name dentry field, as pointed out by Al Viro. Best just to remove the code so it wont be used by other developers. Link: http://lkml.kernel.org/r/20130123225523.GY4939@ZenIV.linux.org.uk Cc: Al Viro Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- samples/Kconfig | 6 --- samples/Makefile | 2 +- samples/tracepoints/Makefile | 6 --- samples/tracepoints/tp-samples-trace.h | 11 ----- samples/tracepoints/tracepoint-probe-sample.c | 57 -------------------------- samples/tracepoints/tracepoint-probe-sample2.c | 44 -------------------- samples/tracepoints/tracepoint-sample.c | 57 -------------------------- 7 files changed, 1 insertion(+), 182 deletions(-) delete mode 100644 samples/tracepoints/Makefile delete mode 100644 samples/tracepoints/tp-samples-trace.h delete mode 100644 samples/tracepoints/tracepoint-probe-sample.c delete mode 100644 samples/tracepoints/tracepoint-probe-sample2.c delete mode 100644 samples/tracepoints/tracepoint-sample.c diff --git a/samples/Kconfig b/samples/Kconfig index 7b6792a18c05..6181c2cc9ca0 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -5,12 +5,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_TRACEPOINTS - tristate "Build tracepoints examples -- loadable modules only" - depends on TRACEPOINTS && m - help - This build tracepoints example modules. - config SAMPLE_TRACE_EVENTS tristate "Build trace_events examples -- loadable modules only" depends on EVENT_TRACING && m diff --git a/samples/Makefile b/samples/Makefile index 5ef08bba96ce..1a60c62e2045 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,4 +1,4 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile deleted file mode 100644 index 36479ad9ae14..000000000000 --- a/samples/tracepoints/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# builds the tracepoint example kernel modules; -# then to use one (as root): insmod - -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h deleted file mode 100644 index 4d46be965961..000000000000 --- a/samples/tracepoints/tp-samples-trace.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _TP_SAMPLES_TRACE_H -#define _TP_SAMPLES_TRACE_H - -#include /* for struct inode and struct file */ -#include - -DECLARE_TRACE(subsys_event, - TP_PROTO(struct inode *inode, struct file *file), - TP_ARGS(inode, file)); -DECLARE_TRACE_NOARGS(subsys_eventb); -#endif diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c deleted file mode 100644 index 744c0b9652a7..000000000000 --- a/samples/tracepoints/tracepoint-probe-sample.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * tracepoint-probe-sample.c - * - * sample tracepoint probes. - */ - -#include -#include -#include -#include "tp-samples-trace.h" - -/* - * Here the caller only guarantees locking for struct file and struct inode. - * Locking must therefore be done in the probe to use the dentry. - */ -static void probe_subsys_event(void *ignore, - struct inode *inode, struct file *file) -{ - path_get(&file->f_path); - dget(file->f_path.dentry); - printk(KERN_INFO "Event is encountered with filename %s\n", - file->f_path.dentry->d_name.name); - dput(file->f_path.dentry); - path_put(&file->f_path); -} - -static void probe_subsys_eventb(void *ignore) -{ - printk(KERN_INFO "Event B is encountered\n"); -} - -static int __init tp_sample_trace_init(void) -{ - int ret; - - ret = register_trace_subsys_event(probe_subsys_event, NULL); - WARN_ON(ret); - ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL); - WARN_ON(ret); - - return 0; -} - -module_init(tp_sample_trace_init); - -static void __exit tp_sample_trace_exit(void) -{ - unregister_trace_subsys_eventb(probe_subsys_eventb, NULL); - unregister_trace_subsys_event(probe_subsys_event, NULL); - tracepoint_synchronize_unregister(); -} - -module_exit(tp_sample_trace_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint Probes Samples"); diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c deleted file mode 100644 index 9fcf990e5d4b..000000000000 --- a/samples/tracepoints/tracepoint-probe-sample2.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * tracepoint-probe-sample2.c - * - * 2nd sample tracepoint probes. - */ - -#include -#include -#include "tp-samples-trace.h" - -/* - * Here the caller only guarantees locking for struct file and struct inode. - * Locking must therefore be done in the probe to use the dentry. - */ -static void probe_subsys_event(void *ignore, - struct inode *inode, struct file *file) -{ - printk(KERN_INFO "Event is encountered with inode number %lu\n", - inode->i_ino); -} - -static int __init tp_sample_trace_init(void) -{ - int ret; - - ret = register_trace_subsys_event(probe_subsys_event, NULL); - WARN_ON(ret); - - return 0; -} - -module_init(tp_sample_trace_init); - -static void __exit tp_sample_trace_exit(void) -{ - unregister_trace_subsys_event(probe_subsys_event, NULL); - tracepoint_synchronize_unregister(); -} - -module_exit(tp_sample_trace_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint Probes Samples"); diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c deleted file mode 100644 index f4d89e008c32..000000000000 --- a/samples/tracepoints/tracepoint-sample.c +++ /dev/null @@ -1,57 +0,0 @@ -/* tracepoint-sample.c - * - * Executes a tracepoint when /proc/tracepoint-sample is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include "tp-samples-trace.h" - -DEFINE_TRACE(subsys_event); -DEFINE_TRACE(subsys_eventb); - -struct proc_dir_entry *pentry_sample; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_subsys_event(inode, file); - for (i = 0; i < 10; i++) - trace_subsys_eventb(); - return -EPERM; -} - -static const struct file_operations mark_ops = { - .open = my_open, - .llseek = noop_llseek, -}; - -static int __init sample_init(void) -{ - printk(KERN_ALERT "sample init\n"); - pentry_sample = proc_create("tracepoint-sample", 0444, NULL, - &mark_ops); - if (!pentry_sample) - return -EPERM; - return 0; -} - -static void __exit sample_exit(void) -{ - printk(KERN_ALERT "sample exit\n"); - remove_proc_entry("tracepoint-sample", NULL); -} - -module_init(sample_init) -module_exit(sample_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint sample"); -- cgit v1.2.3-55-g7522 From 821465295b36136998ef294fe176fba4e09c1cd9 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 19 Nov 2012 13:21:01 +0800 Subject: tracing: Use __this_cpu_inc/dec operation instead of __get_cpu_var __this_cpu_inc_return() or __this_cpu_dec generates a single instruction, which is faster than __get_cpu_var operation. Link: http://lkml.kernel.org/r/50A9C1BD.1060308@gmail.com Reviewed-by: Christoph Lameter Signed-off-by: Shan Wei Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca9b7dfed8ef..07888e15c694 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1344,7 +1344,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ preempt_disable_notrace(); - use_stack = ++__get_cpu_var(ftrace_stack_reserve); + use_stack = __this_cpu_inc_return(ftrace_stack_reserve); /* * We don't need any atomic variables, just a barrier. * If an interrupt comes in, we don't care, because it would @@ -1398,7 +1398,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, out: /* Again, don't let gcc optimize things here */ barrier(); - __get_cpu_var(ftrace_stack_reserve)--; + __this_cpu_dec(ftrace_stack_reserve); preempt_enable_notrace(); } -- cgit v1.2.3-55-g7522 From 38dbe0b137bfe6ea92be495017885c0785179a02 Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Fri, 25 Jan 2013 18:03:07 +0800 Subject: tracing: Remove second iterator initializer The trace iterator is already initialized by trace_init_global_iter(), so there is no need to initialize it again. Link: http://lkml.kernel.org/r/CACV3sb+G1YnO6168JhY3dEadmJi58pA5-2cSZT8E0WVHJNFt9Q@mail.gmail.com Signed-off-by: Jovi Zhang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 07888e15c694..d399592701aa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5030,6 +5030,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) if (disable_tracing) ftrace_kill(); + /* Simulate the iterator */ trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { @@ -5041,10 +5042,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - /* Simulate the iterator */ - iter.tr = &global_trace; - iter.trace = current_trace; - switch (oops_dump_mode) { case DUMP_ALL: iter.cpu_file = TRACE_PIPE_ALL_CPU; -- cgit v1.2.3-55-g7522 From 03274a3ffb449632970fdd35da72ea41cf8474da Mon Sep 17 00:00:00 2001 From: Steven Rostedt (Red Hat) Date: Tue, 29 Jan 2013 17:30:31 -0500 Subject: tracing/fgraph: Adjust fgraph depth before calling trace return callback While debugging the virtual cputime with the function graph tracer with a max_depth of 1 (most common use of the max_depth so far), I found that I was missing kernel execution because of a race condition. The code for the return side of the function has a slight race: ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); barrier(); current->curr_ret_stack--; The ftrace_pop_return_trace() initializes the trace structure for the callback. The ftrace_graph_return() uses the trace structure for its own use as that structure is on the stack and is local to this function. Then the curr_ret_stack is decremented which is what the trace.depth is set to. If an interrupt comes in after the ftrace_graph_return() but before the curr_ret_stack, then the called function will get a depth of 2. If max_depth is set to 1 this function will be ignored. The problem is that the trace has already been called, and the timestamp for that trace will not reflect the time the function was about to re-enter userspace. Calls to the interrupt will not be traced because the max_depth has prevented this. To solve this issue, the ftrace_graph_return() can safely be moved after the current->curr_ret_stack has been updated. This way the timestamp for the return callback will reflect the actual time. If an interrupt comes in after the curr_ret_stack update and ftrace_graph_return(), it will be traced. It may look a little confusing to see it within the other function, but at least it will not be lost. Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 7008d2e13cf2..39ada66389cc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -191,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); - ftrace_graph_return(&trace); barrier(); current->curr_ret_stack--; + /* + * The trace should run after decrementing the ret counter + * in case an interrupt were to come in. We don't want to + * lose the interrupt if max_depth is set. + */ + ftrace_graph_return(&trace); + if (unlikely(!ret)) { ftrace_graph_stop(); WARN_ON(1); -- cgit v1.2.3-55-g7522 From ad964704ba9326d027fc10fd0099b7c880e50172 Mon Sep 17 00:00:00 2001 From: Steven Rostedt (Red Hat) Date: Tue, 29 Jan 2013 17:45:49 -0500 Subject: ring-buffer: Add stats field for amount read from trace ring buffer Add a stat about the number of events read from the ring buffer: # cat /debug/tracing/per_cpu/cpu0/stats entries: 39869 overrun: 870512 commit overrun: 0 bytes: 1449912 oldest event ts: 6561.368690 now ts: 6565.246426 dropped events: 0 read events: 112 <-- Added Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 18 ++++++++++++++++++ kernel/trace/trace.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 519777e3fa01..1342e69542f3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 13950d9027cb..7244acde77b0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3102,6 +3102,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); +/** + * ring_buffer_read_events_cpu - get the number of events successfully read + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of events read + */ +unsigned long +ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + return cpu_buffer->read; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); + /** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d399592701aa..90a1c71fdbfc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4430,6 +4430,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); + cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); + trace_seq_printf(s, "read events: %ld\n", cnt); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); -- cgit v1.2.3-55-g7522 From 5e67b51e3fb22ad43faf9589e9019ad9c6a00413 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 Dec 2012 11:49:45 +0900 Subject: tracing: Use sched_clock_cpu for trace_clock_global For systems with an unstable sched_clock, all cpu_clock() does is enable/ disable local irq during the call to sched_clock_cpu(). And for stable systems they are same. trace_clock_global() already disables interrupts, so it can call sched_clock_cpu() directly. Link: http://lkml.kernel.org/r/1356576585-28782-2-git-send-email-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/trace_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 22b638b28e48..24bf48eabfcc 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void) local_irq_save(flags); this_cpu = raw_smp_processor_id(); - now = cpu_clock(this_cpu); + now = sched_clock_cpu(this_cpu); /* * If in an NMI context then dont risk lockups and return the * cpu_clock() time: -- cgit v1.2.3-55-g7522 From 2fd196ec1eab2623096e7fc7e6f3976160392bce Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:52:52 +0900 Subject: tracing: Replace static old_tracer check of tracer name Currently the trace buffer read functions use a static variable "old_tracer" for detecting if the current tracer changes. This was suitable for a single trace file ("trace"), but to add a snapshot feature that will use the same function for its file, a check against a static variable is not sufficient. To use the output functions for two different files, instead of storing the current tracer in a static variable, as the trace iterator descriptor contains a pointer to the original current tracer's name, that pointer can now be used to check if the current tracer has changed between different reads of the trace file. Link: http://lkml.kernel.org/r/20121226025252.3252.9276.stgit@liselsia Signed-off-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 90a1c71fdbfc..2c724662a3e8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1948,18 +1948,20 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; - static struct tracer *old_tracer; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; - /* copy the tracer to avoid using a global lock all around */ + /* + * copy the tracer to avoid using a global lock all around. + * iter->trace is a copy of current_trace, the pointer to the + * name may be used instead of a strcmp(), as iter->trace->name + * will point to the same string as current_trace->name. + */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); atomic_inc(&trace_record_cmdline_disabled); @@ -3494,7 +3496,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - static struct tracer *old_tracer; ssize_t sret; /* return any leftover data */ @@ -3506,10 +3507,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); /* @@ -3665,7 +3664,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; - static struct tracer *old_tracer; ssize_t ret; size_t rem; unsigned int i; @@ -3675,10 +3673,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); mutex_lock(&iter->mutex); -- cgit v1.2.3-55-g7522 From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:53:00 +0900 Subject: tracing: Make a snapshot feature available from userspace Ftrace has a snapshot feature available from kernel space and latency tracers (e.g. irqsoff) are using it. This patch enables user applictions to take a snapshot via debugfs. Add "snapshot" debugfs file in "tracing" directory. snapshot: This is used to take a snapshot and to read the output of the snapshot. # echo 1 > snapshot This will allocate the spare buffer for snapshot (if it is not allocated), and take a snapshot. # cat snapshot This will show contents of the snapshot. # echo 0 > snapshot This will free the snapshot if it is allocated. Any other positive values will clear the snapshot contents if the snapshot is allocated, or return EINVAL if it is not allocated. Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia Cc: Jiri Olsa Cc: David Sharp Signed-off-by: Hiraku Toyooka [ Fixed irqsoff selftest and also a conflict with a change that fixes the update_max_tr. ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 + kernel/trace/Kconfig | 10 +++ kernel/trace/trace.c | 166 ++++++++++++++++++++++++++++++++++++------- kernel/trace/trace.h | 1 + 4 files changed, 154 insertions(+), 26 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 6f8d0b77006b..13a54d0bdfa8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,9 @@ struct trace_iterator { long idx; cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; }; enum trace_iter_flags { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cdc9d284d24e..36567564e221 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -253,6 +253,16 @@ config FTRACE_SYSCALLS help Basic tracer to catch the syscall entry and exit events. +config TRACER_SNAPSHOT + bool "Create a snapshot trace buffer" + select TRACER_MAX_TRACE + help + Allow tracing users to take snapshot of the current buffer using the + ftrace interface, e.g.: + + echo 1 > /sys/kernel/debug/tracing/snapshot + cat snapshot + config TRACE_BRANCH_PROFILING bool select GENERIC_TRACER diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c724662a3e8..70dce64b9ecf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -710,12 +710,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); - /* If we disabled the tracer, stop now */ - if (current_trace == &nop_trace) - return; - - if (WARN_ON_ONCE(!current_trace->use_max_tr)) + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; + } arch_spin_lock(&ftrace_max_lock); @@ -743,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) return; - } arch_spin_lock(&ftrace_max_lock); @@ -866,10 +863,13 @@ int register_tracer(struct tracer *type) current_trace = type; - /* If we expanded the buffers, make sure the max is expanded too */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, trace_buf_size, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + /* If we expanded the buffers, make sure the max is expanded too */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, trace_buf_size, + RING_BUFFER_ALL_CPUS); + type->allocated_snapshot = true; + } /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -885,10 +885,14 @@ int register_tracer(struct tracer *type) /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(tr); - /* Shrink the max buffer again */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, 1, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + type->allocated_snapshot = false; + + /* Shrink the max buffer again */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + } printk(KERN_CONT "PASSED\n"); } @@ -1964,7 +1968,11 @@ static void *s_start(struct seq_file *m, loff_t *pos) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); - atomic_inc(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return ERR_PTR(-EBUSY); + + if (!iter->snapshot) + atomic_inc(&trace_record_cmdline_disabled); if (*pos != iter->pos) { iter->ent = NULL; @@ -2003,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return; + + if (!iter->snapshot) + atomic_dec(&trace_record_cmdline_disabled); trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2438,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { long cpu_file = (long) inode->i_private; struct trace_iterator *iter; @@ -2471,10 +2483,11 @@ __tracing_open(struct inode *inode, struct file *file) if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if (current_trace && current_trace->print_max) + if ((current_trace && current_trace->print_max) || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; + iter->snapshot = snapshot; iter->pos = -1; mutex_init(&iter->mutex); iter->cpu_file = cpu_file; @@ -2491,8 +2504,9 @@ __tracing_open(struct inode *inode, struct file *file) if (trace_clocks[trace_clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - /* stop the trace while dumping */ - tracing_stop(); + /* stop the trace while dumping if we are not opening "snapshot" */ + if (!iter->snapshot) + tracing_stop(); if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { @@ -2555,8 +2569,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (iter->trace && iter->trace->close) iter->trace->close(iter); - /* reenable tracing if it was previously enabled */ - tracing_start(); + if (!iter->snapshot) + /* reenable tracing if it was previously enabled */ + tracing_start(); mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -2584,7 +2599,7 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -3219,7 +3234,7 @@ static int tracing_set_tracer(const char *buf) if (current_trace && current_trace->reset) current_trace->reset(tr); - had_max_tr = current_trace && current_trace->use_max_tr; + had_max_tr = current_trace && current_trace->allocated_snapshot; current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3238,6 +3253,8 @@ static int tracing_set_tracer(const char *buf) */ ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; } destroy_trace_option_files(topts); @@ -3248,6 +3265,7 @@ static int tracing_set_tracer(const char *buf) RING_BUFFER_ALL_CPUS); if (ret < 0) goto out; + t->allocated_snapshot = true; } if (t->init) { @@ -4066,6 +4084,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file) return single_open(file, tracing_clock_show, NULL); } +#ifdef CONFIG_TRACER_SNAPSHOT +static int tracing_snapshot_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret = 0; + + if (file->f_mode & FMODE_READ) { + iter = __tracing_open(inode, file, true); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + } + return ret; +} + +static ssize_t +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + mutex_lock(&trace_types_lock); + + if (current_trace && current_trace->use_max_tr) { + ret = -EBUSY; + goto out; + } + + switch (val) { + case 0: + if (current_trace->allocated_snapshot) { + /* free spare buffer */ + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; + } + break; + case 1: + if (!current_trace->allocated_snapshot) { + /* allocate spare buffer */ + ret = resize_buffer_duplicate_size(&max_tr, + &global_trace, RING_BUFFER_ALL_CPUS); + if (ret < 0) + break; + current_trace->allocated_snapshot = true; + } + + local_irq_disable(); + /* Now, we're going to swap */ + update_max_tr(&global_trace, current, smp_processor_id()); + local_irq_enable(); + break; + default: + if (current_trace->allocated_snapshot) + tracing_reset_online_cpus(&max_tr); + else + ret = -EINVAL; + break; + } + + if (ret >= 0) { + *ppos += cnt; + ret = cnt; + } +out: + mutex_unlock(&trace_types_lock); + return ret; +} +#endif /* CONFIG_TRACER_SNAPSHOT */ + + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -4122,6 +4221,16 @@ static const struct file_operations trace_clock_fops = { .write = tracing_clock_write, }; +#ifdef CONFIG_TRACER_SNAPSHOT +static const struct file_operations snapshot_fops = { + .open = tracing_snapshot_open, + .read = seq_read, + .write = tracing_snapshot_write, + .llseek = tracing_seek, + .release = tracing_release, +}; +#endif /* CONFIG_TRACER_SNAPSHOT */ + struct ftrace_buffer_info { struct trace_array *tr; void *spare; @@ -4921,6 +5030,11 @@ static __init int tracer_init_debugfs(void) &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif +#ifdef CONFIG_TRACER_SNAPSHOT + trace_create_file("snapshot", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); +#endif + create_trace_options_dir(); for_each_tracing_cpu(cpu) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 04a2c7ab1735..57d7e5397d56 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -287,6 +287,7 @@ struct tracer { struct tracer_flags *flags; bool print_max; bool use_max_tr; + bool allocated_snapshot; }; -- cgit v1.2.3-55-g7522 From c1043fcda1b9e8e5144cfdaee7be262c50dbdead Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:53:09 +0900 Subject: tracing: Add documentation of snapshot utility This patch adds snapshot description in ftrace documentation. This description includes what the snapshot is and how to use it. Link: http://lkml.kernel.org/r/20121226025309.3252.150.stgit@liselsia Cc: Rob Landley Signed-off-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 83 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 6f51fed45f2d..53d6a3c51d87 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1842,6 +1842,89 @@ an error. # cat buffer_size_kb 85 +Snapshot +-------- +CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature +available to all non latency tracers. (Latency tracers which +record max latency, such as "irqsoff" or "wakeup", can't use +this feature, since those are already using the snapshot +mechanism internally.) + +Snapshot preserves a current trace buffer at a particular point +in time without stopping tracing. Ftrace swaps the current +buffer with a spare buffer, and tracing continues in the new +current (=previous spare) buffer. + +The following debugfs files in "tracing" are related to this +feature: + + snapshot: + + This is used to take a snapshot and to read the output + of the snapshot. Echo 1 into this file to allocate a + spare buffer and to take a snapshot (swap), then read + the snapshot from this file in the same format as + "trace" (described above in the section "The File + System"). Both reads snapshot and tracing are executable + in parallel. When the spare buffer is allocated, echoing + 0 frees it, and echoing else (positive) values clear the + snapshot contents. + More details are shown in the table below. + + status\input | 0 | 1 | else | + --------------+------------+------------+------------+ + not allocated |(do nothing)| alloc+swap | EINVAL | + --------------+------------+------------+------------+ + allocated | free | swap | clear | + --------------+------------+------------+------------+ + +Here is an example of using the snapshot feature. + + # echo 1 > events/sched/enable + # echo 1 > snapshot + # cat snapshot +# tracer: nop +# +# entries-in-buffer/entries-written: 71/71 #P:8 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + -0 [005] d... 2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120 + sleep-2242 [005] d... 2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120 +[...] + -0 [002] d... 2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120 + + # cat trace +# tracer: nop +# +# entries-in-buffer/entries-written: 77/77 #P:8 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + -0 [007] d... 2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120 + snapshot-test-2-2229 [002] d... 2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120 +[...] + + +If you try to use this snapshot feature when current tracer is +one of the latency tracers, you will get the following results. + + # echo wakeup > current_tracer + # echo 1 > snapshot +bash: echo: write error: Device or resource busy + # cat snapshot +cat: snapshot: Device or resource busy + ----------- More details can be found in the source code, in the -- cgit v1.2.3-55-g7522 From d840f718d28715a9833c1a8f46c2493ff3fd219b Mon Sep 17 00:00:00 2001 From: Steven Rostedt (Red Hat) Date: Fri, 1 Feb 2013 18:38:47 -0500 Subject: tracing: Init current_trace to nop_trace and remove NULL checks On early boot up, when the ftrace ring buffer is initialized, the static variable current_trace is initialized to &nop_trace. Before this initialization, current_trace is NULL and will never become NULL again. It is always reassigned to a ftrace tracer. Several places check if current_trace is NULL before it uses it, and this check is frivolous, because at the point in time when the checks are made the only way current_trace could be NULL is if ftrace failed its allocations at boot up, and the paths to these locations would probably not be possible. By initializing current_trace to &nop_trace where it is declared, current_trace will never be NULL, and we can remove all these checks of current_trace being NULL which never needed to be checked in the first place. Cc: Dan Carpenter Cc: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70dce64b9ecf..5d520b7bb4c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -249,7 +249,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; static struct tracer *trace_types __read_mostly; /* current_trace points to the tracer that is currently active */ -static struct tracer *current_trace __read_mostly; +static struct tracer *current_trace __read_mostly = &nop_trace; /* * trace_types_lock is used to protect the trace_types list. @@ -2100,8 +2100,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) unsigned long total; const char *name = "preemption"; - if (type) - name = type->name; + name = type->name; get_total_entries(tr, &total, &entries); @@ -2477,13 +2476,12 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!iter->trace) goto fail; - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if ((current_trace && current_trace->print_max) || snapshot) + if (current_trace->print_max || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; @@ -3037,10 +3035,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int r; mutex_lock(&trace_types_lock); - if (current_trace) - r = sprintf(buf, "%s\n", current_trace->name); - else - r = sprintf(buf, "\n"); + r = sprintf(buf, "%s\n", current_trace->name); mutex_unlock(&trace_types_lock); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -3231,10 +3226,10 @@ static int tracing_set_tracer(const char *buf) goto out; trace_branch_disable(); - if (current_trace && current_trace->reset) + if (current_trace->reset) current_trace->reset(tr); - had_max_tr = current_trace && current_trace->allocated_snapshot; + had_max_tr = current_trace->allocated_snapshot; current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3373,8 +3368,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) ret = -ENOMEM; goto fail; } - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ret = -ENOMEM; @@ -3525,7 +3519,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(current_trace && iter->trace->name != current_trace->name)) + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); @@ -3691,7 +3685,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(current_trace && iter->trace->name != current_trace->name)) + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); @@ -4115,7 +4109,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, mutex_lock(&trace_types_lock); - if (current_trace && current_trace->use_max_tr) { + if (current_trace->use_max_tr) { ret = -EBUSY; goto out; } @@ -5299,7 +5293,7 @@ __init static int tracer_alloc_buffers(void) init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); - current_trace = &nop_trace; + /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.2.3-55-g7522