From c5dfd78eb79851e278b7973031b9ca363da87a7e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Apr 2016 12:28:50 -0300 Subject: perf core: Allow setting up max frame stack depth via sysctl The default remains 127, which is good for most cases, and not even hit most of the time, but then for some cases, as reported by Brendan, 1024+ deep frames are appearing on the radar for things like groovy, ruby. And in some workloads putting a _lower_ cap on this may make sense. One that is per event still needs to be put in place tho. The new file is: # cat /proc/sys/kernel/perf_event_max_stack 127 Chaging it: # echo 256 > /proc/sys/kernel/perf_event_max_stack # cat /proc/sys/kernel/perf_event_max_stack 256 But as soon as there is some event using callchains we get: # echo 512 > /proc/sys/kernel/perf_event_max_stack -bash: echo: write error: Device or resource busy # Because we only allocate the callchain percpu data structures when there is a user, which allows for changing the max easily, its just a matter of having no callchain users at that point. Reported-and-Tested-by: Brendan Gregg Reviewed-by: Frederic Weisbecker Acked-by: Alexei Starovoitov Acked-by: David Ahern Cc: Adrian Hunter Cc: Alexander Shishkin Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/r/20160426002928.GB16708@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/callchain.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'kernel/events/callchain.c') diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 343c22f5e867..b9325e7dcba1 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -18,6 +18,14 @@ struct callchain_cpus_entries { struct perf_callchain_entry *cpu_entries[0]; }; +int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH; + +static inline size_t perf_callchain_entry__sizeof(void) +{ + return (sizeof(struct perf_callchain_entry) + + sizeof(__u64) * sysctl_perf_event_max_stack); +} + static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static atomic_t nr_callchain_events; static DEFINE_MUTEX(callchain_mutex); @@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void) if (!entries) return -ENOMEM; - size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; + size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS; for_each_possible_cpu(cpu) { entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, @@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) cpu = smp_processor_id(); - return &entries->cpu_entries[cpu][*rctx]; + return (((void *)entries->cpu_entries[cpu]) + + (*rctx * perf_callchain_entry__sizeof())); } static void @@ -215,3 +224,25 @@ exit_put: return entry; } + +int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int new_value = sysctl_perf_event_max_stack, ret; + struct ctl_table new_table = *table; + + new_table.data = &new_value; + ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + mutex_lock(&callchain_mutex); + if (atomic_read(&nr_callchain_events)) + ret = -EBUSY; + else + sysctl_perf_event_max_stack = new_value; + + mutex_unlock(&callchain_mutex); + + return ret; +} -- cgit v1.2.3-55-g7522