From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the Intel uncore unit) they won't actually need a task context. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..89d6023c6f82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif -- cgit v1.2.3-55-g7522 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (which are not) can lead to funny scheduling oddities. Giving them their own context solves this. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + kernel/hw_breakpoint.c | 2 ++ kernel/perf_event.c | 40 +++++++++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6e..c1173520f14d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f82..eb3c1ceec06e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 6f150095cafe..3b2aaffb65f0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -610,6 +610,8 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) } static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + .event_init = hw_breakpoint_event_init, .add = hw_breakpoint_add, .del = hw_breakpoint_del, diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7223ea875861..357ee8d5e8ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ 
-4709,6 +4709,8 @@ static int perf_swevent_init(struct perf_event *event) } static struct pmu perf_swevent = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -4800,6 +4802,8 @@ static int perf_tp_event_init(struct perf_event *event) } static struct pmu perf_tracepoint = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_tp_event_init, .add = perf_trace_add, .del = perf_trace_del, @@ -4988,6 +4992,8 @@ static int cpu_clock_event_init(struct perf_event *event) } static struct pmu perf_cpu_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -5063,6 +5069,8 @@ static int task_clock_event_init(struct perf_event *event) } static struct pmu perf_task_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -5490,6 +5498,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *ctx; struct file *event_file = NULL; struct file *group_file = NULL; + struct pmu *pmu; int event_fd; int fput_needed = 0; int err; @@ -5522,20 +5531,11 @@ SYSCALL_DEFINE5(perf_event_open, goto err_fd; } - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(event->pmu, pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_alloc; - } - if (group_fd != -1) { group_leader = perf_fget_light(group_fd, &fput_needed); if (IS_ERR(group_leader)) { err = PTR_ERR(group_leader); - goto err_context; + goto err_alloc; } group_file = group_leader->filp; if (flags & PERF_FLAG_FD_OUTPUT) @@ -5544,6 +5544,23 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } + /* + * Special case software events and allow them to be part of + * any hardware group. 
+ */ + pmu = event->pmu; + if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) + pmu = group_leader->pmu; + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pmu, pid, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_group_fd; + } + /* * Look up the group leader (we will attach this event to it): */ @@ -5605,8 +5622,9 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - fput_light(group_file, fput_needed); put_ctx(ctx); +err_group_fd: + fput_light(group_file, fput_needed); err_alloc: free_event(event); err_fd: -- cgit v1.2.3-55-g7522