author		Ingo Molnar	2008-12-21 14:43:25 +0100
committer	Ingo Molnar	2008-12-23 12:45:23 +0100
commit		235c7fc7c500e4fd1700c4ad01b5612bcdc1b449 (patch)
tree		837db278456caa0eb4720afdc36adf47e7dd542f /kernel/perf_counter.c
parent		perfcounters: remove ->nr_inherited (diff)
perfcounters: generalize the counter scheduler
Impact: clean up and refactor code

Refactor the counter scheduler: separate out the sched-in/sched-out functions and introduce a counter-rotation function as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c | 220
1 file changed, 142 insertions(+), 78 deletions(-)
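
Before the diff, a brief orientation: the patch factors counter scheduling into reusable sched-out/sched-in helpers and adds rotate_ctx(), which moves the first counter (or group leader) in a context to the tail on every tick so that, when more counters exist than hardware slots, each one gets PMU time in turn. The stand-alone sketch below models that tick flow; all names in it are illustrative toys, not kernel API — the real helpers are __perf_counter_sched_out(), rotate_ctx() and __perf_counter_sched_in() in the diff that follows.

/*
 * Stand-alone user-space sketch (not kernel code) of the tick flow this
 * patch introduces in perf_counter_task_tick(): schedule the context's
 * counters out, rotate the first counter to the tail, then schedule the
 * counters back in. All names below (toy_ctx, toy_rotate_ctx, ...) are
 * illustrative only.
 */
#include <stdio.h>

#define NR_COUNTERS 4

struct toy_ctx {
        int counter[NR_COUNTERS];       /* stand-in for ctx->counter_list */
        int nr_active;
};

static void toy_sched_out(struct toy_ctx *ctx)
{
        ctx->nr_active = 0;             /* all counters become inactive */
}

static void toy_rotate_ctx(struct toy_ctx *ctx)
{
        int first = ctx->counter[0];
        int i;

        /* move the first entry to the tail, round-robin style */
        for (i = 0; i < NR_COUNTERS - 1; i++)
                ctx->counter[i] = ctx->counter[i + 1];
        ctx->counter[NR_COUNTERS - 1] = first;
}

static void toy_sched_in(struct toy_ctx *ctx, int nr_hw_slots)
{
        /* only the first nr_hw_slots entries fit on the PMU this tick */
        ctx->nr_active = nr_hw_slots;
}

int main(void)
{
        struct toy_ctx ctx = { .counter = { 0, 1, 2, 3 }, .nr_active = 0 };
        int tick, i;

        for (tick = 0; tick < 4; tick++) {
                toy_sched_out(&ctx);
                toy_rotate_ctx(&ctx);
                toy_sched_in(&ctx, 2);

                printf("tick %d: active counters:", tick);
                for (i = 0; i < ctx.nr_active; i++)
                        printf(" %d", ctx.counter[i]);
                printf("\n");
        }
        return 0;
}

With two hardware slots and four counters, the active pair shifts by one position each tick, which is the fairness the rotation is meant to provide; the real rotate_ctx() rotates whole group leaders, so counters belonging to one group stay scheduled together.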
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 48e1dbcdc1cd..d7a79f321b1c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
spin_lock(&ctx->lock);
if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
- counter->hw_ops->disable(counter);
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_ops->disable(counter);
ctx->nr_active--;
cpuctx->active_oncpu--;
counter->task = NULL;
+ counter->oncpu = -1;
}
ctx->nr_counters--;
@@ -192,8 +193,36 @@ retry:
spin_unlock_irq(&ctx->lock);
}
+static int
+counter_sched_in(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx,
+ int cpu)
+{
+ if (counter->state == PERF_COUNTER_STATE_OFF)
+ return 0;
+
+ counter->state = PERF_COUNTER_STATE_ACTIVE;
+ counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
+ /*
+ * The new state must be visible before we turn it on in the hardware:
+ */
+ smp_wmb();
+
+ if (counter->hw_ops->enable(counter)) {
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->oncpu = -1;
+ return -EAGAIN;
+ }
+
+ cpuctx->active_oncpu++;
+ ctx->nr_active++;
+
+ return 0;
+}
+
/*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
*/
static void __perf_install_in_context(void *info)
{
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
* counters on a global level. NOP for non NMI based counters.
*/
perf_flags = hw_perf_save_disable();
- list_add_counter(counter, ctx);
- hw_perf_restore(perf_flags);
+ list_add_counter(counter, ctx);
ctx->nr_counters++;
- if (cpuctx->active_oncpu < perf_max_counters) {
- counter->state = PERF_COUNTER_STATE_ACTIVE;
- counter->oncpu = cpu;
- ctx->nr_active++;
- cpuctx->active_oncpu++;
- counter->hw_ops->enable(counter);
- }
+ counter_sched_in(counter, cpuctx, ctx, cpu);
if (!ctx->task && cpuctx->max_pertask)
cpuctx->max_pertask--;
+ hw_perf_restore(perf_flags);
+
spin_unlock(&ctx->lock);
curr_rq_unlock_irq_restore(&flags);
}
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
return;
- counter->hw_ops->disable(counter);
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_ops->disable(counter);
counter->oncpu = -1;
cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
counter_sched_out(counter, cpuctx, ctx);
}
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+ struct perf_cpu_context *cpuctx)
+{
+ struct perf_counter *counter;
+
+ if (likely(!ctx->nr_counters))
+ return;
+
+ spin_lock(&ctx->lock);
+ if (ctx->nr_active) {
+ list_for_each_entry(counter, &ctx->counter_list, list_entry)
+ group_sched_out(counter, cpuctx, ctx);
+ }
+ spin_unlock(&ctx->lock);
+}
+
/*
* Called from scheduler to remove the counters of the current task,
* with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &task->perf_counter_ctx;
- struct perf_counter *counter;
if (likely(!cpuctx->task_ctx))
return;
- spin_lock(&ctx->lock);
- if (ctx->nr_active) {
- list_for_each_entry(counter, &ctx->counter_list, list_entry)
- group_sched_out(counter, cpuctx, ctx);
- }
- spin_unlock(&ctx->lock);
+ __perf_counter_sched_out(ctx, cpuctx);
+
cpuctx->task_ctx = NULL;
}
-static int
-counter_sched_in(struct perf_counter *counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx,
- int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
{
- if (counter->state == PERF_COUNTER_STATE_OFF)
- return 0;
-
- if (counter->hw_ops->enable(counter))
- return -EAGAIN;
-
- counter->state = PERF_COUNTER_STATE_ACTIVE;
- counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
-
- cpuctx->active_oncpu++;
- ctx->nr_active++;
-
- return 0;
+ __perf_counter_sched_out(&cpuctx->ctx, cpuctx);
}
static int
@@ -416,21 +435,10 @@ group_error:
return -EAGAIN;
}
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+ struct perf_cpu_context *cpuctx, int cpu)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
- struct perf_counter_context *ctx = &task->perf_counter_ctx;
struct perf_counter *counter;
if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
break;
}
spin_unlock(&ctx->lock);
+}
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+ __perf_counter_sched_in(ctx, cpuctx, cpu);
cpuctx->task_ctx = ctx;
}
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+ struct perf_counter_context *ctx = &cpuctx->ctx;
+
+ __perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
int perf_counter_task_disable(void)
{
struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
/* force the update of the task clock: */
__task_delta_exec(curr, 1);
+ perf_counter_task_sched_out(curr, cpu);
+
spin_lock(&ctx->lock);
/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
return 0;
}
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
{
- struct perf_counter_context *ctx = &curr->perf_counter_ctx;
struct perf_counter *counter;
u64 perf_flags;
- if (likely(!ctx->nr_counters))
+ if (!ctx->nr_counters)
return;
- perf_counter_task_sched_out(curr, cpu);
-
spin_lock(&ctx->lock);
-
/*
* Rotate the first entry last (works just fine for group counters too):
*/
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
hw_perf_restore(perf_flags);
spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+ const int rotate_percpu = 0;
+
+ if (rotate_percpu)
+ perf_counter_cpu_sched_out(cpuctx);
+ perf_counter_task_sched_out(curr, cpu);
+ if (rotate_percpu)
+ rotate_ctx(&cpuctx->ctx);
+ rotate_ctx(ctx);
+
+ if (rotate_percpu)
+ perf_counter_cpu_sched_in(cpuctx, cpu);
perf_counter_task_sched_in(curr, cpu);
}
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
struct task_struct *curr = counter->task;
u64 delta;
- WARN_ON_ONCE(counter->task != current);
-
delta = __task_delta_exec(curr, update);
return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->group_leader = group_leader;
counter->hw_ops = NULL;
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
if (hw_event->disabled)
counter->state = PERF_COUNTER_STATE_OFF;
@@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child,
{
struct perf_counter *parent_counter;
u64 parent_val, child_val;
- unsigned long flags;
- u64 perf_flags;
/*
- * Disable and unlink this counter.
- *
- * Be careful about zapping the list - IRQ/NMI context
- * could still be processing it:
+ * If we do not self-reap then we have to wait for the
+ * child task to unschedule (it will happen for sure),
+ * so that its counter is at its final count. (This
+ * condition triggers rarely - child tasks usually get
+ * off their CPU before the parent has a chance to
+ * get this far into the reaping action)
*/
- curr_rq_lock_irq_save(&flags);
- perf_flags = hw_perf_save_disable();
-
- if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+ if (child != current) {
+ wait_task_inactive(child, 0);
+ list_del_init(&child_counter->list_entry);
+ } else {
struct perf_cpu_context *cpuctx;
+ unsigned long flags;
+ u64 perf_flags;
+
+ /*
+ * Disable and unlink this counter.
+ *
+ * Be careful about zapping the list - IRQ/NMI context
+ * could still be processing it:
+ */
+ curr_rq_lock_irq_save(&flags);
+ perf_flags = hw_perf_save_disable();
cpuctx = &__get_cpu_var(perf_cpu_context);
- child_counter->hw_ops->disable(child_counter);
- child_counter->state = PERF_COUNTER_STATE_INACTIVE;
- child_counter->oncpu = -1;
+ if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+ child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+ child_counter->hw_ops->disable(child_counter);
+ cpuctx->active_oncpu--;
+ child_ctx->nr_active--;
+ child_counter->oncpu = -1;
+ }
- cpuctx->active_oncpu--;
- child_ctx->nr_active--;
- }
+ list_del_init(&child_counter->list_entry);
- list_del_init(&child_counter->list_entry);
+ child_ctx->nr_counters--;
- hw_perf_restore(perf_flags);
- curr_rq_unlock_irq_restore(&flags);
+ hw_perf_restore(perf_flags);
+ curr_rq_unlock_irq_restore(&flags);
+ }
parent_counter = child_counter->parent;
/*