summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mm/vmstat.c141
1 files changed, 120 insertions, 21 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index cce7c766da7a..1b12d390dc68 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -7,6 +7,7 @@
* zoned VM statistics
* Copyright (C) 2006 Silicon Graphics, Inc.,
* Christoph Lameter <christoph@lameter.com>
+ * Copyright (C) 2008-2014 Christoph Lameter
*/
#include <linux/fs.h>
#include <linux/mm.h>
@@ -14,6 +15,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
EXPORT_SYMBOL(dec_zone_page_state);
#endif
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters updated.
+ */
+static int fold_diff(int *diff)
{
int i;
+ int changes = 0;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (diff[i])
+ if (diff[i]) {
atomic_long_add(diff[i], &vm_stat[i]);
+ changes++;
+ }
+ return changes;
}
/*
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff)
* statistics in the remote zone struct as well as the global cachelines
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
+ *
+ * The function returns the number of global counters updated.
*/
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+ int changes = 0;
for_each_populated_zone(zone) {
struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void)
continue;
}
-
if (__this_cpu_dec_return(p->expire))
continue;
- if (__this_cpu_read(p->pcp.count))
+ if (__this_cpu_read(p->pcp.count)) {
drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+ changes++;
+ }
#endif
}
- fold_diff(global_diff);
+ changes += fold_diff(global_diff);
+ return changes;
}
/*
@@ -1239,20 +1255,108 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
+static cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w)
{
- refresh_cpu_vm_stats();
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ if (refresh_cpu_vm_stats())
+ /*
+ * Counters were updated so we expect more updates
+ * to occur in the future. Keep on running the
+ * update worker thread.
+ */
+ schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
+ else {
+ /*
+ * We did not update any counters so the app may be in
+ * a mode where it does not cause counter updates.
+ * We may be uselessly running vmstat_update.
+ * Defer the checking for differentials to the
+ * shepherd thread on a different processor.
+ */
+ int r;
+ /*
+ * Shepherd work thread does not race since it never
+ * changes the bit if its zero but the cpu
+ * online / off line code may race if
+ * worker threads are still allowed during
+ * shutdown / startup.
+ */
+ r = cpumask_test_and_set_cpu(smp_processor_id(),
+ cpu_stat_off);
+ VM_BUG_ON(r);
+ }
+}
+
+/*
+ * Check if the diffs for a certain cpu indicate that
+ * an update is needed.
+ */
+static bool need_update(int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+ BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+ /*
+ * The fast way of checking if there are any vmstat diffs.
+ * This works because the diffs are byte sized items.
+ */
+ if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+ return true;
+
+ }
+ return false;
+}
+
+
+/*
+ * Shepherd worker thread that checks the
+ * differentials of processors that have their worker
+ * threads for vm statistics updates disabled because of
+ * inactivity.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+ int cpu;
+
+ get_online_cpus();
+ /* Check processors whose vmstat worker threads have been disabled */
+ for_each_cpu(cpu, cpu_stat_off)
+ if (need_update(cpu) &&
+ cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+ schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+ __round_jiffies_relative(sysctl_stat_interval, cpu));
+
+ put_online_cpus();
+
+ schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
+
}
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
{
- struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+ vmstat_update);
+
+ if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+ BUG();
+ cpumask_copy(cpu_stat_off, cpu_online_mask);
- INIT_DEFERRABLE_WORK(work, vmstat_update);
- schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+ schedule_delayed_work(&shepherd,
+ round_jiffies_relative(sysctl_stat_interval));
}
static void vmstat_cpu_dead(int node)
@@ -1283,17 +1387,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- start_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
+ cpumask_clear_cpu(cpu, cpu_stat_off);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1313,15 +1417,10 @@ static struct notifier_block vmstat_notifier =
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
- int cpu;
-
cpu_notifier_register_begin();
__register_cpu_notifier(&vmstat_notifier);
- for_each_online_cpu(cpu) {
- start_cpu_timer(cpu);
- node_set_state(cpu_to_node(cpu), N_CPU);
- }
+ start_shepherd_timer();
cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS