Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--	mm/vmstat.c	169
1 file changed, 162 insertions(+), 7 deletions(-)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9a4441bbeef2..4bb13e72ac97 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -30,6 +30,8 @@
#include "internal.h"
+#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
+
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
@@ -87,8 +89,10 @@ void vm_events_fold_cpu(int cpu)
* vm_stat contains the global counters
*/
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
+EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);
#ifdef CONFIG_SMP
@@ -604,6 +608,32 @@ EXPORT_SYMBOL(dec_node_page_state);
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
+#ifdef CONFIG_NUMA
+static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+{
+	int i;
+	int changes = 0;
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		if (zone_diff[i]) {
+			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
+			changes++;
+		}
+
+	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+		if (numa_diff[i]) {
+			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
+			changes++;
+		}
+
+	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+		if (node_diff[i]) {
+			atomic_long_add(node_diff[i], &vm_node_stat[i]);
+			changes++;
+		}
+	return changes;
+}
+#else
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
@@ -622,6 +652,7 @@ static int fold_diff(int *zone_diff, int *node_diff)
		}
	return changes;
}
+#endif /* CONFIG_NUMA */
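
fold_diff() applies each nonzero per-CPU differential to the matching global atomic counter and reports how many counters it touched, which lets refresh_cpu_vm_stats() tell its caller whether anything changed. A minimal user-space sketch of the same fold pattern, using C11 atomics in place of atomic_long_t (fold_diff_sketch, NR_ITEMS and global_stat are invented names for illustration, not kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define NR_ITEMS 4

static atomic_long global_stat[NR_ITEMS];	/* stands in for vm_zone_stat */

/* Apply a differential to the globals; count how many counters changed. */
static int fold_diff_sketch(int *diff)
{
	int i, changes = 0;

	for (i = 0; i < NR_ITEMS; i++)
		if (diff[i]) {
			atomic_fetch_add(&global_stat[i], diff[i]);
			changes++;
		}
	return changes;
}

int main(void)
{
	int diff[NR_ITEMS] = { 3, 0, -1, 0 };

	printf("%d counters updated\n", fold_diff_sketch(diff));	/* 2 */
	return 0;
}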
/*
* Update the zone counters for the current cpu.
@@ -645,6 +676,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;
@@ -666,6 +700,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
			}
		}
#ifdef CONFIG_NUMA
+		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+			int v;
+
+			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
+			if (v) {
+				atomic_long_add(v, &zone->vm_numa_stat[i]);
+				global_numa_diff[i] += v;
+				__this_cpu_write(p->expire, 3);
+			}
+		}
+
		if (do_pagesets) {
			cond_resched();
			/*
@@ -712,7 +758,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
		}
	}

+#ifdef CONFIG_NUMA
+	changes += fold_diff(global_zone_diff, global_numa_diff,
+			     global_node_diff);
+#else
	changes += fold_diff(global_zone_diff, global_node_diff);
+#endif

	return changes;
}
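
Note the contrast between the two fold paths: refresh_cpu_vm_stats() above runs on a live CPU, so it drains each diff with this_cpu_xchg(), which reads and zeroes the counter in a single step and therefore cannot lose an increment that races with the drain; cpu_vm_stats_fold() below uses a plain read-then-clear, which is safe only because it targets a CPU that is going away. A loose user-space analogue of the xchg-based drain, substituting a full C11 atomic exchange for the kernel's cheaper per-CPU primitive (all names here are illustrative):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int percpu_diff;	/* stands in for p->vm_numa_stat_diff[i] */
static atomic_long global_stat;	/* stands in for zone->vm_numa_stat[i] */

/* Drain the per-CPU delta into the global counter without losing updates. */
static void refresh_one(void)
{
	int v = atomic_exchange(&percpu_diff, 0);	/* read and zero atomically */

	if (v)
		atomic_fetch_add(&global_stat, v);
}

int main(void)
{
	atomic_fetch_add(&percpu_diff, 5);	/* events counted locally */
	refresh_one();
	printf("global = %ld\n", atomic_load(&global_stat));	/* global = 5 */
	return 0;
}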
@@ -727,6 +778,9 @@ void cpu_vm_stats_fold(int cpu)
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
@@ -743,6 +797,18 @@ void cpu_vm_stats_fold(int cpu)
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}
+
+#ifdef CONFIG_NUMA
+		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+			if (p->vm_numa_stat_diff[i]) {
+				int v;
+
+				v = p->vm_numa_stat_diff[i];
+				p->vm_numa_stat_diff[i] = 0;
+				atomic_long_add(v, &zone->vm_numa_stat[i]);
+				global_numa_diff[i] += v;
+			}
+#endif
	}
	for_each_online_pgdat(pgdat) {
@@ -761,7 +827,11 @@ void cpu_vm_stats_fold(int cpu)
		}
	}

+#ifdef CONFIG_NUMA
+	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
+#else
	fold_diff(global_zone_diff, global_node_diff);
+#endif
}
/*
@@ -779,10 +849,36 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_zone_stat[i]);
		}
+
+#ifdef CONFIG_NUMA
+	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+		if (pset->vm_numa_stat_diff[i]) {
+			int v = pset->vm_numa_stat_diff[i];
+
+			pset->vm_numa_stat_diff[i] = 0;
+			atomic_long_add(v, &zone->vm_numa_stat[i]);
+			atomic_long_add(v, &vm_numa_stat[i]);
+		}
+#endif
}
#endif
#ifdef CONFIG_NUMA
+void __inc_numa_state(struct zone *zone,
+			enum numa_stat_item item)
+{
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
+	u16 v;
+
+	v = __this_cpu_inc_return(*p);
+
+	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
+		zone_numa_state_add(v, zone, item);
+		__this_cpu_write(*p, 0);
+	}
+}
+
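__inc_numa_state() is the hot path this patch is optimizing: each NUMA event costs one per-CPU u16 increment, and only when the diff approaches overflow (NUMA_STATS_THRESHOLD is U16_MAX - 2, leaving a little headroom below the wrap point) is it spilled into the zone-wide atomic and reset. A single-threaded sketch of the spill arithmetic; the kernel relies on __this_cpu_inc_return() for preemption and interrupt safety, which this deliberately ignores:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define THRESHOLD (UINT16_MAX - 2)	/* mirrors NUMA_STATS_THRESHOLD */

static uint16_t local_diff;	/* one CPU's u16 delta */
static atomic_long zone_stat;	/* zone-wide counter */

/* Count one event: cheap local increment, rare global spill. */
static void inc_numa_sketch(void)
{
	uint16_t v = ++local_diff;

	if (v > THRESHOLD) {		/* about to wrap: fold and reset */
		atomic_fetch_add(&zone_stat, v);
		local_diff = 0;
	}
}

int main(void)
{
	for (long i = 0; i < 100000; i++)
		inc_numa_sketch();
	/* every event is in zone_stat or still pending in local_diff */
	printf("%ld\n", atomic_load(&zone_stat) + local_diff);	/* 100000 */
	return 0;
}
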
/*
* Determine the per node value of a stat item. This function
* is called frequently in a NUMA machine, so try to be as
@@ -802,6 +898,23 @@ unsigned long sum_zone_node_page_state(int node,
}
/*
+ * Determine the per node value of a numa stat item. To avoid deviation,
+ * the per-CPU stat numbers in vm_numa_stat_diff[] are also included.
+ */
+unsigned long sum_zone_numa_state(int node,
+			enum numa_stat_item item)
+{
+	struct zone *zones = NODE_DATA(node)->node_zones;
+	int i;
+	unsigned long count = 0;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		count += zone_numa_state_snapshot(zones + i, item);
+
+	return count;
+}
+
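sum_zone_numa_state() goes through zone_numa_state_snapshot() rather than reading zone->vm_numa_stat directly because, with a threshold this large, a sizable share of each count may still be parked in the per-CPU vm_numa_stat_diff[] arrays; the snapshot adds those pending deltas back in. A sketch of that read side (NR_CPUS, cpu_diff and snapshot() are illustrative stand-ins):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

static atomic_long zone_stat;		/* the folded, global part */
static uint16_t cpu_diff[NR_CPUS];	/* pending, unfolded per-CPU parts */

/* Approximate reading: folded value plus whatever is not folded yet. */
static long snapshot(void)
{
	long x = atomic_load(&zone_stat);

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		x += cpu_diff[cpu];
	return x;
}

int main(void)
{
	atomic_store(&zone_stat, 65534);	/* one spill has happened */
	cpu_diff[0] = 12;			/* still pending on CPU 0 */
	cpu_diff[3] = 7;			/* still pending on CPU 3 */
	printf("%ld\n", snapshot());		/* 65553, not a stale 65534 */
	return 0;
}
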
+/*
* Determine the per node value of a stat item.
*/
unsigned long node_page_state(struct pglist_data *pgdat,
@@ -870,6 +983,9 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

+	if (WARN_ON_ONCE(order >= MAX_ORDER))
+		return 0;
+
	if (!info->free_blocks_total)
		return 0;
@@ -934,6 +1050,9 @@ const char * const vmstat_text[] = {
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
+	"nr_free_cma",
+
+	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
@@ -942,7 +1061,6 @@ const char * const vmstat_text[] = {
	"numa_local",
	"numa_other",
#endif
-	"nr_free_cma",

	/* Node-based counters */
	"nr_inactive_anon",
@@ -1071,6 +1189,8 @@ const char * const vmstat_text[] = {
#endif
"thp_zero_page_alloc",
"thp_zero_page_alloc_failed",
+ "thp_swpout",
+ "thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
"balloon_inflate",
@@ -1093,11 +1213,14 @@ const char * const vmstat_text[] = {
"vmacache_find_hits",
"vmacache_full_flushes",
#endif
+#ifdef CONFIG_SWAP
+ "swap_ra",
+ "swap_ra_hit",
+#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
-
#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
@@ -1250,7 +1373,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
seq_putc(m, '\n');
}
-/* Print out the free pages at each order for each migratetype */
+/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
int mtype;
@@ -1375,7 +1498,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS],
node_page_state(pgdat, i));
}
}
@@ -1412,6 +1536,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m, "\n %-12s %lu", vmstat_text[i],
zone_page_state(zone, i));
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ seq_printf(m, "\n %-12s %lu",
+ vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ zone_numa_state_snapshot(zone, i));
+#endif
+
seq_printf(m, "\n pagesets");
for_each_online_cpu(i) {
struct per_cpu_pageset *pageset;
@@ -1488,6 +1619,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
+			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
@@ -1500,9 +1632,15 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		v[i] = global_page_state(i);
+		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

+#ifdef CONFIG_NUMA
+	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+		v[i] = global_numa_state(i);
+	v += NR_VM_NUMA_STAT_ITEMS;
+#endif
+
	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		v[i] = global_node_page_state(i);
	v += NR_VM_NODE_STAT_ITEMS;
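
The fill order here has to stay in lockstep with vmstat_text[]: /proc/vmstat is one flat array holding zone items, then (under CONFIG_NUMA) numa items, then node items, then writeback items, and every name lookup offsets by the sizes of the preceding groups; that is why zoneinfo_show_print() above indexes node names with i + NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_STAT_ITEMS. A toy version of this shared-offset convention (all names and counts below are made up):

#include <stdio.h>

enum { NR_ZONE_ITEMS = 2, NR_NUMA_ITEMS = 2, NR_NODE_ITEMS = 1 };

/* One name table for all counter groups, concatenated in order. */
static const char * const text[] = {
	"nr_free_pages", "nr_zone_active_anon",	/* zone items */
	"numa_hit", "numa_miss",		/* numa items */
	"nr_inactive_anon",			/* node items */
};

int main(void)
{
	int node_item = 0;	/* index within the node group */

	/* A node item's name lives past both preceding groups. */
	printf("%s\n", text[node_item + NR_ZONE_ITEMS + NR_NUMA_ITEMS]);
	return 0;
}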
@@ -1589,7 +1727,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
* which can equally be echo'ed to or cat'ted from (by root),
* can be used to update the stats just before reading them.
*
- * Oh, and since global_page_state() etc. are so careful to hide
+ * Oh, and since global_zone_page_state() etc. are so careful to hide
* transiently negative values, report an error here if any of
* the stats is negative, so we know to go looking for imbalance.
*/
@@ -1604,6 +1742,16 @@ int vmstat_refresh(struct ctl_table *table, int write,
			err = -EINVAL;
		}
	}
+#ifdef CONFIG_NUMA
+	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+		val = atomic_long_read(&vm_numa_stat[i]);
+		if (val < 0) {
+			pr_warn("%s: %s %ld\n",
+				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+			err = -EINVAL;
+		}
+	}
+#endif
	if (err)
		return err;
	if (write)
@@ -1645,13 +1793,20 @@ static bool need_update(int cpu)
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+#ifdef CONFIG_NUMA
+		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
+#endif
+
		/*
		 * The fast way of checking if there are any vmstat diffs.
		 * This works because the diffs are byte sized items.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
			return true;
-
+#ifdef CONFIG_NUMA
+		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
+			       sizeof(p->vm_numa_stat_diff[0])))
+			return true;
+#endif
	}
	return false;
}
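
One subtlety in need_update(): memchr_inv() takes its length in bytes. vm_stat_diff[] holds byte-sized (s8) entries, so NR_VM_ZONE_STAT_ITEMS is already a byte count, but vm_numa_stat_diff[] holds u16 entries, so its scan must cover NR_VM_NUMA_STAT_ITEMS * sizeof(u16) bytes or pending updates in the upper half of the array go unseen; hence the sizeof() factor in the hunk above. A standalone demonstration, with a plain loop standing in for the kernel-internal memchr_inv():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* User-space stand-in for the kernel's memchr_inv(buf, 0, len). */
static const void *memchr_inv0(const void *buf, size_t len)
{
	const unsigned char *p = buf;

	for (size_t i = 0; i < len; i++)
		if (p[i])
			return p + i;
	return NULL;
}

#define NR_ITEMS 6

int main(void)
{
	uint16_t diff[NR_ITEMS] = { 0 };

	diff[NR_ITEMS - 1] = 1;	/* a pending update in the last slot */

	/* Item count as byte length: scans 6 of 12 bytes, misses it. */
	printf("items: %s\n", memchr_inv0(diff, NR_ITEMS) ? "dirty" : "clean");

	/* Correct byte length: sees the update. */
	printf("bytes: %s\n",
	       memchr_inv0(diff, NR_ITEMS * sizeof(diff[0])) ? "dirty" : "clean");
	return 0;
}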