summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/bounce.c4
-rw-r--r--mm/memory.c2
-rw-r--r--mm/memory_hotplug.c6
-rw-r--r--mm/mempolicy.c4
-rw-r--r--mm/oom_kill.c19
-rw-r--r--mm/page-writeback.c45
-rw-r--r--mm/page_alloc.c32
-rw-r--r--mm/rmap.c2
-rw-r--r--mm/slab.c4
-rw-r--r--mm/slob.c11
-rw-r--r--mm/swapfile.c8
-rw-r--r--mm/truncate.c24
-rw-r--r--mm/vmscan.c35
13 files changed, 139 insertions, 57 deletions
diff --git a/mm/bounce.c b/mm/bounce.c
index e4b62d2a4024..643efbe82402 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -237,6 +237,8 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
if (!bio)
return;
+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
/*
* at least one page was bounced, fill in possible non-highmem
* pages
@@ -291,8 +293,6 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
pool = isa_page_pool;
}
- blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
/*
* slow path
*/
diff --git a/mm/memory.c b/mm/memory.c
index 563792f4f687..af227d26e104 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1091,7 +1091,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (pages) {
pages[i] = page;
- flush_anon_page(page, start);
+ flush_anon_page(vma, page, start);
flush_dcache_page(page);
}
if (vmas)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0c055a090f4d..84279127fcd3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -67,11 +67,13 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
zone_type = zone - pgdat->node_zones;
if (!populated_zone(zone)) {
int ret = 0;
- ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages);
+ ret = init_currently_empty_zone(zone, phys_start_pfn,
+ nr_pages, MEMMAP_HOTPLUG);
if (ret < 0)
return ret;
}
- memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
+ memmap_init_zone(nr_pages, nid, zone_type,
+ phys_start_pfn, MEMMAP_HOTPLUG);
return 0;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index da9463946556..c2aec0e1090d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -884,6 +884,10 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
err = get_nodes(&nodes, nmask, maxnode);
if (err)
return err;
+#ifdef CONFIG_CPUSETS
+ /* Restrict the nodes to the allowed nodes in the cpuset */
+ nodes_and(nodes, nodes, current->mems_allowed);
+#endif
return do_mbind(start, len, mode, &nodes, flags);
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64cf3c214634..b278b8d60eee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -61,12 +61,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
}
/*
- * swapoff can easily use up all memory, so kill those first.
- */
- if (p->flags & PF_SWAPOFF)
- return ULONG_MAX;
-
- /*
* The memory size of the process is the basis for the badness.
*/
points = mm->total_vm;
@@ -77,6 +71,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
task_unlock(p);
/*
+ * swapoff can easily use up all memory, so kill those first.
+ */
+ if (p->flags & PF_SWAPOFF)
+ return ULONG_MAX;
+
+ /*
* Processes which fork a lot of child processes are likely
* a good choice. We add half the vmsize of the children if they
* have an own mm. This prevents forking servers to flood the
@@ -174,7 +174,12 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
{
#ifdef CONFIG_NUMA
struct zone **z;
- nodemask_t nodes = node_online_map;
+ nodemask_t nodes;
+ int node;
+ /* node has memory ? */
+ for_each_online_node(node)
+ if (NODE_DATA(node)->node_present_pages)
+ node_set(node, nodes);
for (z = zonelist->zones; *z; z++)
if (cpuset_zone_allowed_softwall(*z, gfp_mask))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b3a198c9248d..1d2fc89ca56d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -862,17 +862,46 @@ int clear_page_dirty_for_io(struct page *page)
{
struct address_space *mapping = page_mapping(page);
- if (!mapping)
- return TestClearPageDirty(page);
-
- if (TestClearPageDirty(page)) {
- if (mapping_cap_account_dirty(mapping)) {
- page_mkclean(page);
+ if (mapping && mapping_cap_account_dirty(mapping)) {
+ /*
+ * Yes, Virginia, this is indeed insane.
+ *
+ * We use this sequence to make sure that
+ * (a) we account for dirty stats properly
+ * (b) we tell the low-level filesystem to
+ * mark the whole page dirty if it was
+ * dirty in a pagetable. Only to then
+ * (c) clean the page again and return 1 to
+ * cause the writeback.
+ *
+ * This way we avoid all nasty races with the
+ * dirty bit in multiple places and clearing
+ * them concurrently from different threads.
+ *
+ * Note! Normally the "set_page_dirty(page)"
+ * has no effect on the actual dirty bit - since
+ * that will already usually be set. But we
+ * need the side effects, and it can help us
+ * avoid races.
+ *
+ * We basically use the page "master dirty bit"
+ * as a serialization point for all the different
+ * threads doing their things.
+ *
+ * FIXME! We still have a race here: if somebody
+ * adds the page back to the page tables in
+ * between the "page_mkclean()" and the "TestClearPageDirty()",
+ * we might have it mapped without the dirty bit set.
+ */
+ if (page_mkclean(page))
+ set_page_dirty(page);
+ if (TestClearPageDirty(page)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
+ return 1;
}
- return 1;
+ return 0;
}
- return 0;
+ return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8c1a116875bc..fc5b5442e942 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -711,6 +711,9 @@ static void __drain_pages(unsigned int cpu)
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ if (!populated_zone(zone))
+ continue;
+
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
@@ -1953,17 +1956,24 @@ static inline unsigned long wait_table_bits(unsigned long size)
* done. Non-atomic initialization, single-pass.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn)
+ unsigned long start_pfn, enum memmap_context context)
{
struct page *page;
unsigned long end_pfn = start_pfn + size;
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
- if (!early_pfn_valid(pfn))
- continue;
- if (!early_pfn_in_nid(pfn, nid))
- continue;
+ /*
+ * There can be holes in boot-time mem_map[]s
+ * handed to this function. They do not
+ * exist on hotplugged memory.
+ */
+ if (context == MEMMAP_EARLY) {
+ if (!early_pfn_valid(pfn))
+ continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
+ }
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
@@ -1990,7 +2000,7 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(size, nid, zone, start_pfn) \
- memmap_init_zone((size), (nid), (zone), (start_pfn))
+ memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
#endif
static int __cpuinit zone_batchsize(struct zone *zone)
@@ -2236,7 +2246,8 @@ static __meminit void zone_pcp_init(struct zone *zone)
__meminit int init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
- unsigned long size)
+ unsigned long size,
+ enum memmap_context context)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int ret;
@@ -2680,7 +2691,8 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
if (!size)
continue;
- ret = init_currently_empty_zone(zone, zone_start_pfn, size);
+ ret = init_currently_empty_zone(zone, zone_start_pfn,
+ size, MEMMAP_EARLY);
BUG_ON(ret);
zone_start_pfn += size;
}
@@ -3321,6 +3333,10 @@ void *__init alloc_large_system_hash(const char *tablename,
numentries >>= (scale - PAGE_SHIFT);
else
numentries <<= (PAGE_SHIFT - scale);
+
+ /* Make sure we've got at least a 0-order allocation.. */
+ if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+ numentries = PAGE_SIZE / bucketsize;
}
numentries = roundup_pow_of_two(numentries);
diff --git a/mm/rmap.c b/mm/rmap.c
index 57306fa0114d..669acb22b572 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -452,7 +452,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
entry = ptep_clear_flush(vma, address, pte);
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
- set_pte_at(vma, address, pte, entry);
+ set_pte_at(mm, address, pte, entry);
lazy_mmu_prot_update(entry);
ret = 1;
}
diff --git a/mm/slab.c b/mm/slab.c
index 0d4e57431de4..c6100628a6ef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3281,7 +3281,7 @@ retry:
flags | GFP_THISNODE, nid);
}
- if (!obj) {
+ if (!obj && !(flags & __GFP_NO_GROW)) {
/*
* This allocation will be performed within the constraints
* of the current cpuset / memory policy requirements.
@@ -3310,7 +3310,7 @@ retry:
*/
goto retry;
} else {
- kmem_freepages(cache, obj);
+ /* cache_grow already freed obj */
obj = NULL;
}
}
diff --git a/mm/slob.c b/mm/slob.c
index 2e9236e10ed1..5adc29cb58dd 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -60,6 +60,8 @@ static DEFINE_SPINLOCK(slob_lock);
static DEFINE_SPINLOCK(block_lock);
static void slob_free(void *b, int size);
+static void slob_timer_cbk(void);
+
static void *slob_alloc(size_t size, gfp_t gfp, int align)
{
@@ -326,7 +328,7 @@ const char *kmem_cache_name(struct kmem_cache *c)
EXPORT_SYMBOL(kmem_cache_name);
static struct timer_list slob_timer = TIMER_INITIALIZER(
- (void (*)(unsigned long))kmem_cache_init, 0, 0);
+ (void (*)(unsigned long))slob_timer_cbk, 0, 0);
int kmem_cache_shrink(struct kmem_cache *d)
{
@@ -339,7 +341,12 @@ int kmem_ptr_validate(struct kmem_cache *a, const void *b)
return 0;
}
-void kmem_cache_init(void)
+void __init kmem_cache_init(void)
+{
+ slob_timer_cbk();
+}
+
+static void slob_timer_cbk(void)
{
void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b9fc0e5de6d5..a2d9bb4e80df 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry)
*
* This is needed for the suspend to disk (aka swsusp).
*/
-int swap_type_of(dev_t device, sector_t offset)
+int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
{
struct block_device *bdev = NULL;
int i;
@@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset)
continue;
if (!bdev) {
+ if (bdev_p)
+ *bdev_p = sis->bdev;
+
spin_unlock(&swap_lock);
return i;
}
@@ -459,6 +462,9 @@ int swap_type_of(dev_t device, sector_t offset)
se = list_entry(sis->extent_list.next,
struct swap_extent, list);
if (se->start_block == offset) {
+ if (bdev_p)
+ *bdev_p = sis->bdev;
+
spin_unlock(&swap_lock);
bdput(bdev);
return i;
diff --git a/mm/truncate.c b/mm/truncate.c
index 4a38dd1a4ce8..6c79ca4a1ca7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -60,12 +60,16 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
WARN_ON(++warncount < 5);
}
- if (TestClearPageDirty(page) && account_size &&
- mapping_cap_account_dirty(page->mapping)) {
- dec_zone_page_state(page, NR_FILE_DIRTY);
- task_io_account_cancelled_write(account_size);
+ if (TestClearPageDirty(page)) {
+ struct address_space *mapping = page->mapping;
+ if (mapping && mapping_cap_account_dirty(mapping)) {
+ dec_zone_page_state(page, NR_FILE_DIRTY);
+ if (account_size)
+ task_io_account_cancelled_write(account_size);
+ }
}
}
+EXPORT_SYMBOL(cancel_dirty_page);
/*
* If truncate cannot remove the fs-private metadata from the page, the page
@@ -337,6 +341,15 @@ failed:
return 0;
}
+static int do_launder_page(struct address_space *mapping, struct page *page)
+{
+ if (!PageDirty(page))
+ return 0;
+ if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
+ return 0;
+ return mapping->a_ops->launder_page(page);
+}
+
/**
* invalidate_inode_pages2_range - remove range of pages from an address_space
* @mapping: the address_space
@@ -401,7 +414,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
PAGE_CACHE_SIZE, 0);
}
}
- if (!invalidate_complete_page2(mapping, page))
+ ret = do_launder_page(mapping, page);
+ if (ret == 0 && !invalidate_complete_page2(mapping, page))
ret = -EIO;
unlock_page(page);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 63eb9ab0032b..7430df68cb64 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -692,7 +692,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
__count_vm_events(KSWAPD_STEAL, nr_freed);
} else
__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
- __count_vm_events(PGACTIVATE, nr_freed);
+ __count_zone_vm_events(PGSTEAL, zone, nr_freed);
if (nr_taken == 0)
goto done;
@@ -1406,6 +1406,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
return ret;
}
+static unsigned long count_lru_pages(void)
+{
+ struct zone *zone;
+ unsigned long ret = 0;
+
+ for_each_zone(zone)
+ ret += zone->nr_active + zone->nr_inactive;
+ return ret;
+}
+
/*
* Try to free `nr_pages' of memory, system-wide, and return the number of
* freed pages.
@@ -1420,7 +1430,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
unsigned long ret = 0;
int pass;
struct reclaim_state reclaim_state;
- struct zone *zone;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 0,
@@ -1431,10 +1440,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
current->reclaim_state = &reclaim_state;
- lru_pages = 0;
- for_each_zone(zone)
- lru_pages += zone->nr_active + zone->nr_inactive;
-
+ lru_pages = count_lru_pages();
nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
@@ -1461,13 +1467,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
for (pass = 0; pass < 5; pass++) {
int prio;
- /* Needed for shrinking slab caches later on */
- if (!lru_pages)
- for_each_zone(zone) {
- lru_pages += zone->nr_active;
- lru_pages += zone->nr_inactive;
- }
-
/* Force reclaiming mapped pages in the passes #3 and #4 */
if (pass > 2) {
sc.may_swap = 1;
@@ -1483,7 +1482,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
goto out;
reclaim_state.reclaimed_slab = 0;
- shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+ shrink_slab(sc.nr_scanned, sc.gfp_mask,
+ count_lru_pages());
ret += reclaim_state.reclaimed_slab;
if (ret >= nr_pages)
goto out;
@@ -1491,20 +1491,19 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ / 10);
}
-
- lru_pages = 0;
}
/*
* If ret = 0, we could not shrink LRUs, but there may be something
* in slab caches
*/
- if (!ret)
+ if (!ret) {
do {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+ shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
ret += reclaim_state.reclaimed_slab;
} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+ }
out:
current->reclaim_state = NULL;