Diffstat (limited to 'mm')
-rw-r--r--  mm/bounce.c          4
-rw-r--r--  mm/memory.c          2
-rw-r--r--  mm/memory_hotplug.c  6
-rw-r--r--  mm/mempolicy.c       4
-rw-r--r--  mm/oom_kill.c       19
-rw-r--r--  mm/page-writeback.c 45
-rw-r--r--  mm/page_alloc.c     32
-rw-r--r--  mm/rmap.c            2
-rw-r--r--  mm/slab.c            4
-rw-r--r--  mm/slob.c           11
-rw-r--r--  mm/swapfile.c        8
-rw-r--r--  mm/truncate.c       24
-rw-r--r--  mm/vmscan.c         35
13 files changed, 139 insertions, 57 deletions
diff --git a/mm/bounce.c b/mm/bounce.c
index e4b62d2a4024..643efbe82402 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -237,6 +237,8 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
 	if (!bio)
 		return;
 
+	blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
 	/*
 	 * at least one page was bounced, fill in possible non-highmem
 	 * pages
@@ -291,8 +293,6 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
 		pool = isa_page_pool;
 	}
 
-	blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
 	/*
 	 * slow path
 	 */
diff --git a/mm/memory.c b/mm/memory.c
index 563792f4f687..af227d26e104 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1091,7 +1091,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			if (pages) {
 				pages[i] = page;
 
-				flush_anon_page(page, start);
+				flush_anon_page(vma, page, start);
 				flush_dcache_page(page);
 			}
 			if (vmas)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0c055a090f4d..84279127fcd3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -67,11 +67,13 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 
 	zone_type = zone - pgdat->node_zones;
 	if (!populated_zone(zone)) {
 		int ret = 0;
-		ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages);
+		ret = init_currently_empty_zone(zone, phys_start_pfn,
+						nr_pages, MEMMAP_HOTPLUG);
 		if (ret < 0)
 			return ret;
 	}
-	memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
+	memmap_init_zone(nr_pages, nid, zone_type,
+			 phys_start_pfn, MEMMAP_HOTPLUG);
 	return 0;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index da9463946556..c2aec0e1090d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -884,6 +884,10 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
+#ifdef CONFIG_CPUSETS
+	/* Restrict the nodes to the allowed nodes in the cpuset */
+	nodes_and(nodes, nodes, current->mems_allowed);
+#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64cf3c214634..b278b8d60eee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -61,12 +61,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	}
 
 	/*
-	 * swapoff can easily use up all memory, so kill those first.
-	 */
-	if (p->flags & PF_SWAPOFF)
-		return ULONG_MAX;
-
-	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
 	points = mm->total_vm;
@@ -77,6 +71,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	task_unlock(p);
 
 	/*
+	 * swapoff can easily use up all memory, so kill those first.
+	 */
+	if (p->flags & PF_SWAPOFF)
+		return ULONG_MAX;
+
+	/*
 	 * Processes which fork a lot of child processes are likely
 	 * a good choice. We add half the vmsize of the children if they
 	 * have an own mm. This prevents forking servers to flood the
@@ -174,7 +174,12 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 {
 #ifdef CONFIG_NUMA
 	struct zone **z;
-	nodemask_t nodes = node_online_map;
+	nodemask_t nodes;
+	int node;
+	/* node has memory ? */
+	for_each_online_node(node)
+		if (NODE_DATA(node)->node_present_pages)
+			node_set(node, nodes);
 
 	for (z = zonelist->zones; *z; z++)
 		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b3a198c9248d..1d2fc89ca56d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -862,17 +862,46 @@ int clear_page_dirty_for_io(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
-	if (!mapping)
-		return TestClearPageDirty(page);
-
-	if (TestClearPageDirty(page)) {
-		if (mapping_cap_account_dirty(mapping)) {
-			page_mkclean(page);
+	if (mapping && mapping_cap_account_dirty(mapping)) {
+		/*
+		 * Yes, Virginia, this is indeed insane.
+		 *
+		 * We use this sequence to make sure that
+		 *  (a) we account for dirty stats properly
+		 *  (b) we tell the low-level filesystem to
+		 *      mark the whole page dirty if it was
+		 *      dirty in a pagetable. Only to then
+		 *  (c) clean the page again and return 1 to
+		 *      cause the writeback.
+		 *
+		 * This way we avoid all nasty races with the
+		 * dirty bit in multiple places and clearing
+		 * them concurrently from different threads.
+		 *
+		 * Note! Normally the "set_page_dirty(page)"
+		 * has no effect on the actual dirty bit - since
+		 * that will already usually be set. But we
+		 * need the side effects, and it can help us
+		 * avoid races.
+		 *
+		 * We basically use the page "master dirty bit"
+		 * as a serialization point for all the different
+		 * threads doing their things.
+		 *
+		 * FIXME! We still have a race here: if somebody
+		 * adds the page back to the page tables in
+		 * between the "page_mkclean()" and the "TestClearPageDirty()",
+		 * we might have it mapped without the dirty bit set.
+		 */
+		if (page_mkclean(page))
+			set_page_dirty(page);
+		if (TestClearPageDirty(page)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
+			return 1;
 		}
-		return 1;
+		return 0;
 	}
-	return 0;
+	return TestClearPageDirty(page);
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8c1a116875bc..fc5b5442e942 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -711,6 +711,9 @@ static void __drain_pages(unsigned int cpu)
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
+		if (!populated_zone(zone))
+			continue;
+
 		pset = zone_pcp(zone, cpu);
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
@@ -1953,17 +1956,24 @@ static inline unsigned long wait_table_bits(unsigned long size)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn)
+		unsigned long start_pfn, enum memmap_context context)
 {
 	struct page *page;
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!early_pfn_valid(pfn))
-			continue;
-		if (!early_pfn_in_nid(pfn, nid))
-			continue;
+		/*
+		 * There can be holes in boot-time mem_map[]s
+		 * handed to this function. They do not
+		 * exist on hotplugged memory.
+		 */
+		if (context == MEMMAP_EARLY) {
+			if (!early_pfn_valid(pfn))
+				continue;
+			if (!early_pfn_in_nid(pfn, nid))
+				continue;
+		}
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
 		init_page_count(page);
@@ -1990,7 +2000,7 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
 
 #ifndef __HAVE_ARCH_MEMMAP_INIT
 #define memmap_init(size, nid, zone, start_pfn) \
-	memmap_init_zone((size), (nid), (zone), (start_pfn))
+	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif
 
 static int __cpuinit zone_batchsize(struct zone *zone)
@@ -2236,7 +2246,8 @@ static __meminit void zone_pcp_init(struct zone *zone)
 
 __meminit int init_currently_empty_zone(struct zone *zone,
 					unsigned long zone_start_pfn,
-					unsigned long size)
+					unsigned long size,
+					enum memmap_context context)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int ret;
@@ -2680,7 +2691,8 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		if (!size)
 			continue;
 
-		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
+		ret = init_currently_empty_zone(zone, zone_start_pfn,
+						size, MEMMAP_EARLY);
 		BUG_ON(ret);
 		zone_start_pfn += size;
 	}
@@ -3321,6 +3333,10 @@ void *__init alloc_large_system_hash(const char *tablename,
 			numentries >>= (scale - PAGE_SHIFT);
 		else
 			numentries <<= (PAGE_SHIFT - scale);
+
+		/* Make sure we've got at least a 0-order allocation.. */
+		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+			numentries = PAGE_SIZE / bucketsize;
 	}
 	numentries = roundup_pow_of_two(numentries);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 57306fa0114d..669acb22b572 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -452,7 +452,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 		entry = ptep_clear_flush(vma, address, pte);
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
-		set_pte_at(vma, address, pte, entry);
+		set_pte_at(mm, address, pte, entry);
 		lazy_mmu_prot_update(entry);
 		ret = 1;
 	}
diff --git a/mm/slab.c b/mm/slab.c
index 0d4e57431de4..c6100628a6ef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3281,7 +3281,7 @@ retry:
 					flags | GFP_THISNODE, nid);
 	}
 
-	if (!obj) {
+	if (!obj && !(flags & __GFP_NO_GROW)) {
 		/*
 		 * This allocation will be performed within the constraints
 		 * of the current cpuset / memory policy requirements.
@@ -3310,7 +3310,7 @@ retry:
 			 */
 			goto retry;
 		} else {
-			kmem_freepages(cache, obj);
+			/* cache_grow already freed obj */
 			obj = NULL;
 		}
 	}
diff --git a/mm/slob.c b/mm/slob.c
index 2e9236e10ed1..5adc29cb58dd 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -60,6 +60,8 @@ static DEFINE_SPINLOCK(slob_lock);
 static DEFINE_SPINLOCK(block_lock);
 
 static void slob_free(void *b, int size);
+static void slob_timer_cbk(void);
+
 
 static void *slob_alloc(size_t size, gfp_t gfp, int align)
 {
@@ -326,7 +328,7 @@ const char *kmem_cache_name(struct kmem_cache *c)
 EXPORT_SYMBOL(kmem_cache_name);
 
 static struct timer_list slob_timer = TIMER_INITIALIZER(
-	(void (*)(unsigned long))kmem_cache_init, 0, 0);
+	(void (*)(unsigned long))slob_timer_cbk, 0, 0);
 
 int kmem_cache_shrink(struct kmem_cache *d)
 {
@@ -339,7 +341,12 @@ int kmem_ptr_validate(struct kmem_cache *a, const void *b)
 	return 0;
 }
 
-void kmem_cache_init(void)
+void __init kmem_cache_init(void)
+{
+	slob_timer_cbk();
+}
+
+static void slob_timer_cbk(void)
 {
 	void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b9fc0e5de6d5..a2d9bb4e80df 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry)
  *
  * This is needed for the suspend to disk (aka swsusp).
  */
-int swap_type_of(dev_t device, sector_t offset)
+int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 {
 	struct block_device *bdev = NULL;
 	int i;
@@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset)
 			continue;
 
 		if (!bdev) {
+			if (bdev_p)
+				*bdev_p = sis->bdev;
+
 			spin_unlock(&swap_lock);
 			return i;
 		}
@@ -459,6 +462,9 @@
 			se = list_entry(sis->extent_list.next,
 					struct swap_extent, list);
 			if (se->start_block == offset) {
+				if (bdev_p)
+					*bdev_p = sis->bdev;
+
 				spin_unlock(&swap_lock);
 				bdput(bdev);
 				return i;
diff --git a/mm/truncate.c b/mm/truncate.c
index 4a38dd1a4ce8..6c79ca4a1ca7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -60,12 +60,16 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
 			WARN_ON(++warncount < 5);
 	}
 
-	if (TestClearPageDirty(page) && account_size &&
-			mapping_cap_account_dirty(page->mapping)) {
-		dec_zone_page_state(page, NR_FILE_DIRTY);
-		task_io_account_cancelled_write(account_size);
+	if (TestClearPageDirty(page)) {
+		struct address_space *mapping = page->mapping;
+		if (mapping && mapping_cap_account_dirty(mapping)) {
+			dec_zone_page_state(page, NR_FILE_DIRTY);
+			if (account_size)
+				task_io_account_cancelled_write(account_size);
+		}
 	}
 }
+EXPORT_SYMBOL(cancel_dirty_page);
 
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
@@ -337,6 +341,15 @@ failed:
 	return 0;
 }
 
+static int do_launder_page(struct address_space *mapping, struct page *page)
+{
+	if (!PageDirty(page))
+		return 0;
+	if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
+		return 0;
+	return mapping->a_ops->launder_page(page);
+}
+
 /**
  * invalidate_inode_pages2_range - remove range of pages from an address_space
  * @mapping: the address_space
@@ -401,7 +414,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 						  PAGE_CACHE_SIZE, 0);
 				}
 			}
-			if (!invalidate_complete_page2(mapping, page))
+			ret = do_launder_page(mapping, page);
+			if (ret == 0 && !invalidate_complete_page2(mapping, page))
 				ret = -EIO;
 			unlock_page(page);
 		}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 63eb9ab0032b..7430df68cb64 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -692,7 +692,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			__count_vm_events(KSWAPD_STEAL, nr_freed);
 		} else
 			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
-		__count_vm_events(PGACTIVATE, nr_freed);
+		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
 
 		if (nr_taken == 0)
 			goto done;
@@ -1406,6 +1406,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 	return ret;
 }
 
+static unsigned long count_lru_pages(void)
+{
+	struct zone *zone;
+	unsigned long ret = 0;
+
+	for_each_zone(zone)
+		ret += zone->nr_active + zone->nr_inactive;
+	return ret;
+}
+
 /*
  * Try to free `nr_pages' of memory, system-wide, and return the number of
  * freed pages.
@@ -1420,7 +1430,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	unsigned long ret = 0;
 	int pass;
 	struct reclaim_state reclaim_state;
-	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 0,
@@ -1431,10 +1440,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = 0;
-	for_each_zone(zone)
-		lru_pages += zone->nr_active + zone->nr_inactive;
-
+	lru_pages = count_lru_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -1461,13 +1467,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	for (pass = 0; pass < 5; pass++) {
 		int prio;
 
-		/* Needed for shrinking slab caches later on */
-		if (!lru_pages)
-			for_each_zone(zone) {
-				lru_pages += zone->nr_active;
-				lru_pages += zone->nr_inactive;
-			}
-
 		/* Force reclaiming mapped pages in the passes #3 and #4 */
 		if (pass > 2) {
 			sc.may_swap = 1;
@@ -1483,7 +1482,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 				goto out;
 
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+			shrink_slab(sc.nr_scanned, sc.gfp_mask,
+					count_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 			if (ret >= nr_pages)
 				goto out;
@@ -1491,20 +1491,19 @@
 			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
 				congestion_wait(WRITE, HZ / 10);
 		}
-
-		lru_pages = 0;
 	}
 
 	/*
 	 * If ret = 0, we could not shrink LRUs, but there may be something
 	 * in slab caches
 	 */
-	if (!ret)
+	if (!ret) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+			shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
 			ret += reclaim_state.reclaimed_slab;
 		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+	}
 
 out:
 	current->reclaim_state = NULL;