summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c128
1 files changed, 101 insertions, 27 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..8deb9d0fd5b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
+#include <linux/memory.h>
#include <trace/events/kmem.h>
#include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
zone->free_area[order].nr_free++;
}
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
/*
* free_page_mlock() -- clean up attempts to free and mlocked() page.
* Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
__dec_zone_page_state(page, NR_MLOCK);
__count_vm_event(UNEVICTABLE_MLOCKFREED);
}
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
static inline int free_pages_check(struct page *page)
{
@@ -559,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
page = list_entry(list->prev, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
- __free_one_page(page, zone, 0, migratetype);
- trace_mm_page_pcpu_drain(page, 0, migratetype);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, page_private(page));
+ trace_mm_page_pcpu_drain(page, 0, page_private(page));
} while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
@@ -1225,10 +1223,10 @@ again:
}
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
spin_unlock(&zone->lock);
if (!page)
goto failed;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1658,12 +1656,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (page)
goto out;
- /* The OOM killer will not help higher order allocs */
- if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
- goto out;
-
+ if (!(gfp_mask & __GFP_NOFAIL)) {
+ /* The OOM killer will not help higher order allocs */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ goto out;
+ /*
+ * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+ * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+ * The caller should handle page allocation failure by itself if
+ * it specifies __GFP_THISNODE.
+ * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+ */
+ if (gfp_mask & __GFP_THISNODE)
+ goto out;
+ }
/* Exhausted what can be done so it's blamo time */
- out_of_memory(zonelist, gfp_mask, order);
+ out_of_memory(zonelist, gfp_mask, order, nodemask);
out:
clear_zonelist_oom(zonelist, gfp_mask);
@@ -2395,13 +2403,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
{
char saved_string[NUMA_ZONELIST_ORDER_LEN];
int ret;
+ static DEFINE_MUTEX(zl_order_mutex);
+ mutex_lock(&zl_order_mutex);
if (write)
- strncpy(saved_string, (char*)table->data,
- NUMA_ZONELIST_ORDER_LEN);
+ strcpy(saved_string, (char*)table->data);
ret = proc_dostring(table, write, buffer, length, ppos);
if (ret)
- return ret;
+ goto out;
if (write) {
int oldval = user_zonelist_order;
if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2414,7 +2423,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
} else if (oldval != user_zonelist_order)
build_all_zonelists();
}
- return 0;
+out:
+ mutex_unlock(&zl_order_mutex);
+ return ret;
}
@@ -3127,7 +3138,7 @@ static int __cpuinit process_zones(int cpu)
if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
- (zone->present_pages / percpu_pagelist_fraction));
+ (zone->present_pages / percpu_pagelist_fraction));
}
return 0;
@@ -3573,7 +3584,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
*/
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
@@ -3988,7 +3999,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
}
/* Merge backward if suitable */
- if (start_pfn < early_node_map[i].end_pfn &&
+ if (start_pfn < early_node_map[i].start_pfn &&
end_pfn >= early_node_map[i].start_pfn) {
early_node_map[i].start_pfn = start_pfn;
return;
@@ -4102,7 +4113,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
}
/* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
{
sort(early_node_map, (size_t)nr_nodemap_entries,
sizeof(struct node_active_region),
@@ -5002,23 +5013,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
int set_migratetype_isolate(struct page *page)
{
struct zone *zone;
- unsigned long flags;
+ struct page *curr_page;
+ unsigned long flags, pfn, iter;
+ unsigned long immobile = 0;
+ struct memory_isolate_notify arg;
+ int notifier_ret;
int ret = -EBUSY;
int zone_idx;
zone = page_zone(page);
zone_idx = zone_idx(zone);
+
spin_lock_irqsave(&zone->lock, flags);
+ if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+ zone_idx == ZONE_MOVABLE) {
+ ret = 0;
+ goto out;
+ }
+
+ pfn = page_to_pfn(page);
+ arg.start_pfn = pfn;
+ arg.nr_pages = pageblock_nr_pages;
+ arg.pages_found = 0;
+
/*
- * In future, more migrate types will be able to be isolation target.
+ * It may be possible to isolate a pageblock even if the
+ * migratetype is not MIGRATE_MOVABLE. The memory isolation
+ * notifier chain is used by balloon drivers to return the
+ * number of pages in a range that are held by the balloon
+ * driver to shrink memory. If all the pages are accounted for
+ * by balloons, are free, or on the LRU, isolation can continue.
+ * Later, for example, when memory hotplug notifier runs, these
+ * pages reported as "can be isolated" should be isolated(freed)
+ * by the balloon driver through the memory notifier chain.
*/
- if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
- zone_idx != ZONE_MOVABLE)
+ notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+ notifier_ret = notifier_to_errno(notifier_ret);
+ if (notifier_ret || !arg.pages_found)
goto out;
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
- move_freepages_block(zone, page, MIGRATE_ISOLATE);
- ret = 0;
+
+ for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+ if (!pfn_valid_within(pfn))
+ continue;
+
+ curr_page = pfn_to_page(iter);
+ if (!page_count(curr_page) || PageLRU(curr_page))
+ continue;
+
+ immobile++;
+ }
+
+ if (arg.pages_found == immobile)
+ ret = 0;
+
out:
+ if (!ret) {
+ set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+ move_freepages_block(zone, page, MIGRATE_ISOLATE);
+ }
+
spin_unlock_irqrestore(&zone->lock, flags);
if (!ret)
drain_all_pages();
@@ -5085,3 +5138,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
spin_unlock_irqrestore(&zone->lock, flags);
}
#endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long flags;
+ int order;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order < MAX_ORDER; order++) {
+ struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+ if (PageBuddy(page_head) && page_order(page_head) >= order)
+ break;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ return order < MAX_ORDER;
+}
+#endif