7 files changed, 65 insertions, 63 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 404319477e71..12334aecf8ad 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -61,7 +61,7 @@ pgprot_t protection_map[16] = {
 
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
-int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 atomic_t vm_committed_space = ATOMIC_INIT(0);
 
 /*
@@ -203,13 +203,6 @@ static void remove_vm_struct(struct vm_area_struct *vma)
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
-/*
- *  sys_brk() for the most part doesn't need the global kernel
- *  lock, except when an application is doing something nasty
- *  like trying to un-brk an area that has already been mapped
- *  to a regular file.  in this case, the unmapping will need
- *  to invoke file system routines that need the global lock.
- */
 asmlinkage unsigned long sys_brk(unsigned long brk)
 {
 	unsigned long rlim, retval;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1e56076672f5..5ec8da12cfd9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -6,8 +6,8 @@
  *	for goading me into coding this file...
  *
  *  The routines in this file are used to kill a process when
- *  we're seriously out of memory. This gets called from kswapd()
- *  in linux/mm/vmscan.c when we really run out of memory.
+ *  we're seriously out of memory. This gets called from __alloc_pages()
+ *  in mm/page_alloc.c when we really run out of memory.
  *
  *  Since we won't call these routines often (on a well-configured
  *  machine) this file will double as a 'coding guide' and a signpost
@@ -20,13 +20,14 @@
 #include <linux/swap.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
+#include <linux/cpuset.h>
 
 /* #define DEBUG */
 
 /**
  * oom_badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
- * @p: current uptime in seconds
+ * @uptime: current uptime in seconds
  *
  * The formula used is relatively simple and documented inline in the
  * function. The main rationale is that we want to select a good task
@@ -57,9 +58,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 
 	/*
 	 * Processes which fork a lot of child processes are likely
-	 * a good choice. We add the vmsize of the childs if they
+	 * a good choice. We add the vmsize of the children if they
 	 * have an own mm. This prevents forking servers to flood the
-	 * machine with an endless amount of childs
+	 * machine with an endless amount of children
 	 */
 	list_for_each(tsk, &p->children) {
 		struct task_struct *chld;
@@ -143,28 +144,36 @@ static struct task_struct * select_bad_process(void)
 	struct timespec uptime;
 
 	do_posix_clock_monotonic_gettime(&uptime);
-	do_each_thread(g, p)
+	do_each_thread(g, p) {
+		unsigned long points;
+		int releasing;
+
 		/* skip the init task with pid == 1 */
-		if (p->pid > 1 && p->oomkilladj != OOM_DISABLE) {
-			unsigned long points;
-
-			/*
-			 * This is in the process of releasing memory so wait it
-			 * to finish before killing some other task by mistake.
-			 */
-			if ((unlikely(test_tsk_thread_flag(p, TIF_MEMDIE)) || (p->flags & PF_EXITING)) &&
-			    !(p->flags & PF_DEAD))
-				return ERR_PTR(-1UL);
-			if (p->flags & PF_SWAPOFF)
-				return p;
-
-			points = badness(p, uptime.tv_sec);
-			if (points > maxpoints || !chosen) {
-				chosen = p;
-				maxpoints = points;
-			}
+		if (p->pid == 1)
+			continue;
+		if (p->oomkilladj == OOM_DISABLE)
+			continue;
+		/* If p's nodes don't overlap ours, it won't help to kill p. */
+		if (!cpuset_excl_nodes_overlap(p))
+			continue;
+
+		/*
+		 * This is in the process of releasing memory so for wait it
+		 * to finish before killing some other task by mistake.
+		 */
+		releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
+						p->flags & PF_EXITING;
+		if (releasing && !(p->flags & PF_DEAD))
+			return ERR_PTR(-1UL);
+		if (p->flags & PF_SWAPOFF)
+			return p;
+
+		points = badness(p, uptime.tv_sec);
+		if (points > maxpoints || !chosen) {
+			chosen = p;
+			maxpoints = points;
 		}
-	while_each_thread(g, p);
+	} while_each_thread(g, p);
 	return chosen;
 }
 
@@ -189,7 +198,8 @@ static void __oom_kill_task(task_t *p)
 		return;
 	}
 	task_unlock(p);
-	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
+	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n",
+							p->pid, p->comm);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b06a9636d971..3974fd81d27c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -42,13 +42,13 @@
  * MCD - HACK: Find somewhere to initialize this EARLY, or make this
  * initializer cleaner
  */
-nodemask_t node_online_map = { { [0] = 1UL } };
+nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
 EXPORT_SYMBOL(node_online_map);
-nodemask_t node_possible_map = NODE_MASK_ALL;
+nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list;
-unsigned long totalram_pages;
-unsigned long totalhigh_pages;
+struct pglist_data *pgdat_list __read_mostly;
+unsigned long totalram_pages __read_mostly;
+unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 
 /*
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages);
  * Used by page_zone() to look up the address of the struct zone whose
  * id is encoded in the upper bits of page->flags
  */
-struct zone *zone_table[1 << ZONETABLE_SHIFT];
+struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -806,11 +806,14 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
 	classzone_idx = zone_idx(zones[0]);
 
 restart:
-	/* Go through the zonelist once, looking for a zone with enough free */
+	/*
+	 * Go through the zonelist once, looking for a zone with enough free.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 */
 	for (i = 0; (z = zones[i]) != NULL; i++) {
 		int do_reclaim = should_reclaim_zone(z, gfp_mask);
 
-		if (!cpuset_zone_allowed(z))
+		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
 			continue;
 
 		/*
@@ -845,6 +848,7 @@ zone_reclaim_retry:
 	 *
 	 * This is the last chance, in general, before the goto nopage.
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
 	for (i = 0; (z = zones[i]) != NULL; i++) {
 		if (!zone_watermark_ok(z, order, z->pages_min,
@@ -852,7 +856,7 @@ zone_reclaim_retry:
 				       gfp_mask & __GFP_HIGH))
 			continue;
 
-		if (wait && !cpuset_zone_allowed(z))
+		if (wait && !cpuset_zone_allowed(z, gfp_mask))
 			continue;
 
 		page = buffered_rmqueue(z, order, gfp_mask);
@@ -867,7 +871,7 @@ zone_reclaim_retry:
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 			/* go through the zonelist yet again, ignoring mins */
 			for (i = 0; (z = zones[i]) != NULL; i++) {
-				if (!cpuset_zone_allowed(z))
+				if (!cpuset_zone_allowed(z, gfp_mask))
 					continue;
 				page = buffered_rmqueue(z, order, gfp_mask);
 				if (page)
@@ -903,7 +907,7 @@ rebalance:
 					       gfp_mask & __GFP_HIGH))
 				continue;
 
-			if (!cpuset_zone_allowed(z))
+			if (!cpuset_zone_allowed(z, gfp_mask))
 				continue;
 
 			page = buffered_rmqueue(z, order, gfp_mask);
@@ -922,7 +926,7 @@ rebalance:
 					       classzone_idx, 0, 0))
 				continue;
 
-			if (!cpuset_zone_allowed(z))
+			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
 				continue;
 
 			page = buffered_rmqueue(z, order, gfp_mask);
diff --git a/mm/readahead.c b/mm/readahead.c
index b840e7c6ea74..d0b50034e245 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -540,6 +540,7 @@ void handle_ra_miss(struct address_space *mapping,
 {
 	ra->flags |= RA_FLAG_MISS;
 	ra->flags &= ~RA_FLAG_INCACHE;
+	ra->cache_hit = 0;
 }
 
 /*
diff --git a/mm/shmem.c b/mm/shmem.c
index bdc4bbb6ddbb..db2c9e8d9909 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -180,7 +180,7 @@ static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
-static struct backing_dev_info shmem_backing_dev_info = {
+static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/slab.c b/mm/slab.c
index a9ff4f7f9860..d7c4443991fe 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2558,24 +2558,18 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp)
 EXPORT_SYMBOL(kmem_cache_free);
 
 /**
- * kcalloc - allocate memory for an array. The memory is set to zero.
- * @n: number of elements.
- * @size: element size.
+ * kzalloc - allocate memory. The memory is set to zero.
+ * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
  */
-void *kcalloc(size_t n, size_t size, unsigned int __nocast flags)
+void *kzalloc(size_t size, unsigned int __nocast flags)
 {
-	void *ret = NULL;
-
-	if (n != 0 && size > INT_MAX / n)
-		return ret;
-
-	ret = kmalloc(n * size, flags);
+	void *ret = kmalloc(size, flags);
 	if (ret)
-		memset(ret, 0, n * size);
+		memset(ret, 0, size);
 	return ret;
 }
-EXPORT_SYMBOL(kcalloc);
+EXPORT_SYMBOL(kzalloc);
 
 /**
  * kfree - free previously allocated memory
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0095533cdde9..a740778f688d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -894,7 +894,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
 		if (zone->present_pages == 0)
 			continue;
 
-		if (!cpuset_zone_allowed(zone))
+		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
 		zone->temp_priority = sc->priority;
@@ -940,7 +940,7 @@ int try_to_free_pages(struct zone **zones, unsigned int gfp_mask)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone))
+		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
 		zone->temp_priority = DEF_PRIORITY;
@@ -986,7 +986,7 @@ out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone))
+		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
 		zone->prev_priority = zone->temp_priority;
@@ -1256,7 +1256,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!cpuset_zone_allowed(zone))
+	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 		return;
 	if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
 		return;