From 0e093d99763eb4cea09f8ca4f1d01f34e121d10b Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Tue, 26 Oct 2010 14:21:45 -0700
Subject: writeback: do not sleep on the congestion queue if there are no
 congested BDIs or if significant congestion is not being encountered in the
 current zone

If congestion_wait() is called with no BDI congested, the caller will
sleep for the full timeout, and this may be an unnecessary sleep.  This
patch adds wait_iff_congested(), which checks congestion and only sleeps
if a BDI is congested; otherwise, it calls cond_resched() to ensure the
caller is not hogging the CPU longer than its quota, but it does not
sleep.

This is aimed at reducing some of the major desktop stalls reported
during IO.  For example, while kswapd is operating, it calls
congestion_wait() but it could just have been reclaiming clean page cache
pages with no congestion.  Without this patch, it would sleep for a full
timeout; after this patch, it will just call schedule() if it has been on
the CPU too long.  Similar logic applies to direct reclaimers that are
not making enough progress.

Signed-off-by: Mel Gorman
Cc: Johannes Weiner
Cc: Minchan Kim
Cc: Wu Fengguang
Cc: KAMEZAWA Hiroyuki
Cc: KOSAKI Motohiro
Cc: Rik van Riel
Cc: Jens Axboe
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmscan.c | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

(limited to 'mm/vmscan.c')

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 130ad0239f52..30fd658bb289 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -401,10 +401,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
-		disable_lumpy_reclaim_mode(sc);
+	if (!may_write_to_queue(mapping->backing_dev_info, sc))
 		return PAGE_KEEP;
-	}
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
@@ -681,11 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
+					struct zone *zone,
 					struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
+	unsigned long nr_dirty = 0;
+	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
@@ -705,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(page_zone(page) != zone);
 
 		sc->nr_scanned++;
 
@@ -782,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
+			nr_dirty++;
+
 			if (references == PAGEREF_RECLAIM_CLEAN)
 				goto keep_locked;
 			if (!may_enter_fs)
@@ -792,6 +796,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Page is dirty, try to write it out here */
 			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
+				nr_congested++;
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
@@ -902,6 +907,15 @@ keep_lumpy:
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 
+	/*
+	 * Tag a zone as congested if all the dirty pages encountered were
+	 * backed by a congested BDI. In this case, reclaimers should just
+	 * back off and wait for congestion to clear because further reclaim
+	 * will encounter the same problem
+	 */
+	if (nr_dirty == nr_congested)
+		zone_set_flag(zone, ZONE_CONGESTED);
+
 	free_page_list(&free_pages);
 
 	list_splice(&ret_pages, page_list);
@@ -1386,12 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc);
+	nr_reclaimed = shrink_page_list(&page_list, zone, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_lumpy_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, sc);
+		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
 	}
 
 	local_irq_disable();
@@ -1982,8 +1996,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 		/* Take a nap, wait for some writeback to complete */
 		if (!sc->hibernation_mode && sc->nr_scanned &&
-		    priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		    priority < DEF_PRIORITY - 2) {
+			struct zone *preferred_zone;
+
+			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+							NULL, &preferred_zone);
+			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+		}
 	}
 
 out:
@@ -2282,6 +2301,15 @@ loop_again:
 				if (!zone_watermark_ok(zone, order,
 					min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
+			} else {
+				/*
+				 * If a zone reaches its high watermark,
+				 * consider it to be no longer congested. It's
+				 * possible there are dirty pages backed by
+				 * congested BDIs but as pressure is relieved,
+				 * spectulatively avoid congestion waits
+				 */
+				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 		}
 
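As a concrete illustration of the behaviour the changelog describes, below is a
minimal userspace model of the "wait only if congested" decision.  It is a
sketch, not the kernel's wait_iff_congested() implementation:
wait_iff_congested_model, bdi_congested and zone_congested are made-up names
standing in for the kernel's congested-BDI bookkeeping and the ZONE_CONGESTED
flag set by shrink_page_list() above.

#define _DEFAULT_SOURCE
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Model of the decision: sleep only when a BDI is congested and the current
 * zone has seen congestion; otherwise just yield the CPU and return. */
static void wait_iff_congested_model(bool bdi_congested, bool zone_congested,
				     long timeout_ms)
{
	if (!bdi_congested || !zone_congested) {
		/* No congestion worth waiting on: give up the CPU once
		 * (the kernel uses cond_resched()) and return immediately. */
		sched_yield();
		return;
	}

	/* Congested: model congestion_wait() as sleeping for the timeout
	 * (the real code can also be woken early when congestion clears). */
	struct timespec ts = {
		.tv_sec = timeout_ms / 1000,
		.tv_nsec = (timeout_ms % 1000) * 1000000L,
	};
	nanosleep(&ts, NULL);
}

int main(void)
{
	wait_iff_congested_model(false, false, 100);	/* returns at once */
	wait_iff_congested_model(true, true, 100);	/* sleeps ~100 ms */
	printf("done\n");
	return 0;
}

When neither condition indicates congestion, the call returns almost
immediately after yielding, which is exactly the stall reduction the patch is
after; the full HZ/10 sleep is only paid when writeback really is backed up.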
security/min_addr.c (at commit 788084aba2ab):
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/security.h>
#include <linux/sysctl.h>

/* amount of vm to protect from userspace access by both DAC and the LSM */
unsigned long mmap_min_addr;
/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */

/*
 * Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR)
 */
static void update_mmap_min_addr(void)
{
#ifdef CONFIG_LSM_MMAP_MIN_ADDR
	if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR)
		mmap_min_addr = dac_mmap_min_addr;
	else
		mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR;
#else
	mmap_min_addr = dac_mmap_min_addr;
#endif
}
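
/*
 * Worked example (illustrative numbers, not necessarily this kernel's
 * configured defaults): with CONFIG_LSM_MMAP_MIN_ADDR = 65536, lowering
 * dac_mmap_min_addr to 4096 via sysctl leaves mmap_min_addr at 65536,
 * while raising dac_mmap_min_addr to 131072 moves mmap_min_addr up to
 * 131072 as well.
 */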

/*
 * sysctl handler which just sets dac_mmap_min_addr = the new value and then
 * calls update_mmap_min_addr() so non-MAP_FIXED hints get rounded properly
 */
int mmap_min_addr_handler(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	if (write && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	update_mmap_min_addr();

	return ret;
}

static int __init init_mmap_min_addr(void)
{
	update_mmap_min_addr();

	return 0;
}
pure_initcall(init_mmap_min_addr);
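
For reference, a small userspace probe can show the effect of the limit that
mmap_min_addr_handler() maintains.  This is an illustrative sketch (the
0x1000 test address is just an example choice); an unprivileged MAP_FIXED
mapping below vm.mmap_min_addr is refused with EPERM.

#define _DEFAULT_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned long min_addr = 0;
	FILE *f = fopen("/proc/sys/vm/mmap_min_addr", "r");

	/* Read the effective limit computed by update_mmap_min_addr(). */
	if (f) {
		if (fscanf(f, "%lu", &min_addr) != 1)
			min_addr = 0;
		fclose(f);
	}
	printf("vm.mmap_min_addr = %lu\n", min_addr);

	/* Ask for one page at a low fixed address; below the limit this is
	 * refused for a process without CAP_SYS_RAWIO. */
	void *p = mmap((void *)0x1000, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	if (p == MAP_FAILED)
		printf("mmap at 0x1000 failed: %s\n", strerror(errno));
	else
		munmap(p, 4096);
	return 0;
}

Writing to the sysctl (for example, sysctl -w vm.mmap_min_addr=65536) goes
through mmap_min_addr_handler() above and is rejected with -EPERM for callers
without CAP_SYS_RAWIO.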