summaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorTejun Heo2015-09-29 19:04:26 +0200
committerJens Axboe2015-10-12 18:31:13 +0200
commitc5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e (patch)
tree797d8c3edf45a4b84beee61426b98be8f77afc4e /mm/memcontrol.c
parentwriteback: memcg dirty_throttle_control should be initialized with wb->memcg_... (diff)
downloadkernel-qcow2-linux-c5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e.tar.gz
kernel-qcow2-linux-c5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e.tar.xz
kernel-qcow2-linux-c5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e.zip
writeback: fix incorrect calculation of available memory for memcg domains
For memcg domains, the amount of available memory was calculated as min(the amount currently in use + headroom according to memcg, total clean memory) This isn't quite correct as what should be capped by the amount of clean memory is the headroom, not the sum of memory in use and headroom. For example, if a memcg domain has a significant amount of dirty memory, the above can lead to a value which is lower than the current amount in use which doesn't make much sense. In most circumstances, the above leads to a number which is somewhat but not drastically lower. As the amount of memory which can be readily allocated to the memcg domain is capped by the amount of system-wide clean memory which is not already assigned to the memcg itself, the number we want is the amount currently in use + min(headroom according to memcg, clean memory elsewhere in the system) This patch updates mem_cgroup_wb_stats() to return the number of filepages and headroom instead of the calculated available pages. mdtc_cap_avail() is renamed to mdtc_calc_avail() and performs the above calculation from file, headroom, dirty and globally clean pages. v2: Dummy mem_cgroup_wb_stats() implementation wasn't updated leading to build failure when !CGROUP_WRITEBACK. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: c2aa723a6093 ("writeback: implement memcg writeback domain based throttling") Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c35
1 files changed, 17 insertions, 18 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1fedbde68f59..882c10cfd0ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3740,44 +3740,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
- * @pavail: out parameter for number of available pages
+ * @pfilepages: out parameter for number of file pages
+ * @pheadroom: out parameter for number of allocatable pages according to memcg
* @pdirty: out parameter for number of dirty pages
* @pwriteback: out parameter for number of pages under writeback
*
- * Determine the numbers of available, dirty, and writeback pages in @wb's
- * memcg. Dirty and writeback are self-explanatory. Available is a bit
- * more involved.
+ * Determine the numbers of file, headroom, dirty, and writeback pages in
+ * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom
+ * is a bit more involved.
*
- * A memcg's headroom is "min(max, high) - used". The available memory is
- * calculated as the lowest headroom of itself and the ancestors plus the
- * number of pages already being used for file pages. Note that this
- * doesn't consider the actual amount of available memory in the system.
- * The caller should further cap *@pavail accordingly.
+ * A memcg's headroom is "min(max, high) - used". In the hierarchy, the
+ * headroom is calculated as the lowest headroom of itself and the
+ * ancestors. Note that this doesn't consider the actual amount of
+ * available memory in the system. The caller should further cap
+ * *@pheadroom accordingly.
*/
-void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
- unsigned long *pdirty, unsigned long *pwriteback)
+void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
+ unsigned long *pheadroom, unsigned long *pdirty,
+ unsigned long *pwriteback)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- unsigned long head_room = PAGE_COUNTER_MAX;
- unsigned long file_pages;
*pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
*pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
+ *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
+ (1 << LRU_ACTIVE_FILE));
+ *pheadroom = PAGE_COUNTER_MAX;
- file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
- (1 << LRU_ACTIVE_FILE));
while ((parent = parent_mem_cgroup(memcg))) {
unsigned long ceiling = min(memcg->memory.limit, memcg->high);
unsigned long used = page_counter_read(&memcg->memory);
- head_room = min(head_room, ceiling - min(ceiling, used));
+ *pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
memcg = parent;
}
-
- *pavail = file_pages + head_room;
}
#else /* CONFIG_CGROUP_WRITEBACK */