summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/controllers/memory.txt27
-rw-r--r--mm/memcontrol.c41
2 files changed, 57 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 58f32c166fac..54253b7a8db2 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -237,11 +237,30 @@ reclaimed.
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
cgroup might have some charge associated with it, even though all
tasks have migrated away from it.
-Such charges are moved to its parent as much as possible and freed if parent
-is full. Both of RSS and CACHES are moved to parent.
-If both of them are busy, rmdir() returns -EBUSY.
+Such charges are freed (by default) or moved to its parent. When moved,
+both RSS and CACHES are moved to the parent.
+If both of them are busy, rmdir() returns -EBUSY. See also section 5.1.
-5. TODO
+5. Misc. interfaces.
+
+5.1 force_empty
+ The memory.force_empty interface is provided to make a cgroup's memory usage
+ empty. You can use this interface only when the cgroup has no tasks.
+ When anything is written to this file, for example
+
+ # echo 0 > memory.force_empty
+
+ almost all pages tracked by this memcg will be unmapped and freed. Some
+ pages cannot be freed because they are locked or in use. Such pages are moved
+ to the parent so that this cgroup becomes empty. The write may still return
+ -EBUSY if the cgroup is too busy.
+
+ A typical use case of this interface is calling it before rmdir().
+ Because rmdir() moves all pages to the parent, some out-of-use page caches can
+ be moved to the parent. If you want to avoid that, force_empty will be useful.
+
+
+6. TODO
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e00f25e6545f..decace3bb57e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1062,21 +1062,27 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
* make mem_cgroup's charge to be 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
-static int mem_cgroup_force_empty(struct mem_cgroup *mem)
+static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
{
int ret;
int node, zid, shrink;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+ struct cgroup *cgrp = mem->css.cgroup;
css_get(&mem->css);
shrink = 0;
+ /* should free all ? */
+ if (free_all)
+ goto try_to_free;
move_account:
while (mem->res.usage > 0) {
ret = -EBUSY;
- if (atomic_read(&mem->css.cgroup->count) > 0)
+ if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
+ goto out;
+ ret = -EINTR;
+ if (signal_pending(current))
goto out;
-
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
ret = 0;
@@ -1106,19 +1112,29 @@ out:
return ret;
try_to_free:
- /* returns EBUSY if we come here twice. */
- if (shrink) {
+ /* returns EBUSY if there is a task or if we come here twice. */
+ if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
ret = -EBUSY;
goto out;
}
+ /* we call try-to-free pages for make this cgroup empty */
+ lru_add_drain_all();
/* try to free all pages in this cgroup */
shrink = 1;
while (nr_retries && mem->res.usage > 0) {
int progress;
+
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
progress = try_to_free_mem_cgroup_pages(mem,
GFP_HIGHUSER_MOVABLE);
- if (!progress)
+ if (!progress) {
nr_retries--;
+ /* maybe some writeback is necessary */
+ congestion_wait(WRITE, HZ/10);
+ }
}
/* try move_account...there may be some *locked* pages. */
@@ -1128,6 +1144,12 @@ try_to_free:
goto out;
}
+int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
+{ /* installed as the .trigger handler of the "force_empty" control file; 'event' is not used here */
+ return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); /* free_all == true: jump straight to the try_to_free path; result is returned unchanged */
+}
+
+
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
@@ -1225,6 +1247,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
return 0;
}
+
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -1253,6 +1276,10 @@ static struct cftype mem_cgroup_files[] = {
.name = "stat",
.read_map = mem_control_stat_show,
},
+ {
+ .name = "force_empty",
+ .trigger = mem_cgroup_force_empty_write,
+ },
};
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
@@ -1350,7 +1377,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
- mem_cgroup_force_empty(mem);
+ mem_cgroup_force_empty(mem, false);
}
static void mem_cgroup_destroy(struct cgroup_subsys *ss,