summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h16
-rw-r--r--include/linux/sched.h2
-rw-r--r--mm/memcontrol.c75
-rw-r--r--mm/memory.c4
4 files changed, 71 insertions, 26 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 50e3e807b427..57a202f31683 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -507,16 +507,16 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
{
- WARN_ON(current->memcg_may_oom);
- current->memcg_may_oom = 1;
+ WARN_ON(current->in_user_fault);
+ current->in_user_fault = 1;
}
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
{
- WARN_ON(!current->memcg_may_oom);
- current->memcg_may_oom = 0;
+ WARN_ON(!current->in_user_fault);
+ current->in_user_fault = 0;
}
static inline bool task_in_memcg_oom(struct task_struct *p)
@@ -961,11 +961,11 @@ static inline void mem_cgroup_handle_over_high(void)
{
}
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
{
}
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
{
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1827f4a7a6de..066a2c328653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -722,7 +722,7 @@ struct task_struct {
unsigned restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
- unsigned memcg_may_oom:1;
+ unsigned in_user_fault:1;
#ifndef CONFIG_SLOB
unsigned memcg_kmem_skip_account:1;
#endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c071af193986..d6724bed57d8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1534,28 +1534,53 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
}
-static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+enum oom_status {
+ OOM_SUCCESS,
+ OOM_FAILED,
+ OOM_ASYNC,
+ OOM_SKIPPED
+};
+
+static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
{
- if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
- return;
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ return OOM_SKIPPED;
+
/*
* We are in the middle of the charge context here, so we
* don't want to block when potentially sitting on a callstack
* that holds all kinds of filesystem and mm locks.
*
- * Also, the caller may handle a failed allocation gracefully
- * (like optional page cache readahead) and so an OOM killer
- * invocation might not even be necessary.
+ * cgroup1 allows disabling the OOM killer and waiting for outside
+ * handling until the charge can succeed; remember the context and put
+ * the task to sleep at the end of the page fault when all locks are
+ * released.
+ *
+ * On the other hand, in-kernel OOM killer allows for an async victim
+ * memory reclaim (oom_reaper) and that means that we are not solely
+ * relying on the oom victim to make a forward progress and we can
+ * invoke the oom killer here.
*
- * That's why we don't do anything here except remember the
- * OOM context and then deal with it at the end of the page
- * fault when the stack is unwound, the locks are released,
- * and when we know whether the fault was overall successful.
+ * Please note that mem_cgroup_out_of_memory might fail to find a
+ * victim and then we have to bail out from the charge path.
*/
- css_get(&memcg->css);
- current->memcg_in_oom = memcg;
- current->memcg_oom_gfp_mask = mask;
- current->memcg_oom_order = order;
+ if (memcg->oom_kill_disable) {
+ if (!current->in_user_fault)
+ return OOM_SKIPPED;
+ css_get(&memcg->css);
+ current->memcg_in_oom = memcg;
+ current->memcg_oom_gfp_mask = mask;
+ current->memcg_oom_order = order;
+
+ return OOM_ASYNC;
+ }
+
+ if (mem_cgroup_out_of_memory(memcg, mask, order))
+ return OOM_SUCCESS;
+
+ WARN(1,"Memory cgroup charge failed because of no reclaimable memory! "
+ "This looks like a misconfiguration or a kernel bug.");
+ return OOM_FAILED;
}
/**
@@ -1950,6 +1975,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned long nr_reclaimed;
bool may_swap = true;
bool drained = false;
+ bool oomed = false;
+ enum oom_status oom_status;
if (mem_cgroup_is_root(memcg))
return 0;
@@ -2037,6 +2064,9 @@ retry:
if (nr_retries--)
goto retry;
+ if (gfp_mask & __GFP_RETRY_MAYFAIL && oomed)
+ goto nomem;
+
if (gfp_mask & __GFP_NOFAIL)
goto force;
@@ -2045,8 +2075,23 @@ retry:
memcg_memory_event(mem_over_limit, MEMCG_OOM);
- mem_cgroup_oom(mem_over_limit, gfp_mask,
+ /*
+ * keep retrying as long as the memcg oom killer is able to make
+ * a forward progress or bypass the charge if the oom killer
+ * couldn't make any progress.
+ */
+ oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
get_order(nr_pages * PAGE_SIZE));
+ switch (oom_status) {
+ case OOM_SUCCESS:
+ nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+ oomed = true;
+ goto retry;
+ case OOM_FAILED:
+ goto force;
+ default:
+ goto nomem;
+ }
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
diff --git a/mm/memory.c b/mm/memory.c
index 175f344e1523..ae2ec887508b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4153,7 +4153,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
* space. Kernel faults are handled more gracefully.
*/
if (flags & FAULT_FLAG_USER)
- mem_cgroup_oom_enable();
+ mem_cgroup_enter_user_fault();
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
@@ -4161,7 +4161,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
ret = __handle_mm_fault(vma, address, flags);
if (flags & FAULT_FLAG_USER) {
- mem_cgroup_oom_disable();
+ mem_cgroup_exit_user_fault();
/*
* The task may have entered a memcg OOM situation but
* if the allocation error was handled gracefully (no