From 7d699ddb2b181a2c76e5ea18b1bdf102c4bebe4b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 May 2014 15:09:13 -0400 Subject: cgroup, memcg: allocate cgroup ID from 1 Currently, cgroup->id is allocated from 0, which is always assigned to the root cgroup; unfortunately, memcg wants to use ID 0 to indicate invalid IDs and ends up incrementing all IDs by one. It's reasonable to reserve 0 for special purposes. This patch updates cgroup core so that ID 0 is not used and the root cgroups get ID 1. The ID incrementing is removed form memcg. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Acked-by: Li Zefan --- mm/memcontrol.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29501f040568..1d0b29715b73 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -527,18 +527,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - /* - * The ID of the root cgroup is 0, but memcg treat 0 as an - * invalid ID, so we return (cgroup_id + 1). - */ - return memcg->css.cgroup->id + 1; + return memcg->css.cgroup->id; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { struct cgroup_subsys_state *css; - css = css_from_id(id - 1, &memory_cgrp_subsys); + css = css_from_id(id, &memory_cgrp_subsys); return mem_cgroup_from_css(css); } -- cgit v1.2.3-55-g7522 From 15a4c835e4ed3e60dd68727cd1907e3dd89563f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 May 2014 15:09:14 -0400 Subject: cgroup, memcg: implement css->id and convert css_from_id() to use it Until now, cgroup->id has been used to identify all the associated csses and css_from_id() takes cgroup ID and returns the matching css by looking up the cgroup and then dereferencing the css associated with it; however, now that the lifetimes of cgroup and css are separate, this is incorrect and breaks on the unified hierarchy when a controller is disabled and enabled back again before the previous instance is released. This patch adds css->id which is a subsystem-unique ID and converts css_from_id() to look up by the new css->id instead. memcg is the only user of css_from_id() and also converted to use css->id instead. For traditional hierarchies, this shouldn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Jianyu Zhan Acked-by: Li Zefan --- include/linux/cgroup.h | 9 ++++++++ kernel/cgroup.c | 59 ++++++++++++++++++++++++++++++++------------------ mm/memcontrol.c | 4 ++-- 3 files changed, 49 insertions(+), 23 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 793f70a48820..2dfabb3b749a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,6 +62,12 @@ struct cgroup_subsys_state { /* the parent css */ struct cgroup_subsys_state *parent; + /* + * Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + unsigned int flags; /* percpu_ref killing and RCU release */ @@ -655,6 +661,9 @@ struct cgroup_subsys { /* link to parent, protected by cgroup_lock() */ struct cgroup_root *root; + /* idr for css->id */ + struct idr css_idr; + /* * List of cftypes. Each entry is the first entry of an array * terminated by zero length name. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f1c98c527b2d..a1a20e8c973a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -100,8 +100,8 @@ static DECLARE_RWSEM(css_set_rwsem); #endif /* - * Protects cgroup_idr so that IDs can be released without grabbing - * cgroup_mutex. + * Protects cgroup_idr and css_idr so that IDs can be released without + * grabbing cgroup_mutex. */ static DEFINE_SPINLOCK(cgroup_idr_lock); @@ -1089,12 +1089,6 @@ static void cgroup_put(struct cgroup *cgrp) if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp))) return; - /* - * XXX: cgrp->id is only used to look up css's. As cgroup and - * css's lifetimes will be decoupled, it should be made - * per-subsystem and moved to css->id so that lookups are - * successful until the target css is released. - */ cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgrp->id = -1; @@ -4104,8 +4098,11 @@ static void css_release(struct percpu_ref *ref) { struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); + struct cgroup_subsys *ss = css->ss; + + RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); + cgroup_idr_remove(&ss->css_idr, css->id); - RCU_INIT_POINTER(css->cgroup->subsys[css->ss->id], NULL); call_rcu(&css->rcu_head, css_free_rcu_fn); } @@ -4195,9 +4192,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) if (err) goto err_free_css; + err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT); + if (err < 0) + goto err_free_percpu_ref; + css->id = err; + err = cgroup_populate_dir(cgrp, 1 << ss->id); if (err) - goto err_free_percpu_ref; + goto err_free_id; + + /* @css is ready to be brought online now, make it visible */ + cgroup_idr_replace(&ss->css_idr, css, css->id); err = online_css(css); if (err) @@ -4216,6 +4221,8 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) err_clear_dir: cgroup_clear_dir(css->cgroup, 1 << css->ss->id); +err_free_id: + cgroup_idr_remove(&ss->css_idr, css->id); err_free_percpu_ref: percpu_ref_cancel_init(&css->refcnt); err_free_css: @@ -4642,7 +4649,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .rename = cgroup_rename, }; -static void __init cgroup_init_subsys(struct cgroup_subsys *ss) +static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) { struct cgroup_subsys_state *css; @@ -4651,6 +4658,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) mutex_lock(&cgroup_tree_mutex); mutex_lock(&cgroup_mutex); + idr_init(&ss->css_idr); INIT_LIST_HEAD(&ss->cfts); /* Create the root cgroup state for this subsystem */ @@ -4659,6 +4667,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); + if (early) { + /* idr_alloc() can't be called safely during early init */ + css->id = 1; + } else { + css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); + BUG_ON(css->id < 0); + } /* Update the init_css_set to contain a subsys * pointer to this state - since the subsystem is @@ -4709,7 +4724,7 @@ int __init cgroup_init_early(void) ss->name = cgroup_subsys_name[i]; if (ss->early_init) - cgroup_init_subsys(ss); + cgroup_init_subsys(ss, true); } return 0; } @@ -4741,8 +4756,16 @@ int __init cgroup_init(void) mutex_unlock(&cgroup_tree_mutex); for_each_subsys(ss, ssid) { - if (!ss->early_init) - cgroup_init_subsys(ss); + if (ss->early_init) { + struct cgroup_subsys_state *css = + init_css_set.subsys[ss->id]; + + css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, + GFP_KERNEL); + BUG_ON(css->id < 0); + } else { + cgroup_init_subsys(ss, false); + } list_add_tail(&init_css_set.e_cset_node[ssid], &cgrp_dfl_root.cgrp.e_csets[ssid]); @@ -5196,14 +5219,8 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, */ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) { - struct cgroup *cgrp; - WARN_ON_ONCE(!rcu_read_lock_held()); - - cgrp = idr_find(&ss->root->cgroup_idr, id); - if (cgrp) - return cgroup_css(cgrp, ss); - return NULL; + return idr_find(&ss->css_idr, id); } #ifdef CONFIG_CGROUP_DEBUG diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1d0b29715b73..c3f82f69ef58 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -527,7 +527,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - return memcg->css.cgroup->id; + return memcg->css.id; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) @@ -6401,7 +6401,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); - if (css->cgroup->id > MEM_CGROUP_ID_MAX) + if (css->id > MEM_CGROUP_ID_MAX) return -ENOSPC; if (!parent) -- cgit v1.2.3-55-g7522 From ec903c0c858e4963a9e0724bdcadfa837253341c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:11:01 -0400 Subject: cgroup: rename css_tryget*() to css_tryget_online*() Unlike the more usual refcnting, what css_tryget() provides is the distinction between online and offline csses instead of protection against upping a refcnt which already reached zero. cgroup is planning to provide actual tryget which fails if the refcnt already reached zero. Let's rename the existing trygets so that they clearly indicate that they're onliness. I thought about keeping the existing names as-are and introducing new names for the planned actual tryget; however, given that each controller participates in the synchronization of the online state, it seems worthwhile to make it explicit that these functions are about on/offline state. Rename css_tryget() to css_tryget_online() and css_tryget_from_dir() to css_tryget_online_from_dir(). This is pure rename. v2: cgroup_freezer grew new usages of css_tryget(). Update accordingly. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Vivek Goyal Cc: Jens Axboe Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- block/blk-cgroup.c | 2 +- fs/bio.c | 2 +- include/linux/cgroup.h | 14 +++++++------- kernel/cgroup.c | 40 ++++++++++++++++++++-------------------- kernel/cgroup_freezer.c | 4 ++-- kernel/cpuset.c | 6 +++--- kernel/events/core.c | 3 ++- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 46 ++++++++++++++++++++++++---------------------- 9 files changed, 61 insertions(+), 58 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1039fb9ff5f5..9f5bce33e6fe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -185,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, lockdep_assert_held(q->queue_lock); /* blkg holds a reference to blkcg */ - if (!css_tryget(&blkcg->css)) { + if (!css_tryget_online(&blkcg->css)) { ret = -EINVAL; goto err_free_blkg; } diff --git a/fs/bio.c b/fs/bio.c index 6f0362b77806..0608ef9422bd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1970,7 +1970,7 @@ int bio_associate_current(struct bio *bio) /* associate blkcg if exists */ rcu_read_lock(); css = task_css(current, blkio_cgrp_id); - if (css && css_tryget(css)) + if (css && css_tryget_online(css)) bio->bi_css = css; rcu_read_unlock(); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bde44618d8c2..c5f3684ef557 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -95,16 +95,16 @@ static inline void css_get(struct cgroup_subsys_state *css) } /** - * css_tryget - try to obtain a reference on the specified css + * css_tryget_online - try to obtain a reference on the specified css if online * @css: target css * - * Obtain a reference on @css if it's alive. The caller naturally needs to - * ensure that @css is accessible but doesn't have to be holding a + * Obtain a reference on @css if it's online. The caller naturally needs + * to ensure that @css is accessible but doesn't have to be holding a * reference on it - IOW, RCU protected access is good enough for this * function. Returns %true if a reference count was successfully obtained; * %false otherwise. */ -static inline bool css_tryget(struct cgroup_subsys_state *css) +static inline bool css_tryget_online(struct cgroup_subsys_state *css) { if (css->flags & CSS_ROOT) return true; @@ -115,7 +115,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) * css_put - put a css reference * @css: target css * - * Put a reference obtained via css_get() and css_tryget(). + * Put a reference obtained via css_get() and css_tryget_online(). */ static inline void css_put(struct cgroup_subsys_state *css) { @@ -905,8 +905,8 @@ void css_task_iter_end(struct css_task_iter *it); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss); +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); #else /* !CONFIG_CGROUPS */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7633703e9baf..671d8a6dae37 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3771,7 +3771,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) /* * We aren't being called from kernfs and there's no guarantee on - * @kn->priv's validity. For this and css_tryget_from_dir(), + * @kn->priv's validity. For this and css_tryget_online_from_dir(), * @kn->priv is RCU safe. Let's do the RCU dancing. */ rcu_read_lock(); @@ -4060,9 +4060,9 @@ err: * Implemented in kill_css(). * * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs - * and thus css_tryget() is guaranteed to fail, the css can be offlined - * by invoking offline_css(). After offlining, the base ref is put. - * Implemented in css_killed_work_fn(). + * and thus css_tryget_online() is guaranteed to fail, the css can be + * offlined by invoking offline_css(). After offlining, the base ref is + * put. Implemented in css_killed_work_fn(). * * 3. When the percpu_ref reaches zero, the only possible remaining * accessors are inside RCU read sections. css_release() schedules the @@ -4386,7 +4386,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* * This is called when the refcnt of a css is confirmed to be killed. - * css_tryget() is now guaranteed to fail. + * css_tryget_online() is now guaranteed to fail. */ static void css_killed_work_fn(struct work_struct *work) { @@ -4398,8 +4398,8 @@ static void css_killed_work_fn(struct work_struct *work) mutex_lock(&cgroup_mutex); /* - * css_tryget() is guaranteed to fail now. Tell subsystems to - * initate destruction. + * css_tryget_online() is guaranteed to fail now. Tell subsystems + * to initate destruction. */ offline_css(css); @@ -4440,8 +4440,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) * * This function initiates destruction of @css by removing cgroup interface * files and putting its base reference. ->css_offline() will be invoked - * asynchronously once css_tryget() is guaranteed to fail and when the - * reference count reaches zero, @css will be released. + * asynchronously once css_tryget_online() is guaranteed to fail and when + * the reference count reaches zero, @css will be released. */ static void kill_css(struct cgroup_subsys_state *css) { @@ -4462,7 +4462,7 @@ static void kill_css(struct cgroup_subsys_state *css) /* * cgroup core guarantees that, by the time ->css_offline() is * invoked, no new css reference will be given out via - * css_tryget(). We can't simply call percpu_ref_kill() and + * css_tryget_online(). We can't simply call percpu_ref_kill() and * proceed to offlining css's because percpu_ref_kill() doesn't * guarantee that the ref is seen as killed on all CPUs on return. * @@ -4478,9 +4478,9 @@ static void kill_css(struct cgroup_subsys_state *css) * * css's make use of percpu refcnts whose killing latency shouldn't be * exposed to userland and are RCU protected. Also, cgroup core needs to - * guarantee that css_tryget() won't succeed by the time ->css_offline() is - * invoked. To satisfy all the requirements, destruction is implemented in - * the following two steps. + * guarantee that css_tryget_online() won't succeed by the time + * ->css_offline() is invoked. To satisfy all the requirements, + * destruction is implemented in the following two steps. * * s1. Verify @cgrp can be destroyed and mark it dying. Remove all * userland visible parts and start killing the percpu refcnts of @@ -4574,9 +4574,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* * There are two control paths which try to determine cgroup from * dentry without going through kernfs - cgroupstats_build() and - * css_tryget_from_dir(). Those are supported by RCU protecting - * clearing of cgrp->kn->priv backpointer, which should happen - * after all files under it have been removed. + * css_tryget_online_from_dir(). Those are supported by RCU + * protecting clearing of cgrp->kn->priv backpointer, which should + * happen after all files under it have been removed. */ kernfs_remove(cgrp->kn); /* @cgrp has an extra ref on its kn */ RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); @@ -5173,7 +5173,7 @@ static int __init cgroup_disable(char *str) __setup("cgroup_disable=", cgroup_disable); /** - * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir + * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest * @ss: subsystem of interest * @@ -5181,8 +5181,8 @@ __setup("cgroup_disable=", cgroup_disable); * to get the corresponding css and return it. If such css doesn't exist * or can't be pinned, an ERR_PTR value is returned. */ -struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss) +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss) { struct kernfs_node *kn = kernfs_node_from_dentry(dentry); struct cgroup_subsys_state *css = NULL; @@ -5204,7 +5204,7 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, if (cgrp) css = cgroup_css(cgrp, ss); - if (!css || !css_tryget(css)) + if (!css || !css_tryget_online(css)) css = ERR_PTR(-ENOENT); rcu_read_unlock(); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 345628c78b5b..0398f7e9ac81 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -304,7 +304,7 @@ static int freezer_read(struct seq_file *m, void *v) /* update states bottom-up */ css_for_each_descendant_post(pos, css) { - if (!css_tryget(pos)) + if (!css_tryget_online(pos)) continue; rcu_read_unlock(); @@ -404,7 +404,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) struct freezer *pos_f = css_freezer(pos); struct freezer *parent = parent_freezer(pos_f); - if (!css_tryget(pos)) + if (!css_tryget_online(pos)) continue; rcu_read_unlock(); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7c0e8da59e26..37ca0a5c226d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -872,7 +872,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) continue; } } - if (!css_tryget(&cp->css)) + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); @@ -1108,7 +1108,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) continue; } } - if (!css_tryget(&cp->css)) + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); @@ -2153,7 +2153,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) rcu_read_lock(); cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { - if (cs == &top_cpuset || !css_tryget(&cs->css)) + if (cs == &top_cpuset || !css_tryget_online(&cs->css)) continue; rcu_read_unlock(); diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..077968d19b8a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -607,7 +607,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, if (!f.file) return -EBADF; - css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys); + css = css_tryget_online_from_dir(f.file->f_dentry, + &perf_event_cgrp_subsys); if (IS_ERR(css)) { ret = PTR_ERR(css); goto out; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 595d7fd795e1..372f1adca491 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -181,7 +181,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); - if (!css_tryget(&h_cg->css)) { + if (!css_tryget_online(&h_cg->css)) { rcu_read_unlock(); goto again; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3f82f69ef58..5cf3246314a2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -567,7 +567,8 @@ void sock_update_memcg(struct sock *sk) memcg = mem_cgroup_from_task(current); cg_proto = sk->sk_prot->proto_cgroup(memcg); if (!mem_cgroup_is_root(memcg) && - memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) { + memcg_proto_active(cg_proto) && + css_tryget_online(&memcg->css)) { sk->sk_cgrp = cg_proto; } rcu_read_unlock(); @@ -834,7 +835,7 @@ retry: */ __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); if (!res_counter_soft_limit_excess(&mz->memcg->res) || - !css_tryget(&mz->memcg->css)) + !css_tryget_online(&mz->memcg->css)) goto retry; done: return mz; @@ -1076,7 +1077,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (unlikely(!memcg)) memcg = root_mem_cgroup; - } while (!css_tryget(&memcg->css)); + } while (!css_tryget_online(&memcg->css)); rcu_read_unlock(); return memcg; } @@ -1113,7 +1114,8 @@ skip_node: */ if (next_css) { if ((next_css == &root->css) || - ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) + ((next_css->flags & CSS_ONLINE) && + css_tryget_online(next_css))) return mem_cgroup_from_css(next_css); prev_css = next_css; @@ -1159,7 +1161,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, * would be returned all the time. */ if (position && position != root && - !css_tryget(&position->css)) + !css_tryget_online(&position->css)) position = NULL; } return position; @@ -2785,9 +2787,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, /* * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling css_tryget if - * the mem_cgroup is used for charging. (dropping refcnt from swap can be - * called against removed memcg.) + * rcu_read_lock(). The caller is responsible for calling + * css_tryget_online() if the mem_cgroup is used for charging. (dropping + * refcnt from swap can be called against removed memcg.) */ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) { @@ -2810,14 +2812,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; rcu_read_unlock(); } @@ -3473,7 +3475,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, } /* The corresponding put will be done in the workqueue. */ - if (!css_tryget(&memcg->css)) + if (!css_tryget_online(&memcg->css)) goto out; rcu_read_unlock(); @@ -4246,8 +4248,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) memcg = mem_cgroup_lookup(id); if (memcg) { /* - * We uncharge this because swap is freed. - * This memcg can be obsolete one. We avoid calling css_tryget + * We uncharge this because swap is freed. This memcg can + * be obsolete one. We avoid calling css_tryget_online(). */ if (!mem_cgroup_is_root(memcg)) res_counter_uncharge(&memcg->memsw, PAGE_SIZE); @@ -5840,10 +5842,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) * which is then paired with css_put during uncharge resp. here. * * Although this might sound strange as this path is called from - * css_offline() when the referencemight have dropped down to 0 - * and shouldn't be incremented anymore (css_tryget would fail) - * we do not have other options because of the kmem allocations - * lifetime. + * css_offline() when the referencemight have dropped down to 0 and + * shouldn't be incremented anymore (css_tryget_online() would + * fail) we do not have other options because of the kmem + * allocations lifetime. */ css_get(&memcg->css); @@ -6051,8 +6053,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent, - &memory_cgrp_subsys); + cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent, + &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) goto out_put_cfile; @@ -6496,7 +6498,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) /* * XXX: css_offline() would be where we should reparent all * memory to prepare the cgroup for destruction. However, - * memcg does not do css_tryget() and res_counter charging + * memcg does not do css_tryget_online() and res_counter charging * under the same RCU lock region, which means that charging * could race with offlining. Offlining only happens to * cgroups with no tasks in them but charges can show up @@ -6510,9 +6512,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) * lookup_swap_cgroup_id() * rcu_read_lock() * mem_cgroup_lookup() - * css_tryget() + * css_tryget_online() * rcu_read_unlock() - * disable css_tryget() + * disable css_tryget_online() * call_rcu() * offline_css() * reparent_charges() -- cgit v1.2.3-55-g7522 From 451af504df0c62f695a69b83c250486e77c66378 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:21 -0400 Subject: cgroup: replace cftype->write_string() with cftype->write() Convert all cftype->write_string() users to the new cftype->write() which maps directly to kernfs write operation and has full access to kernfs and cgroup contexts. The conversions are mostly mechanical. * @css and @cft are accessed using of_css() and of_cft() accessors respectively instead of being specified as arguments. * Should return @nbytes on success instead of 0. * @buf is not trimmed automatically. Trim if necessary. Note that blkcg and netprio don't need this as the parsers already handle whitespaces. cftype->write_string() has no user left after the conversions and removed. While at it, remove unnecessary local variable @p in cgroup_subtree_control_write() and stale comment about CGROUP_LOCAL_BUFFER_SIZE in cgroup_freezer.c. This patch doesn't introduce any visible behavior changes. v2: netprio was missing from conversion. Converted. Signed-off-by: Tejun Heo Acked-by: Aristeu Rozanski Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Jens Axboe Cc: Johannes Weiner Cc: Michal Hocko Cc: Neil Horman Cc: "David S. Miller" --- block/blk-throttle.c | 32 ++++++++++++++++---------------- block/cfq-iosched.c | 28 ++++++++++++++-------------- include/linux/cgroup.h | 10 +--------- kernel/cgroup.c | 38 +++++++++++++++++++------------------- kernel/cgroup_freezer.c | 20 +++++++++----------- kernel/cpuset.c | 16 +++++++++------- mm/hugetlb_cgroup.c | 17 +++++++++-------- mm/memcontrol.c | 46 +++++++++++++++++++++++++--------------------- net/core/netprio_cgroup.c | 12 ++++++------ net/ipv4/tcp_memcontrol.c | 16 +++++++++------- security/device_cgroup.c | 14 +++++++------- 11 files changed, 124 insertions(+), 125 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 033745cd7fba..5e8fd1bace98 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1346,10 +1346,10 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buf, bool is_u64) +static ssize_t tg_set_conf(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off, bool is_u64) { - struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct throtl_grp *tg; struct throtl_service_queue *sq; @@ -1368,9 +1368,9 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, ctx.v = -1; if (is_u64) - *(u64 *)((void *)tg + cft->private) = ctx.v; + *(u64 *)((void *)tg + of_cft(of)->private) = ctx.v; else - *(unsigned int *)((void *)tg + cft->private) = ctx.v; + *(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v; throtl_log(&tg->service_queue, "limit change rbps=%llu wbps=%llu riops=%u wiops=%u", @@ -1404,19 +1404,19 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, } blkg_conf_finish(&ctx); - return 0; + return nbytes; } -static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft, - char *buf) +static ssize_t tg_set_conf_u64(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return tg_set_conf(css, cft, buf, true); + return tg_set_conf(of, buf, nbytes, off, true); } -static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft, - char *buf) +static ssize_t tg_set_conf_uint(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return tg_set_conf(css, cft, buf, false); + return tg_set_conf(of, buf, nbytes, off, false); } static struct cftype throtl_files[] = { @@ -1424,25 +1424,25 @@ static struct cftype throtl_files[] = { .name = "throttle.read_bps_device", .private = offsetof(struct throtl_grp, bps[READ]), .seq_show = tg_print_conf_u64, - .write_string = tg_set_conf_u64, + .write = tg_set_conf_u64, }, { .name = "throttle.write_bps_device", .private = offsetof(struct throtl_grp, bps[WRITE]), .seq_show = tg_print_conf_u64, - .write_string = tg_set_conf_u64, + .write = tg_set_conf_u64, }, { .name = "throttle.read_iops_device", .private = offsetof(struct throtl_grp, iops[READ]), .seq_show = tg_print_conf_uint, - .write_string = tg_set_conf_uint, + .write = tg_set_conf_uint, }, { .name = "throttle.write_iops_device", .private = offsetof(struct throtl_grp, iops[WRITE]), .seq_show = tg_print_conf_uint, - .write_string = tg_set_conf_uint, + .write = tg_set_conf_uint, }, { .name = "throttle.io_service_bytes", diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e0985f1955e7..a73020b8c9af 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1670,11 +1670,11 @@ static int cfq_print_leaf_weight(struct seq_file *sf, void *v) return 0; } -static int __cfqg_set_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buf, - bool is_leaf_weight) +static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off, + bool is_leaf_weight) { - struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct cfq_group *cfqg; int ret; @@ -1697,19 +1697,19 @@ static int __cfqg_set_weight_device(struct cgroup_subsys_state *css, } blkg_conf_finish(&ctx); - return ret; + return ret ?: nbytes; } -static int cfqg_set_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return __cfqg_set_weight_device(css, cft, buf, false); + return __cfqg_set_weight_device(of, buf, nbytes, off, false); } -static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return __cfqg_set_weight_device(css, cft, buf, true); + return __cfqg_set_weight_device(of, buf, nbytes, off, true); } static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, @@ -1837,7 +1837,7 @@ static struct cftype cfq_blkcg_files[] = { .name = "weight_device", .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = cfqg_print_leaf_weight_device, - .write_string = cfqg_set_leaf_weight_device, + .write = cfqg_set_leaf_weight_device, }, { .name = "weight", @@ -1851,7 +1851,7 @@ static struct cftype cfq_blkcg_files[] = { .name = "weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cfqg_print_weight_device, - .write_string = cfqg_set_weight_device, + .write = cfqg_set_weight_device, }, { .name = "weight", @@ -1863,7 +1863,7 @@ static struct cftype cfq_blkcg_files[] = { { .name = "leaf_weight_device", .seq_show = cfqg_print_leaf_weight_device, - .write_string = cfqg_set_leaf_weight_device, + .write = cfqg_set_leaf_weight_device, }, { .name = "leaf_weight", diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c5a170ca4a48..aecdc84fe128 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -453,8 +453,7 @@ struct cftype { /* * The maximum length of string, excluding trailing nul, that can - * be passed to write_string. If < PAGE_SIZE-1, PAGE_SIZE-1 is - * assumed. + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. */ size_t max_write_len; @@ -500,13 +499,6 @@ struct cftype { int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, s64 val); - /* - * write_string() is passed a nul-terminated kernelspace - * buffer of maximum length determined by max_write_len. - * Returns 0 or -ve error code. - */ - int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer); /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a16f91d12f4e..2a88ce7b24b6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1035,7 +1035,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) mode |= S_IRUGO; if (cft->write_u64 || cft->write_s64 || cft->write || - cft->write_string || cft->trigger) + cft->trigger) mode |= S_IWUSR; return mode; @@ -2352,20 +2352,21 @@ static int cgroup_procs_write(struct cgroup_subsys_state *css, return attach_task_by_pid(css->cgroup, tgid, true); } -static int cgroup_release_agent_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct cgroup_root *root = css->cgroup->root; + struct cgroup *cgrp = of_css(of)->cgroup; + struct cgroup_root *root = cgrp->root; BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX); - if (!cgroup_lock_live_group(css->cgroup)) + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; spin_lock(&release_agent_path_lock); - strlcpy(root->release_agent_path, buffer, + strlcpy(root->release_agent_path, strstrip(buf), sizeof(root->release_agent_path)); spin_unlock(&release_agent_path_lock); mutex_unlock(&cgroup_mutex); - return 0; + return nbytes; } static int cgroup_release_agent_show(struct seq_file *seq, void *v) @@ -2530,21 +2531,22 @@ out_finish: } /* change the enabled child controllers for a cgroup in the default hierarchy */ -static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css, - struct cftype *cft, char *buffer) +static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) { unsigned int enable = 0, disable = 0; - struct cgroup *cgrp = dummy_css->cgroup, *child; + struct cgroup *cgrp = of_css(of)->cgroup, *child; struct cgroup_subsys *ss; - char *tok, *p; + char *tok; int ssid, ret; /* * Parse input - space separated list of subsystem names prefixed * with either + or -. */ - p = buffer; - while ((tok = strsep(&p, " "))) { + buf = strstrip(buf); + while ((tok = strsep(&buf, " "))) { if (tok[0] == '\0') continue; for_each_subsys(ss, ssid) { @@ -2692,7 +2694,7 @@ out_unlock_tree: out_unbreak: kernfs_unbreak_active_protection(cgrp->control_kn); cgroup_put(cgrp); - return ret; + return ret ?: nbytes; err_undo_css: cgrp->child_subsys_mask &= ~enable; @@ -2738,9 +2740,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, css = cgroup_css(cgrp, cft->ss); rcu_read_unlock(); - if (cft->write_string) { - ret = cft->write_string(css, cft, strstrip(buf)); - } else if (cft->write_u64) { + if (cft->write_u64) { unsigned long long v; ret = kstrtoull(buf, 0, &v); if (!ret) @@ -3984,7 +3984,7 @@ static struct cftype cgroup_base_files[] = { .name = "cgroup.subtree_control", .flags = CFTYPE_ONLY_ON_DFL, .seq_show = cgroup_subtree_control_show, - .write_string = cgroup_subtree_control_write, + .write = cgroup_subtree_control_write, }, { .name = "cgroup.populated", @@ -4018,7 +4018,7 @@ static struct cftype cgroup_base_files[] = { .name = "release_agent", .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_release_agent_show, - .write_string = cgroup_release_agent_write, + .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, }, { } /* terminate */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 0398f7e9ac81..6b4e60e33a9a 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -73,10 +73,6 @@ bool cgroup_freezing(struct task_struct *task) return ret; } -/* - * cgroups_write_string() limits the size of freezer state strings to - * CGROUP_LOCAL_BUFFER_SIZE - */ static const char *freezer_state_strs(unsigned int state) { if (state & CGROUP_FROZEN) @@ -423,20 +419,22 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) mutex_unlock(&freezer_mutex); } -static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t freezer_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { bool freeze; - if (strcmp(buffer, freezer_state_strs(0)) == 0) + buf = strstrip(buf); + + if (strcmp(buf, freezer_state_strs(0)) == 0) freeze = false; - else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0) + else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) freeze = true; else return -EINVAL; - freezer_change_state(css_freezer(css), freeze); - return 0; + freezer_change_state(css_freezer(of_css(of)), freeze); + return nbytes; } static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, @@ -460,7 +458,7 @@ static struct cftype files[] = { .name = "state", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = freezer_read, - .write_string = freezer_write, + .write = freezer_write, }, { .name = "self_freezing", diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 37ca0a5c226d..2f4b08b8db24 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1603,13 +1603,15 @@ out_unlock: /* * Common handling for a write to a "cpus" or "mems" file. */ -static int cpuset_write_resmask(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct cpuset *cs = css_cs(css); + struct cpuset *cs = css_cs(of_css(of)); struct cpuset *trialcs; int retval = -ENODEV; + buf = strstrip(buf); + /* * CPU or memory hotunplug may leave @cs w/o any execution * resources, in which case the hotplug code asynchronously updates @@ -1633,7 +1635,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css, goto out_unlock; } - switch (cft->private) { + switch (of_cft(of)->private) { case FILE_CPULIST: retval = update_cpumask(cs, trialcs, buf); break; @@ -1648,7 +1650,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css, free_trial_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); - return retval; + return retval ?: nbytes; } /* @@ -1750,7 +1752,7 @@ static struct cftype files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, - .write_string = cpuset_write_resmask, + .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, @@ -1758,7 +1760,7 @@ static struct cftype files[] = { { .name = "mems", .seq_show = cpuset_common_seq_show, - .write_string = cpuset_write_resmask, + .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 372f1adca491..191de26b0148 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -253,15 +253,16 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, return res_counter_read_u64(&h_cg->hugepage[idx], name); } -static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int idx, name, ret; unsigned long long val; - struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); - idx = MEMFILE_IDX(cft->private); - name = MEMFILE_ATTR(cft->private); + buf = strstrip(buf); + idx = MEMFILE_IDX(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_LIMIT: @@ -271,7 +272,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, break; } /* This function does all necessary parse...reuse it */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; ret = res_counter_set_limit(&h_cg->hugepage[idx], val); @@ -280,7 +281,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, @@ -331,7 +332,7 @@ static void __init __hugetlb_cgroup_file_init(int idx) snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); cft->read_u64 = hugetlb_cgroup_read_u64; - cft->write_string = hugetlb_cgroup_write; + cft->write = hugetlb_cgroup_write; /* Add the usage file */ cft = &h->cgroup_files[1]; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5cf3246314a2..7098a43f7447 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5143,17 +5143,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg, * The user of this function is... * RES_LIMIT. */ -static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t mem_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); enum res_type type; int name; unsigned long long val; int ret; - type = MEMFILE_TYPE(cft->private); - name = MEMFILE_ATTR(cft->private); + buf = strstrip(buf); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_LIMIT: @@ -5162,7 +5163,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, break; } /* This function does all necessary parse...reuse it */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; if (type == _MEM) @@ -5175,7 +5176,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, return -EINVAL; break; case RES_SOFT_LIMIT: - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; /* @@ -5192,7 +5193,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; /* should be BUG() ? */ break; } - return ret; + return ret ?: nbytes; } static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, @@ -5964,9 +5965,10 @@ static void memcg_event_ptable_queue_proc(struct file *file, * Input must be in format ' '. * Interpretation of args is defined by control file implementation. */ -static int memcg_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { + struct cgroup_subsys_state *css = of_css(of); struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event; struct cgroup_subsys_state *cfile_css; @@ -5977,15 +5979,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, char *endp; int ret; - efd = simple_strtoul(buffer, &endp, 10); + buf = strstrip(buf); + + efd = simple_strtoul(buf, &endp, 10); if (*endp != ' ') return -EINVAL; - buffer = endp + 1; + buf = endp + 1; - cfd = simple_strtoul(buffer, &endp, 10); + cfd = simple_strtoul(buf, &endp, 10); if ((*endp != ' ') && (*endp != '\0')) return -EINVAL; - buffer = endp + 1; + buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) @@ -6063,7 +6067,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, goto out_put_cfile; } - ret = event->register_event(memcg, event->eventfd, buffer); + ret = event->register_event(memcg, event->eventfd, buf); if (ret) goto out_put_css; @@ -6076,7 +6080,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, fdput(cfile); fdput(efile); - return 0; + return nbytes; out_put_css: css_put(css); @@ -6107,13 +6111,13 @@ static struct cftype mem_cgroup_files[] = { { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { @@ -6138,7 +6142,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "cgroup.event_control", /* XXX: for compat */ - .write_string = memcg_write_event_control, + .write = memcg_write_event_control, .flags = CFTYPE_NO_PREFIX, .mode = S_IWUGO, }, @@ -6171,7 +6175,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.limit_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { @@ -6217,7 +6221,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3825f669147b..b990cefd906b 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v) return 0; } -static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t write_priomap(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { char devname[IFNAMSIZ + 1]; struct net_device *dev; u32 prio; int ret; - if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) + if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) return -EINVAL; dev = dev_get_by_name(&init_net, devname); @@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, rtnl_lock(); - ret = netprio_set_prio(css, dev, prio); + ret = netprio_set_prio(of_css(of), dev, prio); rtnl_unlock(); dev_put(dev); - return ret; + return ret ?: nbytes; } static int update_netprio(const void *v, struct file *file, unsigned n) @@ -239,7 +239,7 @@ static struct cftype ss_files[] = { { .name = "ifpriomap", .seq_show = read_priomap, - .write_string = write_priomap, + .write = write_priomap, }, { } /* terminate */ }; diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index d4f015ad6c84..841fd3fa937a 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) return 0; } -static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); unsigned long long val; int ret = 0; - switch (cft->private) { + buf = strstrip(buf); + + switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; ret = tcp_update_limit(memcg, val); @@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) @@ -193,7 +195,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) static struct cftype tcp_files[] = { { .name = "kmem.tcp.limit_in_bytes", - .write_string = tcp_cgroup_write, + .write = tcp_cgroup_write, .read_u64 = tcp_cgroup_read, .private = RES_LIMIT, }, diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 9134dbf70d3e..7dbac4061b1c 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -767,27 +767,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, return rc; } -static int devcgroup_access_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t devcgroup_access_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int retval; mutex_lock(&devcgroup_mutex); - retval = devcgroup_update_access(css_to_devcgroup(css), - cft->private, buffer); + retval = devcgroup_update_access(css_to_devcgroup(of_css(of)), + of_cft(of)->private, strstrip(buf)); mutex_unlock(&devcgroup_mutex); - return retval; + return retval ?: nbytes; } static struct cftype dev_cgroup_files[] = { { .name = "allow", - .write_string = devcgroup_access_write, + .write = devcgroup_access_write, .private = DEVCG_ALLOW, }, { .name = "deny", - .write_string = devcgroup_access_write, + .write = devcgroup_access_write, .private = DEVCG_DENY, }, { -- cgit v1.2.3-55-g7522 From 6770c64e5c8da4705d1f0973bdeb5c2bf4f3a404 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:21 -0400 Subject: cgroup: replace cftype->trigger() with cftype->write() cftype->trigger() is pointless. It's trivial to ignore the input buffer from a regular ->write() operation. Convert all ->trigger() users to ->write() and remove ->trigger(). This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup.h | 8 -------- kernel/cgroup.c | 5 +---- mm/hugetlb_cgroup.c | 16 ++++++++-------- mm/memcontrol.c | 34 ++++++++++++++++++---------------- net/ipv4/tcp_memcontrol.c | 15 ++++++++------- 5 files changed, 35 insertions(+), 43 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index aecdc84fe128..08eb71ee600b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -499,14 +499,6 @@ struct cftype { int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, s64 val); - /* - * trigger() callback can be used to get some kick from the - * userspace, when the actual string written is not important - * at all. The private field can be used to determine the - * kick type for multiplexing. - */ - int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); - /* * write() is the generic write callback which maps directly to * kernfs write operation and overrides all other operations. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a88ce7b24b6..2f16aab03493 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1034,8 +1034,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) if (cft->read_u64 || cft->read_s64 || cft->seq_show) mode |= S_IRUGO; - if (cft->write_u64 || cft->write_s64 || cft->write || - cft->trigger) + if (cft->write_u64 || cft->write_s64 || cft->write) mode |= S_IWUSR; return mode; @@ -2750,8 +2749,6 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, ret = kstrtoll(buf, 0, &v); if (!ret) ret = cft->write_s64(css, cft, v); - } else if (cft->trigger) { - ret = cft->trigger(css, (unsigned int)cft->private); } else { ret = -EINVAL; } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 191de26b0148..a380681ab3cf 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -284,14 +284,14 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, return ret ?: nbytes; } -static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, - unsigned int event) +static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int idx, name, ret = 0; - struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); - idx = MEMFILE_IDX(event); - name = MEMFILE_ATTR(event); + idx = MEMFILE_IDX(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_MAX_USAGE: @@ -304,7 +304,7 @@ static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static char *mem_fmt(char *buf, int size, unsigned long hsize) @@ -344,14 +344,14 @@ static void __init __hugetlb_cgroup_file_init(int idx) cft = &h->cgroup_files[2]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); - cft->trigger = hugetlb_cgroup_reset; + cft->write = hugetlb_cgroup_reset; cft->read_u64 = hugetlb_cgroup_read_u64; /* Add the failcntfile */ cft = &h->cgroup_files[3]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); - cft->trigger = hugetlb_cgroup_reset; + cft->write = hugetlb_cgroup_reset; cft->read_u64 = hugetlb_cgroup_read_u64; /* NULL terminate the last cft */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7098a43f7447..b638a79209ee 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4887,14 +4887,15 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) return 0; } -static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, - unsigned int event) +static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); if (mem_cgroup_is_root(memcg)) return -EINVAL; - return mem_cgroup_force_empty(memcg); + return mem_cgroup_force_empty(memcg) ?: nbytes; } static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, @@ -5220,14 +5221,15 @@ out: *memsw_limit = min_memsw_limit; } -static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); int name; enum res_type type; - type = MEMFILE_TYPE(event); - name = MEMFILE_ATTR(event); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_MAX_USAGE: @@ -5252,7 +5254,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, @@ -6105,7 +6107,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6123,7 +6125,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6132,7 +6134,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "force_empty", - .trigger = mem_cgroup_force_empty_write, + .write = mem_cgroup_force_empty_write, }, { .name = "use_hierarchy", @@ -6186,13 +6188,13 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.failcnt", .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { .name = "kmem.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, #ifdef CONFIG_SLABINFO @@ -6215,7 +6217,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6227,7 +6229,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { }, /* terminate */ diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 841fd3fa937a..f7a2ec3ac584 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -170,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) return val; } -static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg; struct cg_proto *cg_proto; - memcg = mem_cgroup_from_css(css); + memcg = mem_cgroup_from_css(of_css(of)); cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) - return 0; + return nbytes; - switch (event) { + switch (of_cft(of)->private) { case RES_MAX_USAGE: res_counter_reset_max(&cg_proto->memory_allocated); break; @@ -189,7 +190,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static struct cftype tcp_files[] = { @@ -207,13 +208,13 @@ static struct cftype tcp_files[] = { { .name = "kmem.tcp.failcnt", .private = RES_FAILCNT, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { .name = "kmem.tcp.max_usage_in_bytes", .private = RES_MAX_USAGE, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { } /* terminate */ -- cgit v1.2.3-55-g7522 From 5c9d535b893f30266ea29fe377cb9b002fcd76aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:48 -0400 Subject: cgroup: remove css_parent() cgroup in general is moving towards using cgroup_subsys_state as the fundamental structural component and css_parent() was introduced to convert from using cgroup->parent to css->parent. It was quite some time ago and we're moving forward with making css more prominent. This patch drops the trivial wrapper css_parent() and let the users dereference css->parent. While at it, explicitly mark fields of css which are public and immutable. v2: New usage from device_cgroup.c converted. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Neil Horman Acked-by: "David S. Miller" Acked-by: Li Zefan Cc: Vivek Goyal Cc: Jens Axboe Cc: Peter Zijlstra Cc: Johannes Weiner --- block/blk-cgroup.h | 2 +- include/linux/cgroup.h | 29 +++++++++++------------------ kernel/cgroup.c | 8 ++++---- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 14 +++++++------- net/core/netclassid_cgroup.c | 2 +- net/core/netprio_cgroup.c | 2 +- security/device_cgroup.c | 8 ++++---- 12 files changed, 34 insertions(+), 41 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 371fe8e92ab5..d692b29c083a 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -204,7 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) */ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) { - return css_to_blkcg(css_parent(&blkcg->css)); + return css_to_blkcg(blkcg->css.parent); } /** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1737db0c63fe..2549493d518d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -48,22 +48,28 @@ enum cgroup_subsys_id { }; #undef SUBSYS -/* Per-subsystem/per-cgroup state maintained by the system. */ +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ struct cgroup_subsys_state { - /* the cgroup that this css is attached to */ + /* PI: the cgroup that this css is attached to */ struct cgroup *cgroup; - /* the cgroup subsystem that this css is attached to */ + /* PI: the cgroup subsystem that this css is attached to */ struct cgroup_subsys *ss; /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* the parent css */ + /* PI: the parent css */ struct cgroup_subsys_state *parent; /* - * Subsys-unique ID. 0 is unused and root is always 1. The + * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). */ int id; @@ -669,19 +675,6 @@ struct cgroup_subsys { #include #undef SUBSYS -/** - * css_parent - find the parent css - * @css: the target cgroup_subsys_state - * - * Return the parent css of @css. This function is guaranteed to return - * non-NULL parent as long as @css isn't the root. - */ -static inline -struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) -{ - return css->parent; -} - /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0343d7ee6d62..929bbbc539e9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3176,10 +3176,10 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos, /* no child, visit my or the closest ancestor's next sibling */ while (pos != root) { - next = css_next_child(pos, css_parent(pos)); + next = css_next_child(pos, pos->parent); if (next) return next; - pos = css_parent(pos); + pos = pos->parent; } return NULL; @@ -3261,12 +3261,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, return NULL; /* if there's an unvisited sibling, visit its leftmost descendant */ - next = css_next_child(pos, css_parent(pos)); + next = css_next_child(pos, pos->parent); if (next) return css_leftmost_descendant(next); /* no sibling left, visit parent */ - return css_parent(pos); + return pos->parent; } static bool cgroup_has_live_children(struct cgroup *cgrp) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 6b4e60e33a9a..a79e40f9d700 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -59,7 +59,7 @@ static inline struct freezer *task_freezer(struct task_struct *task) static struct freezer *parent_freezer(struct freezer *freezer) { - return css_freezer(css_parent(&freezer->css)); + return css_freezer(freezer->css.parent); } bool cgroup_freezing(struct task_struct *task) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2f4b08b8db24..5b2a31082f4f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -124,7 +124,7 @@ static inline struct cpuset *task_cs(struct task_struct *task) static inline struct cpuset *parent_cs(struct cpuset *cs) { - return css_cs(css_parent(&cs->css)); + return css_cs(cs->css.parent); } #ifdef CONFIG_NUMA diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..ac61ad1a5f9f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7586,7 +7586,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - struct task_group *parent = css_tg(css_parent(css)); + struct task_group *parent = css_tg(css->parent); if (parent) sched_online_group(tg, parent); diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index c143ee380e3a..9cf350c94ec4 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -46,7 +46,7 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk) static inline struct cpuacct *parent_ca(struct cpuacct *ca) { - return css_ca(css_parent(&ca->css)); + return css_ca(ca->css.parent); } static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index a380681ab3cf..493f758445e7 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -52,7 +52,7 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) static inline struct hugetlb_cgroup * parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) { - return hugetlb_cgroup_from_css(css_parent(&h_cg->css)); + return hugetlb_cgroup_from_css(h_cg->css.parent); } static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b638a79209ee..a5e0417b4f9a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1540,7 +1540,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* root ? */ - if (!css_parent(&memcg->css)) + if (!memcg->css.parent) return vm_swappiness; return memcg->swappiness; @@ -4909,7 +4909,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, { int retval = 0; struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent); mutex_lock(&memcg_create_mutex); @@ -5207,8 +5207,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, if (!memcg->use_hierarchy) goto out; - while (css_parent(&memcg->css)) { - memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + while (memcg->css.parent) { + memcg = mem_cgroup_from_css(memcg->css.parent); if (!memcg->use_hierarchy) break; tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); @@ -5443,7 +5443,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent); if (val > 100 || !parent) return -EINVAL; @@ -5790,7 +5790,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent); /* cannot set to root cgroup and only 0 and 1 are allowed */ if (!parent || !((val == 0) || (val == 1))) @@ -6407,7 +6407,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); + struct mem_cgroup *parent = mem_cgroup_from_css(css->parent); if (css->id > MEM_CGROUP_ID_MAX) return -ENOSPC; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 22931e1b99b4..30d903b19c62 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + struct cgroup_cls_state *parent = css_cls_state(css->parent); if (parent) cs->classid = parent->classid; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b990cefd906b..2f385b9bccc0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { - struct cgroup_subsys_state *parent_css = css_parent(css); + struct cgroup_subsys_state *parent_css = css->parent; struct net_device *dev; int ret = 0; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7dbac4061b1c..ce14a31b1337 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -182,7 +182,7 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg) static int devcgroup_online(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); - struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css)); + struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent); int ret = 0; mutex_lock(&devcgroup_mutex); @@ -455,7 +455,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, static int parent_has_perm(struct dev_cgroup *childcg, struct dev_exception_item *ex) { - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); + struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return 1; @@ -476,7 +476,7 @@ static int parent_has_perm(struct dev_cgroup *childcg, static bool parent_allows_removal(struct dev_cgroup *childcg, struct dev_exception_item *ex) { - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); + struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return true; @@ -614,7 +614,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, char temp[12]; /* 11 + 1 characters needed for a u32 */ int count, rc = 0; struct dev_exception_item ex; - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css)); + struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent); if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- cgit v1.2.3-55-g7522 From f61c42a7d9119a8b72b9607ba8e3a34111f81d8c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 12 May 2014 16:34:17 +0200 Subject: memcg: remove tasks/children test from mem_cgroup_force_empty() Tejun has correctly pointed out that tasks/children test in mem_cgroup_force_empty is not correct because there is no other locking which preserves this state throughout the rest of the function so both new tasks can join the group or new children groups can be added while somebody is writing to memory.force_empty. A new task would break mem_cgroup_reparent_charges expectation that all failures as described by mem_cgroup_force_empty_list are temporal and there is no way out. The main use case for the knob as described by Documentation/cgroups/memory.txt is to: " The typical use case for this interface is before calling rmdir(). Because rmdir() moves all pages to parent, some out-of-use page caches can be moved to the parent. If you want to avoid that, force_empty will be useful. " This means that reparenting is not really required as rmdir will reparent pages implicitly from the safe context. If we remove it from mem_cgroup_force_empty then we are safe even with existing tasks because the number of reclaim attempts is bounded. Moreover the knob still does what the documentation claims (modulo reparenting which doesn't make any difference) and users might expect. Longterm we want to deprecate the whole knob and put the reparented pages to the tail of parent LRU during cgroup removal. tj: Removed unused variable @cgrp from mem_cgroup_force_empty() Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Li Zefan Signed-off-by: Tejun Heo --- Documentation/cgroups/memory.txt | 6 +----- mm/memcontrol.c | 7 ------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 2622115276aa..8cb87e32e8cb 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -453,15 +453,11 @@ About use_hierarchy, see Section 6. 5.1 force_empty memory.force_empty interface is provided to make cgroup's memory usage empty. - You can use this interface only when the cgroup has no tasks. When writing anything to this # echo 0 > memory.force_empty - Almost all pages tracked by this memory cgroup will be unmapped and freed. - Some pages cannot be freed because they are locked or in-use. Such pages are - moved to parent (if use_hierarchy==1) or root (if use_hierarchy==0) and this - cgroup will be empty. + the cgroup will be reclaimed and as many pages reclaimed as possible. The typical use case for this interface is before calling rmdir(). Because rmdir() moves all pages to parent, some out-of-use page caches can be diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a5e0417b4f9a..6144a8e7283f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4857,11 +4857,6 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg) static int mem_cgroup_force_empty(struct mem_cgroup *memcg) { int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct cgroup *cgrp = memcg->css.cgroup; - - /* returns EBUSY if there is a task or if we come here twice. */ - if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children)) - return -EBUSY; /* we call try-to-free pages for make this cgroup empty */ lru_add_drain_all(); @@ -4881,8 +4876,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) } } - lru_add_drain(); - mem_cgroup_reparent_charges(memcg); return 0; } -- cgit v1.2.3-55-g7522 From ea280e7b408ca0dad195ce9836feccdd1dc32131 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:48 -0400 Subject: memcg: update memcg_has_children() to use css_next_child() Currently, memcg_has_children() and mem_cgroup_hierarchy_write() directly test cgroup->children for list emptiness. It's semantically correct in traditional hierarchies as it actually wants to test for any children dead or alive; however, cgroup->children is not a published field and scheduled to go away. This patch moves out .use_hierarchy test out of memcg_has_children() and updates it to use css_next_child() to test whether there exists any children. With .use_hierarchy test moved out, it can also be used by mem_cgroup_hierarchy_write(). A side note: As .use_hierarchy is going away, it doesn't really matter but I'm not sure about how it's used in __memcg_activate_kmem(). The condition tested by memcg_has_children() is mushy when seen from userland as its result is affected by dead csses which aren't visible from userland. I think the rule would be a lot clearer if we have a dedicated "freshly minted" flag which gets cleared when the first task is migrated into it or the first child is created and then gate activation with that. v2: Added comment noting that testing use_hierarchy is the responsibility of the callers of memcg_has_children() as suggested by Michal Hocko. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Johannes Weiner --- mm/memcontrol.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6144a8e7283f..b6f91d61b3af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4834,18 +4834,28 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) } while (usage > 0); } +/* + * Test whether @memcg has children, dead or alive. Note that this + * function doesn't care whether @memcg has use_hierarchy enabled and + * returns %true if there are child csses according to the cgroup + * hierarchy. Testing use_hierarchy is the caller's responsiblity. + */ static inline bool memcg_has_children(struct mem_cgroup *memcg) { - lockdep_assert_held(&memcg_create_mutex); + bool ret; + /* - * The lock does not prevent addition or deletion to the list - * of children, but it prevents a new child from being - * initialized based on this parent in css_online(), so it's - * enough to decide whether hierarchically inherited - * attributes can still be changed or not. + * The lock does not prevent addition or deletion of children, but + * it prevents a new child from being initialized based on this + * parent in css_online(), so it's enough to decide whether + * hierarchically inherited attributes can still be changed or not. */ - return memcg->use_hierarchy && - !list_empty(&memcg->css.cgroup->children); + lockdep_assert_held(&memcg_create_mutex); + + rcu_read_lock(); + ret = css_next_child(NULL, &memcg->css); + rcu_read_unlock(); + return ret; } /* @@ -4919,7 +4929,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, */ if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { - if (list_empty(&memcg->css.cgroup->children)) + if (!memcg_has_children(memcg)) memcg->use_hierarchy = val; else retval = -EBUSY; @@ -5036,7 +5046,8 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg, * of course permitted. */ mutex_lock(&memcg_create_mutex); - if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg)) + if (cgroup_has_tasks(memcg->css.cgroup) || + (memcg->use_hierarchy && memcg_has_children(memcg))) err = -EBUSY; mutex_unlock(&memcg_create_mutex); if (err) -- cgit v1.2.3-55-g7522