summaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c188
1 files changed, 145 insertions, 43 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3ac6f5b0a64b..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,6 @@
#include <linux/cgroupstats.h>
#include <linux/hash.h>
#include <linux/namei.h>
-#include <linux/smp_lock.h>
#include <linux/pid_namespace.h>
#include <linux/idr.h>
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
@@ -138,7 +137,7 @@ struct css_id {
* is called after synchronize_rcu(). But for safe use, css_is_removed()
* css_tryget() should be used for avoiding race.
*/
- struct cgroup_subsys_state *css;
+ struct cgroup_subsys_state __rcu *css;
/*
* ID of this css.
*/
@@ -244,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp)
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
+static int clone_children(const struct cgroup *cgrp)
+{
+ return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+}
+
/*
* for_each_subsys() allows you to iterate on each subsystem attached to
* an active hierarchy
@@ -778,6 +782,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
struct inode *inode = new_inode(sb);
if (inode) {
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",noprefix");
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+ if (clone_children(&root->top_cgroup))
+ seq_puts(seq, ",clone_children");
if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name);
mutex_unlock(&cgroup_mutex);
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts {
unsigned long subsys_bits;
unsigned long flags;
char *release_agent;
+ bool clone_children;
char *name;
/* User explicitly requested empty subsystem */
bool none;
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts {
*/
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
- char *token, *o = data ?: "all";
+ char *token, *o = data;
+ bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
int i;
bool module_pin_failed = false;
@@ -1082,30 +1091,37 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
- if (!strcmp(token, "all")) {
- /* Add all non-disabled subsystems */
- opts->subsys_bits = 0;
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup_subsys *ss = subsys[i];
- if (ss == NULL)
- continue;
- if (!ss->disabled)
- opts->subsys_bits |= 1ul << i;
- }
- } else if (!strcmp(token, "none")) {
+ if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
- } else if (!strcmp(token, "noprefix")) {
+ continue;
+ }
+ if (!strcmp(token, "all")) {
+ /* Mutually exclusive option 'all' + subsystem name */
+ if (one_ss)
+ return -EINVAL;
+ all_ss = true;
+ continue;
+ }
+ if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
- } else if (!strncmp(token, "release_agent=", 14)) {
+ continue;
+ }
+ if (!strcmp(token, "clone_children")) {
+ opts->clone_children = true;
+ continue;
+ }
+ if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
opts->release_agent =
- kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
+ kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
- } else if (!strncmp(token, "name=", 5)) {
+ continue;
+ }
+ if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
@@ -1123,24 +1139,48 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
if (opts->name)
return -EINVAL;
opts->name = kstrndup(name,
- MAX_CGROUP_ROOT_NAMELEN,
+ MAX_CGROUP_ROOT_NAMELEN - 1,
GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
- } else {
- struct cgroup_subsys *ss;
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- ss = subsys[i];
- if (ss == NULL)
- continue;
- if (!strcmp(token, ss->name)) {
- if (!ss->disabled)
- set_bit(i, &opts->subsys_bits);
- break;
- }
- }
- if (i == CGROUP_SUBSYS_COUNT)
- return -ENOENT;
+
+ continue;
+ }
+
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (ss == NULL)
+ continue;
+ if (strcmp(token, ss->name))
+ continue;
+ if (ss->disabled)
+ continue;
+
+ /* Mutually exclusive option 'all' + subsystem name */
+ if (all_ss)
+ return -EINVAL;
+ set_bit(i, &opts->subsys_bits);
+ one_ss = true;
+
+ break;
+ }
+ if (i == CGROUP_SUBSYS_COUNT)
+ return -ENOENT;
+ }
+
+ /*
+ * If the 'all' option was specified select all the subsystems,
+ * otherwise 'all, 'none' and a subsystem name options were not
+ * specified, let's default to 'all'
+ */
+ if (all_ss || (!all_ss && !one_ss && !opts->none)) {
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (ss == NULL)
+ continue;
+ if (ss->disabled)
+ continue;
+ set_bit(i, &opts->subsys_bits);
}
}
@@ -1222,7 +1262,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
- lock_kernel();
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -1255,7 +1294,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
kfree(opts.name);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
- unlock_kernel();
return ret;
}
@@ -1357,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
+ if (opts->clone_children)
+ set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
return root;
}
@@ -1420,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
return 0;
}
-static int cgroup_get_sb(struct file_system_type *fs_type,
+static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
struct cgroup_sb_opts opts;
struct cgroupfs_root *root;
@@ -1556,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
drop_parsed_module_refcounts(opts.subsys_bits);
}
- simple_set_mnt(mnt, sb);
kfree(opts.release_agent);
kfree(opts.name);
- return 0;
+ return dget(sb->s_root);
drop_new_super:
deactivate_locked_super(sb);
@@ -1568,8 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
out_err:
kfree(opts.release_agent);
kfree(opts.name);
-
- return ret;
+ return ERR_PTR(ret);
}
static void cgroup_kill_sb(struct super_block *sb) {
@@ -1619,10 +1657,12 @@ static void cgroup_kill_sb(struct super_block *sb) {
static struct file_system_type cgroup_fs_type = {
.name = "cgroup",
- .get_sb = cgroup_get_sb,
+ .mount = cgroup_mount,
.kill_sb = cgroup_kill_sb,
};
+static struct kobject *cgroup_kobj;
+
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
return dentry->d_fsdata;
@@ -1788,6 +1828,30 @@ out:
return retval;
}
+/**
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
+ * @tsk: the task to be attached
+ */
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+{
+ struct cgroupfs_root *root;
+ int retval = 0;
+
+ cgroup_lock();
+ for_each_active_root(root) {
+ struct cgroup *from_cg = task_cgroup_from_root(from, root);
+
+ retval = cgroup_attach_task(from_cg, tsk);
+ if (retval)
+ break;
+ }
+ cgroup_unlock();
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
+
/*
* Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
* held. May take task_lock of task
@@ -1857,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
const char *buffer)
{
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ if (strlen(buffer) >= PATH_MAX)
+ return -EINVAL;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
strcpy(cgrp->root->release_agent_path, buffer);
@@ -3150,6 +3216,23 @@ fail:
return ret;
}
+static u64 cgroup_clone_children_read(struct cgroup *cgrp,
+ struct cftype *cft)
+{
+ return clone_children(cgrp);
+}
+
+static int cgroup_clone_children_write(struct cgroup *cgrp,
+ struct cftype *cft,
+ u64 val)
+{
+ if (val)
+ set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+ else
+ clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+ return 0;
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
@@ -3180,6 +3263,11 @@ static struct cftype files[] = {
.write_string = cgroup_write_event_control,
.mode = S_IWUGO,
},
+ {
+ .name = "cgroup.clone_children",
+ .read_u64 = cgroup_clone_children_read,
+ .write_u64 = cgroup_clone_children_write,
+ },
};
static struct cftype cft_release_agent = {
@@ -3309,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ if (clone_children(parent))
+ set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+
for_each_subsys(root, ss) {
struct cgroup_subsys_state *css = ss->create(ss, cgrp);
@@ -3323,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
goto err_destroy;
}
/* At error, ->destroy() callback has to free assigned ID. */
+ if (clone_children(parent) && ss->post_clone)
+ ss->post_clone(ss, cgrp);
}
cgroup_lock_hierarchy(root);
@@ -3871,9 +3964,18 @@ int __init cgroup_init(void)
hhead = css_set_hash(init_css_set.subsys);
hlist_add_head(&init_css_set.hlist, hhead);
BUG_ON(!init_root_id(&rootnode));
+
+ cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
+ if (!cgroup_kobj) {
+ err = -ENOMEM;
+ goto out;
+ }
+
err = register_filesystem(&cgroup_fs_type);
- if (err < 0)
+ if (err < 0) {
+ kobject_put(cgroup_kobj);
goto out;
+ }
proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);