From 90129625d9203a917fc1d3e4768976ba90d71b44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jan 2019 00:38:03 -0500 Subject: cgroup: start switching to fs_context Unfortunately, cgroup is tangled into kernfs infrastructure. To avoid converting all kernfs-based filesystems at once, we need to untangle the remount part of things, instead of having it go through kernfs_sop_remount_fs(). Fortunately, it's not hard to do. This commit just gets cgroup/cgroup1 to use fs_context to deliver options on mount and remount paths. Parsing those is going to be done in the next commits; for now we do pretty much what legacy case does. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 14 +++++ kernel/cgroup/cgroup-v1.c | 9 +-- kernel/cgroup/cgroup.c | 134 +++++++++++++++++++++++++++++----------- 3 files changed, 116 insertions(+), 41 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c9a35f09e4b9..a89cb0ba7a68 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -7,6 +7,7 @@ #include #include #include +#include #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; @@ -36,6 +37,18 @@ extern void __init enable_debug_cgroup(void); } \ } while (0) +/* + * The cgroup filesystem superblock creation/mount context. + */ +struct cgroup_fs_context { + char *data; +}; + +static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) +{ + return fc->fs_private; +} + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other @@ -255,5 +268,6 @@ void cgroup1_check_for_release(struct cgroup *cgrp); struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, void *data, unsigned long magic, struct cgroup_namespace *ns); +int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index f94a7229974e..e377e19dd3e6 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1046,17 +1046,19 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return 0; } -static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) +int cgroup1_reconfigure(struct fs_context *fc) { - int ret = 0; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); + int ret = 0; struct cgroup_sb_opts opts; u16 added_mask, removed_mask; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = parse_cgroupfs_options(data, &opts); + ret = parse_cgroupfs_options(ctx->data, &opts); if (ret) goto out_unlock; @@ -1106,7 +1108,6 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .rename = cgroup1_rename, .show_options = cgroup1_show_options, - .remount_fs = cgroup1_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7fd9f22e406d..7f7db5f967e3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1811,12 +1811,13 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root return 0; } -static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) +static int cgroup_reconfigure(struct fs_context *fc) { + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); unsigned int root_flags; int ret; - ret = parse_cgroup_root_flags(data, &root_flags); + ret = parse_cgroup_root_flags(ctx->data, &root_flags); if (ret) return ret; @@ -2067,21 +2068,98 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, return dentry; } -static struct dentry *cgroup_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +/* + * Destroy a cgroup filesystem context. + */ +static void cgroup_fs_context_free(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + kfree(ctx); +} + +static int cgroup_parse_monolithic(struct fs_context *fc, void *data) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + ctx->data = data; + if (ctx->data) + security_sb_eat_lsm_opts(ctx->data, &fc->security); + return 0; +} + +static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct dentry *dentry; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + unsigned int root_flags; + struct dentry *root; int ret; - get_cgroup_ns(ns); + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + ret = parse_cgroup_root_flags(ctx->data, &root_flags); + if (ret) + return ret; + + cgrp_dfl_visible = true; + cgroup_get_live(&cgrp_dfl_root.cgrp); + + root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, + CGROUP2_SUPER_MAGIC, ns); + if (IS_ERR(root)) + return PTR_ERR(root); + + apply_cgroup_root_flags(root_flags); + fc->root = root; + return 0; +} + +static int cgroup1_get_tree(struct fs_context *fc) +{ + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct dentry *root; /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { - put_cgroup_ns(ns); - return ERR_PTR(-EPERM); - } + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + root = cgroup1_mount(&cgroup_fs_type, fc->sb_flags, ctx->data, + CGROUP_SUPER_MAGIC, ns); + if (IS_ERR(root)) + return PTR_ERR(root); + + fc->root = root; + return 0; +} + +static const struct fs_context_operations cgroup_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_monolithic = cgroup_parse_monolithic, + .get_tree = cgroup_get_tree, + .reconfigure = cgroup_reconfigure, +}; + +static const struct fs_context_operations cgroup1_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_monolithic = cgroup_parse_monolithic, + .get_tree = cgroup1_get_tree, + .reconfigure = cgroup1_reconfigure, +}; + +/* + * Initialise the cgroup filesystem creation/reconfiguration context. + */ +static int cgroup_init_fs_context(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx; + + ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; /* * The first time anyone tries to mount a cgroup, enable the list @@ -2090,29 +2168,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); - if (fs_type == &cgroup2_fs_type) { - unsigned int root_flags; - - ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) { - put_cgroup_ns(ns); - return ERR_PTR(ret); - } - - cgrp_dfl_visible = true; - cgroup_get_live(&cgrp_dfl_root.cgrp); - - dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (!IS_ERR(dentry)) - apply_cgroup_root_flags(root_flags); - } else { - dentry = cgroup1_mount(&cgroup_fs_type, flags, data, - CGROUP_SUPER_MAGIC, ns); - } - - put_cgroup_ns(ns); - return dentry; + fc->fs_private = ctx; + if (fc->fs_type == &cgroup2_fs_type) + fc->ops = &cgroup_fs_context_ops; + else + fc->ops = &cgroup1_fs_context_ops; + return 0; } static void cgroup_kill_sb(struct super_block *sb) @@ -2136,14 +2197,14 @@ static void cgroup_kill_sb(struct super_block *sb) struct file_system_type cgroup_fs_type = { .name = "cgroup", - .mount = cgroup_mount, + .init_fs_context = cgroup_init_fs_context, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { .name = "cgroup2", - .mount = cgroup_mount, + .init_fs_context = cgroup_init_fs_context, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; @@ -5268,7 +5329,6 @@ int cgroup_rmdir(struct kernfs_node *kn) static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .show_options = cgroup_show_options, - .remount_fs = cgroup_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, -- cgit v1.2.3-55-g7522 From 7feeef58690a5ea8c5033d43e696ef41b28d82eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Jan 2019 21:23:02 -0500 Subject: cgroup: fold cgroup1_mount() into cgroup1_get_tree() Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 4 +--- kernel/cgroup/cgroup-v1.c | 26 +++++++++++++++++--------- kernel/cgroup/cgroup.c | 19 ------------------- 3 files changed, 18 insertions(+), 31 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index a89cb0ba7a68..37836d598ff8 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -265,9 +265,7 @@ bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup1_get_tree(struct fs_context *fc); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index e377e19dd3e6..7ae3810dcbdf 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1113,20 +1113,24 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .show_path = cgroup_show_path, }; -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup1_get_tree(struct fs_context *fc) { + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; struct dentry *dentry; int i, ret; + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = parse_cgroupfs_options(data, &opts); + ret = parse_cgroupfs_options(ctx->data, &opts); if (ret) goto out_unlock; @@ -1228,19 +1232,23 @@ out_free: kfree(opts.name); if (ret) - return ERR_PTR(ret); + return ret; - dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, + dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, CGROUP_SUPER_MAGIC, ns); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { struct super_block *sb = dentry->d_sb; dput(dentry); deactivate_locked_super(sb); msleep(10); - dentry = ERR_PTR(restart_syscall()); + return restart_syscall(); } - return dentry; + + fc->root = dentry; + return 0; } static int __init cgroup1_wq_init(void) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7f7db5f967e3..0652f74064a2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2117,25 +2117,6 @@ static int cgroup_get_tree(struct fs_context *fc) return 0; } -static int cgroup1_get_tree(struct fs_context *fc) -{ - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct dentry *root; - - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - - root = cgroup1_mount(&cgroup_fs_type, fc->sb_flags, ctx->data, - CGROUP_SUPER_MAGIC, ns); - if (IS_ERR(root)) - return PTR_ERR(root); - - fc->root = root; - return 0; -} - static const struct fs_context_operations cgroup_fs_context_ops = { .free = cgroup_fs_context_free, .parse_monolithic = cgroup_parse_monolithic, -- cgit v1.2.3-55-g7522 From f5dfb5315d340abd71bec523be9b114d5ac410de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Jan 2019 23:42:38 -0500 Subject: cgroup: take options parsing into ->parse_monolithic() Store the results in cgroup_fs_context. There's a nasty twist caused by the enabling/disabling subsystems - we can't do the checks sensitive to that until cgroup_mutex gets grabbed. Frankly, these checks are complete bullshit (e.g. all,none combination is accepted if all subsystems are disabled; so's cpusets,none and all,cpusets when cpusets is disabled, etc.), but touching that would be a userland-visible behaviour change ;-/ So we do parsing in ->parse_monolithic() and have the consistency checks done in check_cgroupfs_options(), with the latter called (on already parsed options) from cgroup1_get_tree() and cgroup1_reconfigure(). Freeing the strdup'ed strings is done from fs_context destructor, which somewhat simplifies the life for cgroup1_{get_tree,reconfigure}(). Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 23 ++++--- kernel/cgroup/cgroup-v1.c | 143 +++++++++++++++++++--------------------- kernel/cgroup/cgroup.c | 54 ++++++++------- 3 files changed, 104 insertions(+), 116 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 37836d598ff8..e627ff193dba 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,7 +41,15 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { - char *data; + unsigned int flags; /* CGRP_ROOT_* flags */ + + /* cgroup1 bits */ + bool cpuset_clone_children; + bool none; /* User explicitly requested empty subsystem */ + bool all_ss; /* Seen 'all' option */ + u16 subsys_mask; /* Selected subsystems */ + char *name; /* Hierarchy name */ + char *release_agent; /* Path for release notifications */ }; static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) @@ -130,16 +138,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -struct cgroup_sb_opts { - u16 subsys_mask; - unsigned int flags; - char *release_agent; - bool cpuset_clone_children; - char *name; - /* User explicitly requested empty subsystem */ - bool none; -}; - extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; @@ -210,7 +208,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); +void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, @@ -266,6 +264,7 @@ void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); int cgroup1_get_tree(struct fs_context *fc); +int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 7ae3810dcbdf..5c93643090e9 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -906,61 +906,47 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo return 0; } -static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) +int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx) { char *token, *o = data; - bool all_ss = false, one_ss = false; - u16 mask = U16_MAX; struct cgroup_subsys *ss; - int nr_opts = 0; int i; -#ifdef CONFIG_CPUSETS - mask = ~((u16)1 << cpuset_cgrp_id); -#endif - - memset(opts, 0, sizeof(*opts)); - while ((token = strsep(&o, ",")) != NULL) { - nr_opts++; - if (!*token) return -EINVAL; if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ - opts->none = true; + ctx->none = true; continue; } if (!strcmp(token, "all")) { - /* Mutually exclusive option 'all' + subsystem name */ - if (one_ss) - return -EINVAL; - all_ss = true; + ctx->all_ss = true; continue; } if (!strcmp(token, "noprefix")) { - opts->flags |= CGRP_ROOT_NOPREFIX; + ctx->flags |= CGRP_ROOT_NOPREFIX; continue; } if (!strcmp(token, "clone_children")) { - opts->cpuset_clone_children = true; + ctx->cpuset_clone_children = true; continue; } if (!strcmp(token, "cpuset_v2_mode")) { - opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; continue; } if (!strcmp(token, "xattr")) { - opts->flags |= CGRP_ROOT_XATTR; + ctx->flags |= CGRP_ROOT_XATTR; continue; } if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ - if (opts->release_agent) + if (ctx->release_agent) return -EINVAL; - opts->release_agent = + ctx->release_agent = kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); - if (!opts->release_agent) + if (!ctx->release_agent) return -ENOMEM; continue; } @@ -983,12 +969,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return -EINVAL; } /* Specifying two names is forbidden */ - if (opts->name) + if (ctx->name) return -EINVAL; - opts->name = kstrndup(name, + ctx->name = kstrndup(name, MAX_CGROUP_ROOT_NAMELEN - 1, GFP_KERNEL); - if (!opts->name) + if (!ctx->name) return -ENOMEM; continue; @@ -997,38 +983,51 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) for_each_subsys(ss, i) { if (strcmp(token, ss->legacy_name)) continue; - if (!cgroup_ssid_enabled(i)) - continue; - if (cgroup1_ssid_disabled(i)) - continue; - - /* Mutually exclusive option 'all' + subsystem name */ - if (all_ss) - return -EINVAL; - opts->subsys_mask |= (1 << i); - one_ss = true; - + ctx->subsys_mask |= (1 << i); break; } if (i == CGROUP_SUBSYS_COUNT) return -ENOENT; } + return 0; +} + +static int check_cgroupfs_options(struct cgroup_fs_context *ctx) +{ + u16 mask = U16_MAX; + u16 enabled = 0; + struct cgroup_subsys *ss; + int i; + +#ifdef CONFIG_CPUSETS + mask = ~((u16)1 << cpuset_cgrp_id); +#endif + for_each_subsys(ss, i) + if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) + enabled |= 1 << i; + + ctx->subsys_mask &= enabled; /* - * If the 'all' option was specified select all the subsystems, - * otherwise if 'none', 'name=' and a subsystem name options were - * not specified, let's default to 'all' + * In absense of 'none', 'name=' or subsystem name options, + * let's default to 'all'. */ - if (all_ss || (!one_ss && !opts->none && !opts->name)) - for_each_subsys(ss, i) - if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) - opts->subsys_mask |= (1 << i); + if (!ctx->subsys_mask && !ctx->none && !ctx->name) + ctx->all_ss = true; + + if (ctx->all_ss) { + /* Mutually exclusive option 'all' + subsystem name */ + if (ctx->subsys_mask) + return -EINVAL; + /* 'all' => select all the subsystems */ + ctx->subsys_mask = enabled; + } /* * We either have to specify by name or by subsystems. (So all * empty hierarchies must have a name). */ - if (!opts->subsys_mask && !opts->name) + if (!ctx->subsys_mask && !ctx->name) return -EINVAL; /* @@ -1036,11 +1035,11 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) * with the old cpuset, so we allow noprefix only if mounting just * the cpuset subsystem. */ - if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) + if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) return -EINVAL; /* Can't specify "none" and some subsystems */ - if (opts->subsys_mask && opts->none) + if (ctx->subsys_mask && ctx->none) return -EINVAL; return 0; @@ -1052,28 +1051,27 @@ int cgroup1_reconfigure(struct fs_context *fc) struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); int ret = 0; - struct cgroup_sb_opts opts; u16 added_mask, removed_mask; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = parse_cgroupfs_options(ctx->data, &opts); + ret = check_cgroupfs_options(ctx); if (ret) goto out_unlock; - if (opts.subsys_mask != root->subsys_mask || opts.release_agent) + if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); - added_mask = opts.subsys_mask & ~root->subsys_mask; - removed_mask = root->subsys_mask & ~opts.subsys_mask; + added_mask = ctx->subsys_mask & ~root->subsys_mask; + removed_mask = root->subsys_mask & ~ctx->subsys_mask; /* Don't allow flags or name to change at remount */ - if ((opts.flags ^ root->flags) || - (opts.name && strcmp(opts.name, root->name))) { + if ((ctx->flags ^ root->flags) || + (ctx->name && strcmp(ctx->name, root->name))) { pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags, opts.name ?: "", root->flags, root->name); + ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1090,17 +1088,15 @@ int cgroup1_reconfigure(struct fs_context *fc) WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); - if (opts.release_agent) { + if (ctx->release_agent) { spin_lock(&release_agent_path_lock); - strcpy(root->release_agent_path, opts.release_agent); + strcpy(root->release_agent_path, ctx->release_agent); spin_unlock(&release_agent_path_lock); } trace_cgroup_remount(root); out_unlock: - kfree(opts.release_agent); - kfree(opts.name); mutex_unlock(&cgroup_mutex); return ret; } @@ -1117,7 +1113,6 @@ int cgroup1_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; struct dentry *dentry; @@ -1130,7 +1125,7 @@ int cgroup1_get_tree(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = parse_cgroupfs_options(ctx->data, &opts); + ret = check_cgroupfs_options(ctx); if (ret) goto out_unlock; @@ -1142,7 +1137,7 @@ int cgroup1_get_tree(struct fs_context *fc) * starting. Testing ref liveliness is good enough. */ for_each_subsys(ss, i) { - if (!(opts.subsys_mask & (1 << i)) || + if (!(ctx->subsys_mask & (1 << i)) || ss->root == &cgrp_dfl_root) continue; @@ -1166,8 +1161,8 @@ int cgroup1_get_tree(struct fs_context *fc) * name matches but sybsys_mask doesn't, we should fail. * Remember whether name matched. */ - if (opts.name) { - if (strcmp(opts.name, root->name)) + if (ctx->name) { + if (strcmp(ctx->name, root->name)) continue; name_match = true; } @@ -1176,15 +1171,15 @@ int cgroup1_get_tree(struct fs_context *fc) * If we asked for subsystems (or explicitly for no * subsystems) then they must match. */ - if ((opts.subsys_mask || opts.none) && - (opts.subsys_mask != root->subsys_mask)) { + if ((ctx->subsys_mask || ctx->none) && + (ctx->subsys_mask != root->subsys_mask)) { if (!name_match) continue; ret = -EBUSY; goto out_unlock; } - if (root->flags ^ opts.flags) + if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); ret = 0; @@ -1196,7 +1191,7 @@ int cgroup1_get_tree(struct fs_context *fc) * specification is allowed for already existing hierarchies but we * can't create new one without subsys specification. */ - if (!opts.subsys_mask && !opts.none) { + if (!ctx->subsys_mask && !ctx->none) { ret = -EINVAL; goto out_unlock; } @@ -1213,9 +1208,9 @@ int cgroup1_get_tree(struct fs_context *fc) goto out_unlock; } - init_cgroup_root(root, &opts); + init_cgroup_root(root, ctx); - ret = cgroup_setup_root(root, opts.subsys_mask); + ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) cgroup_free_root(root); @@ -1223,14 +1218,10 @@ out_unlock: if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); msleep(10); - ret = restart_syscall(); - goto out_free; + return restart_syscall(); } mutex_unlock(&cgroup_mutex); out_free: - kfree(opts.release_agent); - kfree(opts.name); - if (ret) return ret; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0652f74064a2..33da9eef3ef4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1814,14 +1814,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root static int cgroup_reconfigure(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - unsigned int root_flags; - int ret; - - ret = parse_cgroup_root_flags(ctx->data, &root_flags); - if (ret) - return ret; - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); return 0; } @@ -1909,7 +1903,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) +void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx) { struct cgroup *cgrp = &root->cgrp; @@ -1919,12 +1913,12 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) init_cgroup_housekeeping(cgrp); idr_init(&root->cgroup_idr); - root->flags = opts->flags; - if (opts->release_agent) - strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); - if (opts->name) - strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); - if (opts->cpuset_clone_children) + root->flags = ctx->flags; + if (ctx->release_agent) + strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); + if (ctx->name) + strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); + if (ctx->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } @@ -2075,6 +2069,8 @@ static void cgroup_fs_context_free(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + kfree(ctx->name); + kfree(ctx->release_agent); kfree(ctx); } @@ -2082,28 +2078,30 @@ static int cgroup_parse_monolithic(struct fs_context *fc, void *data) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - ctx->data = data; - if (ctx->data) - security_sb_eat_lsm_opts(ctx->data, &fc->security); - return 0; + if (data) + security_sb_eat_lsm_opts(data, &fc->security); + return parse_cgroup_root_flags(data, &ctx->flags); +} + +static int cgroup1_parse_monolithic(struct fs_context *fc, void *data) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + if (data) + security_sb_eat_lsm_opts(data, &fc->security); + return parse_cgroup1_options(data, ctx); } static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - unsigned int root_flags; struct dentry *root; - int ret; /* Check if the caller has permission to mount. */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; - ret = parse_cgroup_root_flags(ctx->data, &root_flags); - if (ret) - return ret; - cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); @@ -2112,7 +2110,7 @@ static int cgroup_get_tree(struct fs_context *fc) if (IS_ERR(root)) return PTR_ERR(root); - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); fc->root = root; return 0; } @@ -2126,7 +2124,7 @@ static const struct fs_context_operations cgroup_fs_context_ops = { static const struct fs_context_operations cgroup1_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup_parse_monolithic, + .parse_monolithic = cgroup1_parse_monolithic, .get_tree = cgroup1_get_tree, .reconfigure = cgroup1_reconfigure, }; @@ -5376,11 +5374,11 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) */ int __init cgroup_init_early(void) { - static struct cgroup_sb_opts __initdata opts; + static struct cgroup_fs_context __initdata ctx; struct cgroup_subsys *ss; int i; - init_cgroup_root(&cgrp_dfl_root, &opts); + init_cgroup_root(&cgrp_dfl_root, &ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); -- cgit v1.2.3-55-g7522 From 8d2451f4994fa60a57617282bab91b98266a00b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 00:15:11 -0500 Subject: cgroup1: switch to option-by-option parsing [dhowells should be the author - it's carved out of his patch] Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 +- kernel/cgroup/cgroup-v1.c | 192 +++++++++++++++++++++++----------------- kernel/cgroup/cgroup.c | 20 ++--- 3 files changed, 117 insertions(+), 98 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index e627ff193dba..a7b5a41f170c 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -257,14 +257,15 @@ extern const struct proc_ns_operations cgroupns_operations; */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; +extern const struct fs_parameter_description cgroup1_fs_parameters; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); int cgroup1_get_tree(struct fs_context *fc); -int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 5c93643090e9..725e9f6fe80d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -13,9 +13,12 @@ #include #include #include +#include #include +#define cg_invalf(fc, fmt, ...) ({ pr_err(fmt, ## __VA_ARGS__); -EINVAL; }) + /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -906,94 +909,117 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo return 0; } -int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx) -{ - char *token, *o = data; - struct cgroup_subsys *ss; - int i; +enum cgroup1_param { + Opt_all, + Opt_clone_children, + Opt_cpuset_v2_mode, + Opt_name, + Opt_none, + Opt_noprefix, + Opt_release_agent, + Opt_xattr, +}; - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; - if (!strcmp(token, "none")) { - /* Explicitly have no subsystems */ - ctx->none = true; - continue; - } - if (!strcmp(token, "all")) { - ctx->all_ss = true; - continue; - } - if (!strcmp(token, "noprefix")) { - ctx->flags |= CGRP_ROOT_NOPREFIX; - continue; - } - if (!strcmp(token, "clone_children")) { - ctx->cpuset_clone_children = true; - continue; - } - if (!strcmp(token, "cpuset_v2_mode")) { - ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; - continue; - } - if (!strcmp(token, "xattr")) { - ctx->flags |= CGRP_ROOT_XATTR; - continue; - } - if (!strncmp(token, "release_agent=", 14)) { - /* Specifying two release agents is forbidden */ - if (ctx->release_agent) - return -EINVAL; - ctx->release_agent = - kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); - if (!ctx->release_agent) - return -ENOMEM; - continue; - } - if (!strncmp(token, "name=", 5)) { - const char *name = token + 5; - - /* blocked by boot param? */ - if (cgroup_no_v1_named) - return -ENOENT; - /* Can't specify an empty name */ - if (!strlen(name)) - return -EINVAL; - /* Must match [\w.-]+ */ - for (i = 0; i < strlen(name); i++) { - char c = name[i]; - if (isalnum(c)) - continue; - if ((c == '.') || (c == '-') || (c == '_')) - continue; - return -EINVAL; - } - /* Specifying two names is forbidden */ - if (ctx->name) - return -EINVAL; - ctx->name = kstrndup(name, - MAX_CGROUP_ROOT_NAMELEN - 1, - GFP_KERNEL); - if (!ctx->name) - return -ENOMEM; +static const struct fs_parameter_spec cgroup1_param_specs[] = { + fsparam_flag ("all", Opt_all), + fsparam_flag ("clone_children", Opt_clone_children), + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + fsparam_string("name", Opt_name), + fsparam_flag ("none", Opt_none), + fsparam_flag ("noprefix", Opt_noprefix), + fsparam_string("release_agent", Opt_release_agent), + fsparam_flag ("xattr", Opt_xattr), + {} +}; - continue; - } +const struct fs_parameter_description cgroup1_fs_parameters = { + .name = "cgroup1", + .specs = cgroup1_param_specs, +}; +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct cgroup_subsys *ss; + struct fs_parse_result result; + int opt, i; + + opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result); + if (opt == -ENOPARAM) { + if (strcmp(param->key, "source") == 0) { + fc->source = param->string; + param->string = NULL; + return 0; + } for_each_subsys(ss, i) { - if (strcmp(token, ss->legacy_name)) + if (strcmp(param->key, ss->legacy_name)) continue; ctx->subsys_mask |= (1 << i); - break; + return 0; } - if (i == CGROUP_SUBSYS_COUNT) + return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key); + } + if (opt < 0) + return opt; + + switch (opt) { + case Opt_none: + /* Explicitly have no subsystems */ + ctx->none = true; + break; + case Opt_all: + ctx->all_ss = true; + break; + case Opt_noprefix: + ctx->flags |= CGRP_ROOT_NOPREFIX; + break; + case Opt_clone_children: + ctx->cpuset_clone_children = true; + break; + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + break; + case Opt_xattr: + ctx->flags |= CGRP_ROOT_XATTR; + break; + case Opt_release_agent: + /* Specifying two release agents is forbidden */ + if (ctx->release_agent) + return cg_invalf(fc, "cgroup1: release_agent respecified"); + ctx->release_agent = param->string; + param->string = NULL; + break; + case Opt_name: + /* blocked by boot param? */ + if (cgroup_no_v1_named) return -ENOENT; + /* Can't specify an empty name */ + if (!param->size) + return cg_invalf(fc, "cgroup1: Empty name"); + if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) + return cg_invalf(fc, "cgroup1: Name too long"); + /* Must match [\w.-]+ */ + for (i = 0; i < param->size; i++) { + char c = param->string[i]; + if (isalnum(c)) + continue; + if ((c == '.') || (c == '-') || (c == '_')) + continue; + return cg_invalf(fc, "cgroup1: Invalid name"); + } + /* Specifying two names is forbidden */ + if (ctx->name) + return cg_invalf(fc, "cgroup1: name respecified"); + ctx->name = param->string; + param->string = NULL; + break; } return 0; } -static int check_cgroupfs_options(struct cgroup_fs_context *ctx) +static int check_cgroupfs_options(struct fs_context *fc) { + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); u16 mask = U16_MAX; u16 enabled = 0; struct cgroup_subsys *ss; @@ -1018,7 +1044,7 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) if (ctx->all_ss) { /* Mutually exclusive option 'all' + subsystem name */ if (ctx->subsys_mask) - return -EINVAL; + return cg_invalf(fc, "cgroup1: subsys name conflicts with all"); /* 'all' => select all the subsystems */ ctx->subsys_mask = enabled; } @@ -1028,7 +1054,7 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) * empty hierarchies must have a name). */ if (!ctx->subsys_mask && !ctx->name) - return -EINVAL; + return cg_invalf(fc, "cgroup1: Need name or subsystem set"); /* * Option noprefix was introduced just for backward compatibility @@ -1036,11 +1062,11 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) * the cpuset subsystem. */ if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) - return -EINVAL; + return cg_invalf(fc, "cgroup1: noprefix used incorrectly"); /* Can't specify "none" and some subsystems */ if (ctx->subsys_mask && ctx->none) - return -EINVAL; + return cg_invalf(fc, "cgroup1: none used incorrectly"); return 0; } @@ -1056,7 +1082,7 @@ int cgroup1_reconfigure(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = check_cgroupfs_options(ctx); + ret = check_cgroupfs_options(fc); if (ret) goto out_unlock; @@ -1070,7 +1096,7 @@ int cgroup1_reconfigure(struct fs_context *fc) /* Don't allow flags or name to change at remount */ if ((ctx->flags ^ root->flags) || (ctx->name && strcmp(ctx->name, root->name))) { - pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", + cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; @@ -1125,7 +1151,7 @@ int cgroup1_get_tree(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = check_cgroupfs_options(ctx); + ret = check_cgroupfs_options(fc); if (ret) goto out_unlock; @@ -1192,7 +1218,7 @@ int cgroup1_get_tree(struct fs_context *fc) * can't create new one without subsys specification. */ if (!ctx->subsys_mask && !ctx->none) { - ret = -EINVAL; + ret = cg_invalf(fc, "cgroup1: No subsys list or none specified"); goto out_unlock; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 33da9eef3ef4..faba00caa197 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2083,15 +2083,6 @@ static int cgroup_parse_monolithic(struct fs_context *fc, void *data) return parse_cgroup_root_flags(data, &ctx->flags); } -static int cgroup1_parse_monolithic(struct fs_context *fc, void *data) -{ - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - - if (data) - security_sb_eat_lsm_opts(data, &fc->security); - return parse_cgroup1_options(data, ctx); -} - static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; @@ -2124,7 +2115,7 @@ static const struct fs_context_operations cgroup_fs_context_ops = { static const struct fs_context_operations cgroup1_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup1_parse_monolithic, + .parse_param = cgroup1_parse_param, .get_tree = cgroup1_get_tree, .reconfigure = cgroup1_reconfigure, }; @@ -2175,10 +2166,11 @@ static void cgroup_kill_sb(struct super_block *sb) } struct file_system_type cgroup_fs_type = { - .name = "cgroup", - .init_fs_context = cgroup_init_fs_context, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup1_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { -- cgit v1.2.3-55-g7522 From e34a98d5b226b84a3ed6da93e7a92e65cc1c81ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 00:22:58 -0500 Subject: cgroup2: switch to option-by-option parsing [again, carved out of patch by dhowells] [NB: we probably want to handle "source" in parse_param here] Signed-off-by: Al Viro --- kernel/cgroup/cgroup.c | 62 +++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index faba00caa197..d0cddfbdf5cf 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1772,26 +1773,37 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, return len; } -static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) -{ - char *token; +enum cgroup2_param { + Opt_nsdelegate, + nr__cgroup2_params +}; - *root_flags = 0; +static const struct fs_parameter_spec cgroup2_param_specs[] = { + fsparam_flag ("nsdelegate", Opt_nsdelegate), + {} +}; - if (!data || *data == '\0') - return 0; +static const struct fs_parameter_description cgroup2_fs_parameters = { + .name = "cgroup2", + .specs = cgroup2_param_specs, +}; - while ((token = strsep(&data, ",")) != NULL) { - if (!strcmp(token, "nsdelegate")) { - *root_flags |= CGRP_ROOT_NS_DELEGATE; - continue; - } +static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; - pr_err("cgroup2: unknown option \"%s\"\n", token); - return -EINVAL; - } + opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); + if (opt < 0) + return opt; - return 0; + switch (opt) { + case Opt_nsdelegate: + ctx->flags |= CGRP_ROOT_NS_DELEGATE; + return 0; + } + return -EINVAL; } static void apply_cgroup_root_flags(unsigned int root_flags) @@ -2074,15 +2086,6 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx); } -static int cgroup_parse_monolithic(struct fs_context *fc, void *data) -{ - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - - if (data) - security_sb_eat_lsm_opts(data, &fc->security); - return parse_cgroup_root_flags(data, &ctx->flags); -} - static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; @@ -2108,7 +2111,7 @@ static int cgroup_get_tree(struct fs_context *fc) static const struct fs_context_operations cgroup_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup_parse_monolithic, + .parse_param = cgroup2_parse_param, .get_tree = cgroup_get_tree, .reconfigure = cgroup_reconfigure, }; @@ -2174,10 +2177,11 @@ struct file_system_type cgroup_fs_type = { }; static struct file_system_type cgroup2_fs_type = { - .name = "cgroup2", - .init_fs_context = cgroup_init_fs_context, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup2", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup2_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, -- cgit v1.2.3-55-g7522 From cf6299b1d00555cd10dc30d95b300d7084128a2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 02:25:51 -0500 Subject: cgroup: stash cgroup_root reference into cgroup_fs_context Note that this reference is *NOT* contributing to refcount of cgroup_root in question and is valid only until cgroup_do_mount() returns. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 ++- kernel/cgroup/cgroup-v1.c | 4 +++- kernel/cgroup/cgroup.c | 7 +++++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index a7b5a41f170c..3c1613a7648c 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { + struct cgroup_root *root; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ @@ -208,7 +209,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); -void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx); +void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 725e9f6fe80d..45a198c63d6e 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1208,6 +1208,7 @@ int cgroup1_get_tree(struct fs_context *fc) if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); + ctx->root = root; ret = 0; goto out_unlock; } @@ -1234,7 +1235,8 @@ int cgroup1_get_tree(struct fs_context *fc) goto out_unlock; } - init_cgroup_root(root, ctx); + ctx->root = root; + init_cgroup_root(ctx); ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d0cddfbdf5cf..57f43f63363a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1915,8 +1915,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } -void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx) +void init_cgroup_root(struct cgroup_fs_context *ctx) { + struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; INIT_LIST_HEAD(&root->root_list); @@ -2098,6 +2099,7 @@ static int cgroup_get_tree(struct fs_context *fc) cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); + ctx->root = &cgrp_dfl_root; root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); @@ -5374,7 +5376,8 @@ int __init cgroup_init_early(void) struct cgroup_subsys *ss; int i; - init_cgroup_root(&cgrp_dfl_root, &ctx); + ctx.root = &cgrp_dfl_root; + init_cgroup_root(&ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); -- cgit v1.2.3-55-g7522 From 71d883c37e8d4484207708af56685abb39703b04 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 02:44:07 -0500 Subject: cgroup_do_mount(): massage calling conventions pass it fs_context instead of fs_type/flags/root triple, have it return int instead of dentry and make it deal with setting fc->root. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 +-- kernel/cgroup/cgroup-v1.c | 17 +++++----------- kernel/cgroup/cgroup.c | 45 +++++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 36 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 3c1613a7648c..f7fd54f2973f 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -212,8 +212,7 @@ void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, +int cgroup_do_mount(struct fs_context *fc, unsigned long magic, struct cgroup_namespace *ns); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 45a198c63d6e..05f05d773adf 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1141,7 +1141,6 @@ int cgroup1_get_tree(struct fs_context *fc) struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_root *root; struct cgroup_subsys *ss; - struct dentry *dentry; int i, ret; /* Check if the caller has permission to mount. */ @@ -1253,21 +1252,15 @@ out_free: if (ret) return ret; - dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, - CGROUP_SUPER_MAGIC, ns); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { - struct super_block *sb = dentry->d_sb; - dput(dentry); + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + struct super_block *sb = fc->root->d_sb; + dput(fc->root); deactivate_locked_super(sb); msleep(10); return restart_syscall(); } - - fc->root = dentry; - return 0; + return ret; } static int __init cgroup1_wq_init(void) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57f43f63363a..64360a46d4df 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2036,43 +2036,48 @@ out: return ret; } -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_mount(struct fs_context *fc, unsigned long magic, + struct cgroup_namespace *ns) { - struct dentry *dentry; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); bool new_sb = false; + int ret = 0; - dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); + fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, + magic, &new_sb); + if (IS_ERR(fc->root)) + ret = PTR_ERR(fc->root); /* * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { + if (!ret && ns != &init_cgroup_ns) { struct dentry *nsdentry; - struct super_block *sb = dentry->d_sb; + struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); - cgrp = cset_cgroup_from_root(ns->root_cset, root); + cgrp = cset_cgroup_from_root(ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); nsdentry = kernfs_node_dentry(cgrp->kn, sb); - dput(dentry); - if (IS_ERR(nsdentry)) + dput(fc->root); + fc->root = nsdentry; + if (IS_ERR(nsdentry)) { + ret = PTR_ERR(nsdentry); deactivate_locked_super(sb); - dentry = nsdentry; + } } if (!new_sb) - cgroup_put(&root->cgrp); + cgroup_put(&ctx->root->cgrp); - return dentry; + return ret; } /* @@ -2091,7 +2096,7 @@ static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct dentry *root; + int ret; /* Check if the caller has permission to mount. */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) @@ -2101,14 +2106,10 @@ static int cgroup_get_tree(struct fs_context *fc) cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; - root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (IS_ERR(root)) - return PTR_ERR(root); - - apply_cgroup_root_flags(ctx->flags); - fc->root = root; - return 0; + ret = cgroup_do_mount(fc, CGROUP2_SUPER_MAGIC, ns); + if (!ret) + apply_cgroup_root_flags(ctx->flags); + return ret; } static const struct fs_context_operations cgroup_fs_context_ops = { -- cgit v1.2.3-55-g7522 From 6678889f0726910fc884c54f951d8c5646a04819 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 09:42:30 -0500 Subject: cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper Signed-off-by: Al Viro --- kernel/cgroup/cgroup-v1.c | 87 +++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 41 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 05f05d773adf..0d71fc98e73d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1135,7 +1135,15 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .show_path = cgroup_show_path, }; -int cgroup1_get_tree(struct fs_context *fc) +/* + * The guts of cgroup1 mount - find or create cgroup_root to use. + * Called with cgroup_mutex held; returns 0 on success, -E... on + * error and positive - in case when the candidate is busy dying. + * On success it stashes a reference to cgroup_root into given + * cgroup_fs_context; that reference is *NOT* counting towards the + * cgroup_root refcount. + */ +static int cgroup1_root_to_use(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); @@ -1143,16 +1151,10 @@ int cgroup1_get_tree(struct fs_context *fc) struct cgroup_subsys *ss; int i, ret; - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - - cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); - /* First find the desired set of subsystems */ ret = check_cgroupfs_options(fc); if (ret) - goto out_unlock; + return ret; /* * Destruction of cgroup root is asynchronous, so subsystems may @@ -1166,12 +1168,8 @@ int cgroup1_get_tree(struct fs_context *fc) ss->root == &cgrp_dfl_root) continue; - if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { - mutex_unlock(&cgroup_mutex); - msleep(10); - ret = restart_syscall(); - goto out_free; - } + if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) + return 1; /* restart */ cgroup_put(&ss->root->cgrp); } @@ -1200,16 +1198,14 @@ int cgroup1_get_tree(struct fs_context *fc) (ctx->subsys_mask != root->subsys_mask)) { if (!name_match) continue; - ret = -EBUSY; - goto out_unlock; + return -EBUSY; } if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); ctx->root = root; - ret = 0; - goto out_unlock; + return 0; } /* @@ -1217,22 +1213,16 @@ int cgroup1_get_tree(struct fs_context *fc) * specification is allowed for already existing hierarchies but we * can't create new one without subsys specification. */ - if (!ctx->subsys_mask && !ctx->none) { - ret = cg_invalf(fc, "cgroup1: No subsys list or none specified"); - goto out_unlock; - } + if (!ctx->subsys_mask && !ctx->none) + return cg_invalf(fc, "cgroup1: No subsys list or none specified"); /* Hierarchies may only be created in the initial cgroup namespace. */ - if (ns != &init_cgroup_ns) { - ret = -EPERM; - goto out_unlock; - } + if (ns != &init_cgroup_ns) + return -EPERM; root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) { - ret = -ENOMEM; - goto out_unlock; - } + if (!root) + return -ENOMEM; ctx->root = root; init_cgroup_root(ctx); @@ -1240,23 +1230,38 @@ int cgroup1_get_tree(struct fs_context *fc) ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) cgroup_free_root(root); + return ret; +} + +int cgroup1_get_tree(struct fs_context *fc) +{ + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + int ret; + + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); + + ret = cgroup1_root_to_use(fc); + if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) + ret = 1; /* restart */ -out_unlock: - if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { - mutex_unlock(&cgroup_mutex); - msleep(10); - return restart_syscall(); - } mutex_unlock(&cgroup_mutex); -out_free: - if (ret) - return ret; - ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); - if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + if (!ret) + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + + if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { struct super_block *sb = fc->root->d_sb; dput(fc->root); deactivate_locked_super(sb); + ret = 1; + } + + if (unlikely(ret > 0)) { msleep(10); return restart_syscall(); } -- cgit v1.2.3-55-g7522 From cca8f32714d3a8bb4d109c9d7d790fd705b734e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 10:14:26 -0500 Subject: cgroup: store a reference to cgroup_ns into cgroup_fs_context ... and trim cgroup_do_mount() arguments (renaming it to cgroup_do_get_tree()) Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 4 ++-- kernel/cgroup/cgroup-v1.c | 8 +++----- kernel/cgroup/cgroup.c | 17 ++++++++++++----- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index f7fd54f2973f..37cf709b7a0e 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -42,6 +42,7 @@ extern void __init enable_debug_cgroup(void); */ struct cgroup_fs_context { struct cgroup_root *root; + struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ @@ -212,8 +213,7 @@ void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); -int cgroup_do_mount(struct fs_context *fc, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 0d71fc98e73d..571ef3447426 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1145,7 +1145,6 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { */ static int cgroup1_root_to_use(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_root *root; struct cgroup_subsys *ss; @@ -1217,7 +1216,7 @@ static int cgroup1_root_to_use(struct fs_context *fc) return cg_invalf(fc, "cgroup1: No subsys list or none specified"); /* Hierarchies may only be created in the initial cgroup namespace. */ - if (ns != &init_cgroup_ns) + if (ctx->ns != &init_cgroup_ns) return -EPERM; root = kzalloc(sizeof(*root), GFP_KERNEL); @@ -1235,12 +1234,11 @@ static int cgroup1_root_to_use(struct fs_context *fc) int cgroup1_get_tree(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); @@ -1252,7 +1250,7 @@ int cgroup1_get_tree(struct fs_context *fc) mutex_unlock(&cgroup_mutex); if (!ret) - ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + ret = cgroup_do_get_tree(fc); if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { struct super_block *sb = fc->root->d_sb; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 64360a46d4df..0c6bef234a7c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2036,13 +2036,17 @@ out: return ret; } -int cgroup_do_mount(struct fs_context *fc, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); bool new_sb = false; + unsigned long magic; int ret = 0; + if (fc->fs_type == &cgroup2_fs_type) + magic = CGROUP2_SUPER_MAGIC; + else + magic = CGROUP_SUPER_MAGIC; fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, magic, &new_sb); if (IS_ERR(fc->root)) @@ -2052,7 +2056,7 @@ int cgroup_do_mount(struct fs_context *fc, unsigned long magic, * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!ret && ns != &init_cgroup_ns) { + if (!ret && ctx->ns != &init_cgroup_ns) { struct dentry *nsdentry; struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; @@ -2060,7 +2064,7 @@ int cgroup_do_mount(struct fs_context *fc, unsigned long magic, mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); - cgrp = cset_cgroup_from_root(ns->root_cset, ctx->root); + cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); @@ -2089,6 +2093,7 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx->name); kfree(ctx->release_agent); + put_cgroup_ns(ctx->ns); kfree(ctx); } @@ -2106,7 +2111,7 @@ static int cgroup_get_tree(struct fs_context *fc) cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; - ret = cgroup_do_mount(fc, CGROUP2_SUPER_MAGIC, ns); + ret = cgroup_do_get_tree(fc); if (!ret) apply_cgroup_root_flags(ctx->flags); return ret; @@ -2144,6 +2149,8 @@ static int cgroup_init_fs_context(struct fs_context *fc) if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); fc->fs_private = ctx; if (fc->fs_type == &cgroup2_fs_type) fc->ops = &cgroup_fs_context_ops; -- cgit v1.2.3-55-g7522 From 23bf1b6be9c291a7130118dcc7384f72ac04d813 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:26 +0000 Subject: kernfs, sysfs, cgroup, intel_rdt: Support fs_context Make kernfs support superblock creation/mount/remount with fs_context. This requires that sysfs, cgroup and intel_rdt, which are built on kernfs, be made to support fs_context also. Notes: (1) A kernfs_fs_context struct is created to wrap fs_context and the kernfs mount parameters are moved in here (or are in fs_context). (2) kernfs_mount{,_ns}() are made into kernfs_get_tree(). The extra namespace tag parameter is passed in the context if desired (3) kernfs_free_fs_context() is provided as a destructor for the kernfs_fs_context struct, but for the moment it does nothing except get called in the right places. (4) sysfs doesn't wrap kernfs_fs_context since it has no parameters to pass, but possibly this should be done anyway in case someone wants to add a parameter in future. (5) A cgroup_fs_context struct is created to wrap kernfs_fs_context and the cgroup v1 and v2 mount parameters are all moved there. (6) cgroup1 parameter parsing error messages are now handled by invalf(), which allows userspace to collect them directly. (7) cgroup1 parameter cleanup is now done in the context destructor rather than in the mount/get_tree and remount functions. Weirdies: (*) cgroup_do_get_tree() calls cset_cgroup_from_root() with locks held, but then uses the resulting pointer after dropping the locks. I'm told this is okay and needs commenting. (*) The cgroup refcount web. This really needs documenting. (*) cgroup2 only has one root? Add a suggestion from Thomas Gleixner in which the RDT enablement code is placed into its own function. [folded a leak fix from Andrey Vagin] Signed-off-by: David Howells cc: Greg Kroah-Hartman cc: Tejun Heo cc: Li Zefan cc: Johannes Weiner cc: cgroups@vger.kernel.org cc: fenghua.yu@intel.com Signed-off-by: Al Viro --- arch/x86/kernel/cpu/resctrl/internal.h | 16 +++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 185 +++++++++++++++++++++------------ fs/kernfs/kernfs-internal.h | 1 + fs/kernfs/mount.c | 89 +++++++--------- fs/sysfs/mount.c | 73 +++++++++---- include/linux/kernfs.h | 38 +++---- kernel/cgroup/cgroup-internal.h | 5 +- kernel/cgroup/cgroup.c | 31 +++--- 8 files changed, 262 insertions(+), 176 deletions(-) (limited to 'kernel/cgroup') diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 822b7db634ee..e49b77283924 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -4,6 +4,7 @@ #include #include +#include #include #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -40,6 +41,21 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) + +struct rdt_fs_context { + struct kernfs_fs_context kfc; + bool enable_cdpl2; + bool enable_cdpl3; + bool enable_mba_mbps; +}; + +static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct rdt_fs_context, kfc); +} + DECLARE_STATIC_KEY_FALSE(rdt_enable_key); /** diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 8388adf241b2..399601eda8e4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include @@ -1858,46 +1860,6 @@ static void cdp_disable_all(void) cdpl2_disable(); } -static int parse_rdtgroupfs_options(char *data) -{ - char *token, *o = data; - int ret = 0; - - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) { - ret = -EINVAL; - goto out; - } - - if (!strcmp(token, "cdp")) { - ret = cdpl3_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "cdpl2")) { - ret = cdpl2_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "mba_MBps")) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = set_mba_sc(true); - else - ret = -EINVAL; - if (ret) - goto out; - } else { - ret = -EINVAL; - goto out; - } - } - - return 0; - -out: - pr_err("Invalid mount option \"%s\"\n", token); - - return ret; -} - /* * We don't allow rdtgroup directories to be created anywhere * except the root directory. Thus when looking for the rdtgroup @@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **mon_data_kn); -static struct dentry *rdt_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int rdt_enable_ctx(struct rdt_fs_context *ctx) +{ + int ret = 0; + + if (ctx->enable_cdpl2) + ret = cdpl2_enable(); + + if (!ret && ctx->enable_cdpl3) + ret = cdpl3_enable(); + + if (!ret && ctx->enable_mba_mbps) + ret = set_mba_sc(true); + + return ret; +} + +static int rdt_get_tree(struct fs_context *fc) { + struct rdt_fs_context *ctx = rdt_fc2context(fc); struct rdt_domain *dom; struct rdt_resource *r; - struct dentry *dentry; int ret; cpus_read_lock(); @@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, * resctrl file system can only be mounted once. */ if (static_branch_unlikely(&rdt_enable_key)) { - dentry = ERR_PTR(-EBUSY); + ret = -EBUSY; goto out; } - ret = parse_rdtgroupfs_options(data); - if (ret) { - dentry = ERR_PTR(ret); + ret = rdt_enable_ctx(ctx); + if (ret < 0) goto out_cdp; - } closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret) { - dentry = ERR_PTR(ret); - goto out_cdp; - } + if (ret < 0) + goto out_mba; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, NULL, "mon_groups", &kn_mongrp); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_info; - } kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_mongrp; - } kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } ret = rdt_pseudo_lock_init(); - if (ret) { - dentry = ERR_PTR(ret); + if (ret) goto out_mondata; - } - dentry = kernfs_mount(fs_type, flags, rdt_root, - RDTGROUP_SUPER_MAGIC, NULL); - if (IS_ERR(dentry)) + ret = kernfs_get_tree(fc); + if (ret < 0) goto out_psl; if (rdt_alloc_capable) @@ -2059,14 +2024,95 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_mba: + if (ctx->enable_mba_mbps) + set_mba_sc(false); out_cdp: cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + return ret; +} + +enum rdt_param { + Opt_cdp, + Opt_cdpl2, + Opt_mba_mpbs, + nr__rdt_params +}; + +static const struct fs_parameter_spec rdt_param_specs[] = { + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_mpbs", Opt_mba_mpbs), + {} +}; + +static const struct fs_parameter_description rdt_fs_parameters = { + .name = "rdt", + .specs = rdt_param_specs, +}; + +static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, &rdt_fs_parameters, param, &result); + if (opt < 0) + return opt; - return dentry; + switch (opt) { + case Opt_cdp: + ctx->enable_cdpl3 = true; + return 0; + case Opt_cdpl2: + ctx->enable_cdpl2 = true; + return 0; + case Opt_mba_mpbs: + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -EINVAL; + ctx->enable_mba_mbps = true; + return 0; + } + + return -EINVAL; +} + +static void rdt_fs_context_free(struct fs_context *fc) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations rdt_fs_context_ops = { + .free = rdt_fs_context_free, + .parse_param = rdt_parse_param, + .get_tree = rdt_get_tree, +}; + +static int rdt_init_fs_context(struct fs_context *fc) +{ + struct rdt_fs_context *ctx; + + ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->kfc.root = rdt_root; + ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &rdt_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(&init_user_ns); + fc->global = true; + return 0; } static int reset_all_ctrls(struct rdt_resource *r) @@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb) } static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .mount = rdt_mount, - .kill_sb = rdt_kill_sb, + .name = "resctrl", + .init_fs_context = rdt_init_fs_context, + .parameters = &rdt_fs_parameters, + .kill_sb = rdt_kill_sb, }; static int mon_addfile(struct kernfs_node *parent_kn, const char *name, diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 3d83b114bb08..379e3a9eb1ec 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -17,6 +17,7 @@ #include #include +#include struct kernfs_iattrs { struct iattr ia_iattr; diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 4d303047a4f8..36376cc5c9c2 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -22,16 +22,6 @@ struct kmem_cache *kernfs_node_cache; -static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct kernfs_root *root = kernfs_info(sb)->root; - struct kernfs_syscall_ops *scops = root->syscall_ops; - - if (scops && scops->remount_fs) - return scops->remount_fs(root, flags, data); - return 0; -} - static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); @@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = { .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, - .remount_fs = kernfs_sop_remount_fs, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, }; @@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, } while (true); } -static int kernfs_fill_super(struct super_block *sb, unsigned long magic) +static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct inode *inode; @@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; - sb->s_magic = magic; + sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) @@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) return 0; } -static int kernfs_test_super(struct super_block *sb, void *data) +static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); - struct kernfs_super_info *info = data; + struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } -static int kernfs_set_super(struct super_block *sb, void *data) +static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { - int error; - error = set_anon_super(sb, data); - if (!error) - sb->s_fs_info = data; - return error; + struct kernfs_fs_context *kfc = fc->fs_private; + + kfc->ns_tag = NULL; + return set_anon_super_fc(sb, fc); } /** @@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb) } /** - * kernfs_mount_ns - kernfs mount helper - * @fs_type: file_system_type of the fs being mounted - * @flags: mount flags specified for the mount - * @root: kernfs_root of the hierarchy being mounted - * @magic: file system specific magic number - * @new_sb_created: tell the caller if we allocated a new superblock - * @ns: optional namespace tag of the mount + * kernfs_get_tree - kernfs filesystem access/retrieval helper + * @fc: The filesystem context. * - * This is to be called from each kernfs user's file_system_type->mount() - * implementation, which should pass through the specified @fs_type and - * @flags, and specify the hierarchy and namespace tag to mount via @root - * and @ns, respectively. - * - * The return value can be passed to the vfs layer verbatim. + * This is to be called from each kernfs user's fs_context->ops->get_tree() + * implementation, which should set the specified ->@fs_type and ->@flags, and + * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, + * respectively. */ -struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns) +int kernfs_get_tree(struct fs_context *fc) { + struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - info->root = root; - info->ns = ns; + info->root = kfc->root; + info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); - sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, - &init_user_ns, info); - if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + fc->s_fs_info = info; + sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (new_sb_created) - *new_sb_created = !sb->s_root; + return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); - error = kernfs_fill_super(sb, magic); + kfc->new_sb_created = true; + + error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); - return ERR_PTR(error); + return error; } sb->s_flags |= SB_ACTIVE; mutex_lock(&kernfs_mutex); - list_add(&info->node, &root->supers); + list_add(&info->node, &info->root->supers); mutex_unlock(&kernfs_mutex); } - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; +} + +void kernfs_free_fs_context(struct fs_context *fc) +{ + /* Note that we don't deal with kfc->ns_tag here. */ + kfree(fc->s_fs_info); + fc->s_fs_info = NULL; } /** diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 92682fcc41f6..4cb21b558a85 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -13,34 +13,69 @@ #include #include #include +#include #include +#include +#include #include "sysfs.h" static struct kernfs_root *sysfs_root; struct kernfs_node *sysfs_root_kn; -static struct dentry *sysfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysfs_get_tree(struct fs_context *fc) { - struct dentry *root; - void *ns; - bool new_sb = false; + struct kernfs_fs_context *kfc = fc->fs_private; + int ret; - if (!(flags & SB_KERNMOUNT)) { + ret = kernfs_get_tree(fc); + if (ret) + return ret; + + if (kfc->new_sb_created) + fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + return 0; +} + +static void sysfs_fs_context_free(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + if (kfc->ns_tag) + kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag); + kernfs_free_fs_context(fc); + kfree(kfc); +} + +static const struct fs_context_operations sysfs_fs_context_ops = { + .free = sysfs_fs_context_free, + .get_tree = sysfs_get_tree, +}; + +static int sysfs_init_fs_context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc; + struct net *netns; + + if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) - return ERR_PTR(-EPERM); + return -EPERM; } - ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, - SYSFS_MAGIC, &new_sb, ns); - if (!new_sb) - kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (!IS_ERR(root)) - root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL); + if (!kfc) + return -ENOMEM; - return root; + kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + kfc->root = sysfs_root; + kfc->magic = SYSFS_MAGIC; + fc->fs_private = kfc; + fc->ops = &sysfs_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(netns->user_ns); + fc->global = true; + return 0; } static void sysfs_kill_sb(struct super_block *sb) @@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb) } static struct file_system_type sysfs_fs_type = { - .name = "sysfs", - .mount = sysfs_mount, - .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "sysfs", + .init_fs_context = sysfs_init_fs_context, + .kill_sb = sysfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 44acb4c3659c..822a64e65b41 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -25,7 +25,9 @@ struct seq_file; struct vm_area_struct; struct super_block; struct file_system_type; +struct fs_context; +struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; @@ -167,7 +169,6 @@ struct kernfs_node { * kernfs_node parameter. */ struct kernfs_syscall_ops { - int (*remount_fs)(struct kernfs_root *root, int *flags, char *data); int (*show_options)(struct seq_file *sf, struct kernfs_root *root); int (*mkdir)(struct kernfs_node *parent, const char *name, @@ -268,6 +269,18 @@ struct kernfs_ops { #endif }; +/* + * The kernfs superblock creation/mount parameter context. + */ +struct kernfs_fs_context { + struct kernfs_root *root; /* Root of the hierarchy being mounted */ + void *ns_tag; /* Namespace tag of the mount (or NULL) */ + unsigned long magic; /* File system specific magic number */ + + /* The following are set/used by kernfs_mount() */ + bool new_sb_created; /* Set to T if we allocated a new sb */ +}; + #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) @@ -353,9 +366,8 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); void kernfs_notify(struct kernfs_node *kn); const void *kernfs_super_ns(struct super_block *sb); -struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns); +int kernfs_get_tree(struct fs_context *fc); +void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); @@ -458,11 +470,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } -static inline struct dentry * -kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns) -{ return ERR_PTR(-ENOSYS); } +static inline int kernfs_get_tree(struct fs_context *fc) +{ return -ENOSYS; } + +static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } @@ -545,13 +556,4 @@ static inline int kernfs_rename(struct kernfs_node *kn, return kernfs_rename_ns(kn, new_parent, new_name, NULL); } -static inline struct dentry * -kernfs_mount(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created) -{ - return kernfs_mount_ns(fs_type, flags, root, - magic, new_sb_created, NULL); -} - #endif /* __LINUX_KERNFS_H */ diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 37cf709b7a0e..30e39f3932ad 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { + struct kernfs_fs_context kfc; struct cgroup_root *root; struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ @@ -56,7 +57,9 @@ struct cgroup_fs_context { static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) { - return fc->fs_private; + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct cgroup_fs_context, kfc); } /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0c6bef234a7c..747e5b17f9da 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2039,18 +2039,14 @@ out: int cgroup_do_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - bool new_sb = false; - unsigned long magic; - int ret = 0; + int ret; + ctx->kfc.root = ctx->root->kf_root; if (fc->fs_type == &cgroup2_fs_type) - magic = CGROUP2_SUPER_MAGIC; + ctx->kfc.magic = CGROUP2_SUPER_MAGIC; else - magic = CGROUP_SUPER_MAGIC; - fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, - magic, &new_sb); - if (IS_ERR(fc->root)) - ret = PTR_ERR(fc->root); + ctx->kfc.magic = CGROUP_SUPER_MAGIC; + ret = kernfs_get_tree(fc); /* * In non-init cgroup namespace, instead of root cgroup's dentry, @@ -2078,7 +2074,7 @@ int cgroup_do_get_tree(struct fs_context *fc) } } - if (!new_sb) + if (!ctx->kfc.new_sb_created) cgroup_put(&ctx->root->cgrp); return ret; @@ -2094,19 +2090,15 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx->name); kfree(ctx->release_agent); put_cgroup_ns(ctx->ns); + kernfs_free_fs_context(fc); kfree(ctx); } static int cgroup_get_tree(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; @@ -2132,7 +2124,8 @@ static const struct fs_context_operations cgroup1_fs_context_ops = { }; /* - * Initialise the cgroup filesystem creation/reconfiguration context. + * Initialise the cgroup filesystem creation/reconfiguration context. Notably, + * we select the namespace we're going to use. */ static int cgroup_init_fs_context(struct fs_context *fc) { @@ -2151,11 +2144,15 @@ static int cgroup_init_fs_context(struct fs_context *fc) ctx->ns = current->nsproxy->cgroup_ns; get_cgroup_ns(ctx->ns); - fc->fs_private = ctx; + fc->fs_private = &ctx->kfc; if (fc->fs_type == &cgroup2_fs_type) fc->ops = &cgroup_fs_context_ops; else fc->ops = &cgroup1_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ctx->ns->user_ns); + fc->global = true; return 0; } -- cgit v1.2.3-55-g7522 From a18753747385b8b98577a18adc8ec99fda679044 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:25 +0000 Subject: cpuset: Use fs_context Make the cpuset filesystem use the filesystem context. This is potentially tricky as the cpuset fs is almost an alias for the cgroup filesystem, but with some special parameters. This can, however, be handled by setting up an appropriate cgroup filesystem and returning the root directory of that as the root dir of this one. Signed-off-by: David Howells cc: Tejun Heo Signed-off-by: Al Viro --- kernel/cgroup/cpuset.c | 56 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'kernel/cgroup') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 479743db6c37..9758b03834ac 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -372,25 +373,52 @@ static inline bool is_in_v2_mode(void) * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */ -static struct dentry *cpuset_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, void *data) -{ - struct file_system_type *cgroup_fs = get_fs_type("cgroup"); - struct dentry *ret = ERR_PTR(-ENODEV); - if (cgroup_fs) { - char mountopts[] = - "cpuset,noprefix," - "release_agent=/sbin/cpuset_release_agent"; - ret = cgroup_fs->mount(cgroup_fs, flags, - unused_dev_name, mountopts); - put_filesystem(cgroup_fs); +static int cpuset_get_tree(struct fs_context *fc) +{ + struct file_system_type *cgroup_fs; + struct fs_context *new_fc; + int ret; + + cgroup_fs = get_fs_type("cgroup"); + if (!cgroup_fs) + return -ENODEV; + + new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags); + if (IS_ERR(new_fc)) { + ret = PTR_ERR(new_fc); + } else { + static const char agent_path[] = "/sbin/cpuset_release_agent"; + ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0); + if (!ret) + ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0); + if (!ret) + ret = vfs_parse_fs_string(new_fc, "release_agent", + agent_path, sizeof(agent_path) - 1); + if (!ret) + ret = vfs_get_tree(new_fc); + if (!ret) { /* steal the result */ + fc->root = new_fc->root; + new_fc->root = NULL; + } + put_fs_context(new_fc); } + put_filesystem(cgroup_fs); return ret; } +static const struct fs_context_operations cpuset_fs_context_ops = { + .get_tree = cpuset_get_tree, +}; + +static int cpuset_init_fs_context(struct fs_context *fc) +{ + fc->ops = &cpuset_fs_context_ops; + return 0; +} + static struct file_system_type cpuset_fs_type = { - .name = "cpuset", - .mount = cpuset_mount, + .name = "cpuset", + .init_fs_context = cpuset_init_fs_context, }; /* -- cgit v1.2.3-55-g7522 From 06a2ae56b5b88fa57cd56e0b99bd874135efdf58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:26 +0000 Subject: vfs: Add some logging to the core users of the fs_context log Add some logging to the core users of the fs_context log so that information can be extracted from them as to the reason for failure. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/super.c | 4 +++- kernel/cgroup/cgroup-v1.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup') diff --git a/fs/super.c b/fs/super.c index 0ebb5c11fa56..583a0124bc39 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1467,8 +1467,10 @@ int vfs_get_tree(struct fs_context *fc) struct super_block *sb; int error; - if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) { + errorf(fc, "Filesystem requires source device"); return -ENOENT; + } if (fc->root) return -EBUSY; diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 571ef3447426..c126b34fd4ff 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -17,7 +17,7 @@ #include -#define cg_invalf(fc, fmt, ...) ({ pr_err(fmt, ## __VA_ARGS__); -EINVAL; }) +#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) /* * pidlists linger the following amount before being destroyed. The goal -- cgit v1.2.3-55-g7522