summary refs log tree commit diff stats
path: root/kernel/cgroup/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup/cpuset.c')
-rw-r--r-- kernel/cgroup/cpuset.c 99
1 files changed, 63 insertions, 36 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 479743db6c37..863e434a6020 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -39,6 +39,7 @@
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
@@ -203,19 +204,6 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
return css_cs(cs->css.parent);
}
-#ifdef CONFIG_NUMA
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
- return task->mempolicy;
-}
-#else
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
- return false;
-}
-#endif
-
-
/* bits in struct cpuset flags field */
typedef enum {
CS_ONLINE,
@@ -372,25 +360,52 @@ static inline bool is_in_v2_mode(void)
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead
*/
-static struct dentry *cpuset_mount(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name, void *data)
-{
- struct file_system_type *cgroup_fs = get_fs_type("cgroup");
- struct dentry *ret = ERR_PTR(-ENODEV);
- if (cgroup_fs) {
- char mountopts[] =
- "cpuset,noprefix,"
- "release_agent=/sbin/cpuset_release_agent";
- ret = cgroup_fs->mount(cgroup_fs, flags,
- unused_dev_name, mountopts);
- put_filesystem(cgroup_fs);
+static int cpuset_get_tree(struct fs_context *fc)
+{
+ struct file_system_type *cgroup_fs;
+ struct fs_context *new_fc;
+ int ret;
+
+ cgroup_fs = get_fs_type("cgroup");
+ if (!cgroup_fs)
+ return -ENODEV;
+
+ new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags);
+ if (IS_ERR(new_fc)) {
+ ret = PTR_ERR(new_fc);
+ } else {
+ static const char agent_path[] = "/sbin/cpuset_release_agent";
+ ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0);
+ if (!ret)
+ ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0);
+ if (!ret)
+ ret = vfs_parse_fs_string(new_fc, "release_agent",
+ agent_path, sizeof(agent_path) - 1);
+ if (!ret)
+ ret = vfs_get_tree(new_fc);
+ if (!ret) { /* steal the result */
+ fc->root = new_fc->root;
+ new_fc->root = NULL;
+ }
+ put_fs_context(new_fc);
}
+ put_filesystem(cgroup_fs);
return ret;
}
+static const struct fs_context_operations cpuset_fs_context_ops = {
+ .get_tree = cpuset_get_tree,
+};
+
+static int cpuset_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &cpuset_fs_context_ops;
+ return 0;
+}
+
static struct file_system_type cpuset_fs_type = {
- .name = "cpuset",
- .mount = cpuset_mount,
+ .name = "cpuset",
+ .init_fs_context = cpuset_init_fs_context,
};
/*
@@ -714,7 +729,7 @@ static inline int nr_cpusets(void)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
@@ -725,11 +740,10 @@ static inline int nr_cpusets(void)
* Must be called with cpuset_mutex held.
*
* The three key local variables below are:
- * q - a linked-list queue of cpuset pointers, used to implement a
- * top-down scan of all cpusets. This scan loads a pointer
- * to each cpuset marked is_sched_load_balance into the
- * array 'csa'. For our purposes, rebuilding the schedulers
- * sched domains, we can ignore !is_sched_load_balance cpusets.
+ * cp - cpuset pointer, used (together with pos_css) to perform a
+ * top-down scan of all cpusets. For our purposes, rebuilding
+ * the schedulers sched domains, we can ignore !is_sched_load_
+ * balance cpusets.
* csa - (for CpuSet Array) Array of pointers to all the cpusets
* that need to be load balanced, for convenient iterative
* access by the subsequent code that finds the best partition,
@@ -760,7 +774,7 @@ static inline int nr_cpusets(void)
static int generate_sched_domains(cpumask_var_t **domains,
struct sched_domain_attr **attributes)
{
- struct cpuset *cp; /* scans q */
+ struct cpuset *cp; /* top-down scan of cpusets */
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
@@ -2815,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
if (task_css_is_root(task, cpuset_cgrp_id))
return;
- set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
}
@@ -3240,10 +3254,23 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
spin_unlock_irqrestore(&callback_lock, flags);
}
+/**
+ * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
+ * @tsk: pointer to task_struct with which the scheduler is struggling
+ *
+ * Description: In the case that the scheduler cannot find an allowed cpu in
+ * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
+ * mode however, this value is the same as task_cs(tsk)->effective_cpus,
+ * which will not contain a sane cpumask during cases such as cpu hotplugging.
+ * This is the absolute last resort for the scheduler and it is only used if
+ * _every_ other avenue has been traveled.
+ **/
+
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
rcu_read_lock();
- do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
+ do_set_cpus_allowed(tsk, is_in_v2_mode() ?
+ task_cs(tsk)->cpus_allowed : cpu_possible_mask);
rcu_read_unlock();
/*