From a2d477778e82a60a0b7114cefdb70aa43af28782 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 12 Nov 2008 16:19:00 +0530 Subject: sched: fix stale value in average load per task Impact: fix load balancer load average calculation accuracy cpu_avg_load_per_task() returns a stale value when nr_running is 0. It returns an older stale (caculated when nr_running was non zero) value. This patch returns and sets rq->avg_load_per_task to zero when nr_running is 0. Compile and boot tested on a x86_64 box. Signed-off-by: Balbir Singh Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 50a21f964679..3bafbe350f4f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1456,6 +1456,8 @@ static unsigned long cpu_avg_load_per_task(int cpu) if (rq->nr_running) rq->avg_load_per_task = rq->load.weight / rq->nr_running; + else + rq->avg_load_per_task = 0; return rq->avg_load_per_task; } -- cgit v1.2.3-55-g7522 From 5cbd54ef470d880fc37fbe4b21eb514806d51e0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 20:05:50 +0100 Subject: sched: fix init_idle()'s use of sched_clock() Maciej Rutecki reported: > I have this bug during suspend to disk: > > [ 188.592151] Enabling non-boot CPUs ... > [ 188.592151] SMP alternatives: switching to SMP code > [ 188.666058] BUG: using smp_processor_id() in preemptible > [00000000] > code: suspend_to_disk/2934 > [ 188.666064] caller is native_sched_clock+0x2b/0x80 Which, as noted by Linus, was caused by me, via: 7cbaef9c "sched: optimize sched_clock() a bit" Move the rq locking a bit earlier in the initialization sequence, that will make the sched_clock() call in init_idle() non-preemptible. Reported-by: Maciej Rutecki Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 3bafbe350f4f..c94baf2969e7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5870,6 +5870,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; + spin_lock_irqsave(&rq->lock, flags); + __sched_fork(idle); idle->se.exec_start = sched_clock(); @@ -5877,7 +5879,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); - spin_lock_irqsave(&rq->lock, flags); rq->curr = rq->idle = idle; #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) idle->oncpu = 1; -- cgit v1.2.3-55-g7522 From 700018e0a77b4113172257fcdaa1c58e27a5074f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 18 Nov 2008 14:02:03 +0800 Subject: cpuset: fix regression when failed to generate sched domains Impact: properly rebuild sched-domains on kmalloc() failure When cpuset failed to generate sched domains due to kmalloc() failure, the scheduler should fallback to the single partition 'fallback_doms' and rebuild sched domains, but now it only destroys but not rebuilds sched domains. The regression was introduced by: | commit dfb512ec4834116124da61d6c1ee10fd0aa32bd6 | Author: Max Krasnyansky | Date: Fri Aug 29 13:11:41 2008 -0700 | | sched: arch_reinit_sched_domains() must destroy domains to force rebuild After the above commit, partition_sched_domains(0, NULL, NULL) will only destroy sched domains and partition_sched_domains(1, NULL, NULL) will create the default sched domain. Signed-off-by: Li Zefan Cc: Max Krasnyansky Cc: Signed-off-by: Ingo Molnar --- kernel/cpuset.c | 12 ++++++++---- kernel/sched.c | 13 +++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3e00526f52ec..81fc6791a296 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -587,7 +587,6 @@ static int generate_sched_domains(cpumask_t **domains, int ndoms; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ - ndoms = 0; doms = NULL; dattr = NULL; csa = NULL; @@ -674,10 +673,8 @@ restart: * Convert to and populate cpu masks. */ doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); - if (!doms) { - ndoms = 0; + if (!doms) goto done; - } /* * The rest of the code, including the scheduler, can deal with @@ -732,6 +729,13 @@ restart: done: kfree(csa); + /* + * Fallback to the default domain if kmalloc() failed. + * See comments in partition_sched_domains(). + */ + if (doms == NULL) + ndoms = 1; + *domains = doms; *attributes = dattr; return ndoms; diff --git a/kernel/sched.c b/kernel/sched.c index c94baf2969e7..9b1e79371c20 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7789,13 +7789,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * * The passed in 'doms_new' should be kmalloc'd. This routine takes * ownership of it and will kfree it when done with it. If the caller - * failed the kmalloc call, then it can pass in doms_new == NULL, - * and partition_sched_domains() will fallback to the single partition - * 'fallback_doms', it also forces the domains to be rebuilt. + * failed the kmalloc call, then it can pass in doms_new == NULL && + * ndoms_new == 1, and partition_sched_domains() will fallback to + * the single partition 'fallback_doms', it also forces the domains + * to be rebuilt. * - * If doms_new==NULL it will be replaced with cpu_online_map. - * ndoms_new==0 is a special case for destroying existing domains. - * It will not create the default domain. + * If doms_new == NULL it will be replaced with cpu_online_map. + * ndoms_new == 0 is a special case for destroying existing domains, + * and it will not create the default domain. * * Call with hotplug lock held */ -- cgit v1.2.3-55-g7522