diff options
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r-- | kernel/time/timer.c | 102 |
1 files changed, 67 insertions, 35 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 152a706ef8b8..f2674a056c26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -195,7 +195,7 @@ EXPORT_SYMBOL(jiffies_64); #endif struct timer_base { - spinlock_t lock; + raw_spinlock_t lock; struct timer_list *running_timer; unsigned long clk; unsigned long next_expiry; @@ -203,6 +203,7 @@ struct timer_base { bool migration_enabled; bool nohz_active; bool is_idle; + bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { - unsigned long jnow = READ_ONCE(jiffies); + unsigned long jnow; /* - * We only forward the base when it's idle and we have a delta between - * base clock and jiffies. + * We only forward the base when we are idle or have just come out of + * idle (must_forward_clk logic), and have a delta between base clock + * and jiffies. In the common case, run_timers will take care of it. 
*/ - if (!base->is_idle || (long) (jnow - base->clk) < 2) + if (likely(!base->must_forward_clk)) + return; + + jnow = READ_ONCE(jiffies); + base->must_forward_clk = base->is_idle; + if ((long)(jnow - base->clk) < 2) return; /* @@ -913,10 +920,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); - spin_lock_irqsave(&base->lock, *flags); + raw_spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; - spin_unlock_irqrestore(&base->lock, *flags); + raw_spin_unlock_irqrestore(&base->lock, *flags); } cpu_relax(); } @@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * same array bucket then just return: */ if (timer_pending(timer)) { + /* + * The downside of this optimization is that it can result in + * larger granularity than you would get from adding a new + * timer with this expiry. + */ if (timer->expires == expires) return 1; @@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * dequeue/enqueue dance. 
*/ base = lock_timer_base(timer, &flags); + forward_timer_base(base); clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } else { base = lock_timer_base(timer, &flags); + forward_timer_base(base); } ret = detach_if_pending(timer, base, false); @@ -986,17 +1000,15 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) /* See the comment in lock_timer_base() */ timer->flags |= TIMER_MIGRATING; - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); base = new_base; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); + forward_timer_base(base); } } - /* Try to forward a stale timer base clock */ - forward_timer_base(base); - timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1013,7 +1025,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } out_unlock: - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -1106,16 +1118,17 @@ void add_timer_on(struct timer_list *timer, int cpu) if (base != new_base) { timer->flags |= TIMER_MIGRATING; - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); base = new_base; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } + forward_timer_base(base); debug_activate(timer, timer->expires); internal_add_timer(base, timer); - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); @@ -1141,7 +1154,7 @@ int del_timer(struct timer_list *timer) if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); 
} return ret; @@ -1150,7 +1163,7 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer do del + * @timer: timer to delete * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. @@ -1168,7 +1181,7 @@ int try_to_del_timer_sync(struct timer_list *timer) if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -1299,13 +1312,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) data = timer->data; if (timer->flags & TIMER_IRQSAFE) { - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, data); - spin_lock(&base->lock); + raw_spin_lock(&base->lock); } else { - spin_unlock_irq(&base->lock); + raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); - spin_lock_irq(&base->lock); + raw_spin_lock_irq(&base->lock); } } } @@ -1474,7 +1487,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (cpu_is_offline(smp_processor_id())) return expires; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); nextevt = __next_timer_interrupt(base); is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); base->next_expiry = nextevt; @@ -1495,14 +1508,20 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) base->is_idle = false; } else { if (!is_max_delta) - expires = basem + (nextevt - basej) * TICK_NSEC; + expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* - * If we expect to sleep more than a tick, mark the base idle: + * If we expect to sleep more than a tick, mark the base idle. + * Also the tick is stopped so any added timer must forward + * the base clk itself to keep granularity small. 
This idle + * logic is only maintained for the BASE_STD base, deferrable + * timers may still see large granularity skew (by design). */ - if ((expires - basem) > TICK_NSEC) + if ((expires - basem) > TICK_NSEC) { + base->must_forward_clk = true; base->is_idle = true; + } } - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); return cmp_next_hrtimer_event(basem, expires); } @@ -1590,7 +1609,7 @@ static inline void __run_timers(struct timer_base *base) if (!time_after_eq(jiffies, base->clk)) return; - spin_lock_irq(&base->lock); + raw_spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->clk)) { @@ -1601,7 +1620,7 @@ static inline void __run_timers(struct timer_base *base) expire_timers(base, heads + levels); } base->running_timer = NULL; - spin_unlock_irq(&base->lock); + raw_spin_unlock_irq(&base->lock); } /* @@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the + * base. idle tracking / clock forwarding logic is only used with + * BASE_STD timers. + * + * The deferrable base does not do idle tracking at all, so we do + * not forward it. This can result in very large variations in + * granularity for deferrable timers, but they can be deferred for + * long periods due to idle. + */ + base->must_forward_clk = false; + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); @@ -1786,16 +1818,16 @@ int timers_dead_cpu(unsigned int cpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. 
*/ - spin_lock_irq(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock_irq(&new_base->lock); + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); BUG_ON(old_base->running_timer); for (i = 0; i < WHEEL_SIZE; i++) migrate_timer_list(new_base, old_base->vectors + i); - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + raw_spin_unlock(&old_base->lock); + raw_spin_unlock_irq(&new_base->lock); put_cpu_ptr(&timer_bases); } return 0; @@ -1811,7 +1843,7 @@ static void __init init_timer_cpu(int cpu) for (i = 0; i < NR_BASES; i++) { base = per_cpu_ptr(&timer_bases[i], cpu); base->cpu = cpu; - spin_lock_init(&base->lock); + raw_spin_lock_init(&base->lock); base->clk = jiffies; } } |