From a4ca26a43e39d521b3913f09faf82dfbbbca5f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:37:23 +0100 Subject: PM / Domains: don't use [delayed_]work_pending() There's no need to test whether a (delayed) work item is pending before queueing, flushing or cancelling it, so remove work_pending() tests used in those cases. Signed-off-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index acc3a8ded29d..9a6b05a35603 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -433,8 +433,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) */ void genpd_queue_power_off_work(struct generic_pm_domain *genpd) { - if (!work_pending(&genpd->power_off_work)) - queue_work(pm_wq, &genpd->power_off_work); + queue_work(pm_wq, &genpd->power_off_work); } /** -- cgit v1.2.3-55-g7522 From ed1ac6e91a3ff7c561008ba57747cd6cbc49385e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:37:33 +0100 Subject: PM: don't use [delayed_]work_pending() There's no need to test whether a (delayed) work item is pending before queueing, flushing or cancelling it, so remove work_pending() tests used in those cases. Signed-off-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- kernel/power/autosleep.c | 2 +- kernel/power/qos.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index ca304046d9e2..c6422ffeda9a 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -66,7 +66,7 @@ static DECLARE_WORK(suspend_work, try_to_suspend); void queue_up_suspend_work(void) { - if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + if (autosleep_state > PM_SUSPEND_ON) queue_work(autosleep_wq, &suspend_work); } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 9322ff7eaad6..587dddeebf15 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -359,8 +359,7 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } - if (delayed_work_pending(&req->work)) - cancel_delayed_work_sync(&req->work); + cancel_delayed_work_sync(&req->work); if (new_value != req->node.prio) pm_qos_update_target( @@ -386,8 +385,7 @@ void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, "%s called for unknown object.", __func__)) return; - if (delayed_work_pending(&req->work)) - cancel_delayed_work_sync(&req->work); + cancel_delayed_work_sync(&req->work); if (new_value != req->node.prio) pm_qos_update_target( @@ -416,8 +414,7 @@ void pm_qos_remove_request(struct pm_qos_request *req) return; } - if (delayed_work_pending(&req->work)) - cancel_delayed_work_sync(&req->work); + cancel_delayed_work_sync(&req->work); pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, &req->node, PM_QOS_REMOVE_REQ, -- cgit v1.2.3-55-g7522 From 43720bd6014327ac454434496cb953edcdb9f8d6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 11 Jan 2013 13:43:45 +0100 Subject: PM / tracing: remove deprecated power trace API The text in Documentation said it would be removed in 2.6.41; the text in the Kconfig said removal in the 3.1 release. Either way you look at it, we are well past both, so push it off a cliff. Note that the POWER_CSTATE and the POWER_PSTATE are part of the legacy tracing API. Remove all tracepoints which use these flags. As can be seen from context, most already have a trace entry via trace_cpu_idle anyways. Also, the cpufreq/cpufreq.c PSTATE one is actually unpaired, as compared to the CSTATE ones which all have a clear start/stop. As part of this, the trace_power_frequency also becomes orphaned, so it too is deleted. Signed-off-by: Paul Gortmaker Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki --- Documentation/trace/events-power.txt | 27 +---------- arch/arm/mach-omap2/pm34xx.c | 2 - arch/x86/kernel/process.c | 6 --- drivers/cpufreq/cpufreq.c | 1 - drivers/cpuidle/cpuidle.c | 2 - include/trace/events/power.h | 92 ------------------------------------ kernel/trace/Kconfig | 15 ------ kernel/trace/power-traces.c | 3 -- 8 files changed, 1 insertion(+), 147 deletions(-) diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt index cf794af22855..e1498ff8cf94 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.txt @@ -17,7 +17,7 @@ Cf. include/trace/events/power.h for the events definitions. 1. Power state switch events ============================ -1.1 New trace API +1.1 Trace API ----------------- A 'cpu' event class gathers the CPU-related events: cpuidle and @@ -41,31 +41,6 @@ The event which has 'state=4294967295' in the trace is very important to the use space tools which are using it to detect the end of the current state, and so to correctly draw the states diagrams and to calculate accurate statistics etc. -1.2 DEPRECATED trace API ------------------------- - -A new Kconfig option CONFIG_EVENT_POWER_TRACING_DEPRECATED with the default value of -'y' has been created. This allows the legacy trace power API to be used conjointly -with the new trace API. -The Kconfig option, the old trace API (in include/trace/events/power.h) and the -old trace points will disappear in a future release (namely 2.6.41). - -power_start "type=%lu state=%lu cpu_id=%lu" -power_frequency "type=%lu state=%lu cpu_id=%lu" -power_end "cpu_id=%lu" - -The 'type' parameter takes one of those macros: - . POWER_NONE = 0, - . POWER_CSTATE = 1, /* C-State */ - . POWER_PSTATE = 2, /* Frequency change or DVFS */ - -The 'state' parameter is set depending on the type: - . Target C-state for type=POWER_CSTATE, - . Target frequency for type=POWER_PSTATE, - -power_end is used to indicate the exit of a state, corresponding to the latest -power_start event. - 2. Clocks events ================ The clock events are used for clock enable/disable and for diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 7be3622cfc85..2d93d8b23835 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -351,12 +351,10 @@ static void omap3_pm_idle(void) if (omap_irq_pending()) goto out; - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id()); omap_sram_idle(); - trace_power_end(smp_processor_id()); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); out: diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f15bf0..dcfc1f410dc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -375,7 +375,6 @@ void cpu_idle(void) */ void default_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* @@ -389,7 +388,6 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_APM_MODULE @@ -423,7 +421,6 @@ void stop_this_cpu(void *dummy) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -434,7 +431,6 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); @@ -447,12 +443,10 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1f93dbd72355..99faadf454ec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -294,7 +294,6 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); - trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index e1f6860e069c..eba69290e074 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -144,7 +144,6 @@ int cpuidle_idle_call(void) return 0; } - trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle_rcuidle(next_state, dev->cpu); if (cpuidle_state_is_coupled(dev, drv, next_state)) @@ -153,7 +152,6 @@ int cpuidle_idle_call(void) else entered_state = cpuidle_enter_state(dev, drv, next_state); - trace_power_end_rcuidle(dev->cpu); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* give the governor an opportunity to reflect on the outcome */ diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 0c9783841a30..427acab5d69a 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -99,98 +99,6 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, TP_ARGS(name, state) ); -#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED - -/* - * The power events are used for cpuidle & suspend (power_start, power_end) - * and for cpufreq (power_frequency) - */ -DECLARE_EVENT_CLASS(power, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id), - - TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __entry->type = type; - __entry->state = state; - __entry->cpu_id = cpu_id; - ), - - TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type, - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) -); - -DEFINE_EVENT(power, power_start, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -DEFINE_EVENT(power, power_frequency, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -TRACE_EVENT(power_end, - - TP_PROTO(unsigned int cpu_id), - - TP_ARGS(cpu_id), - - TP_STRUCT__entry( - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __entry->cpu_id = cpu_id; - ), - - TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id) - -); - -/* Deprecated dummy functions must be protected against multi-declartion */ -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -/* These dummy declaration have to be ripped out when the deprecated - events get removed */ -static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; -static inline void trace_power_end(u64 cpuid) {}; -static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {}; -static inline void trace_power_end_rcuidle(u64 cpuid) {}; -static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - /* * The clock events are used for clock enable/disable and for * clock rate change diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d89335a485f..ad0a067ad4b3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -78,21 +78,6 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool -config EVENT_POWER_TRACING_DEPRECATED - depends on EVENT_TRACING - bool "Deprecated power event trace API, to be removed" - default y - help - Provides old power event types: - C-state/idle accounting events: - power:power_start - power:power_end - and old cpufreq accounting event: - power:power_frequency - This is for userspace compatibility - and will vanish after 5 kernel iterations, - namely 3.1. - config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index f55fcf61b223..1c71382b283d 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,8 +13,5 @@ #define CREATE_TRACE_POINTS #include -#ifdef EVENT_POWER_TRACING_DEPRECATED -EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -#endif EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); -- cgit v1.2.3-55-g7522 From fbadc58dd3a52c330c8f3926aa93011bf9d91fa0 Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Wed, 23 Jan 2013 21:49:37 +0100 Subject: PM / Runtime: Add new helper function: pm_runtime_active() This boolean function simply returns whether or not the runtime status of the device is 'active'. The typical scenario is driver calls pm_runtime_get firstly, then check pm_runtime_active in atomic environment. Also add entry to Documentation/power/runtime.txt Signed-off-by: Yanmin Zhang Signed-off-by: ShuoX Liu Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 4 ++++ include/linux/pm_runtime.h | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 03591a750f99..6c9f5d9aa115 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -426,6 +426,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: 'power.runtime_error' is set or 'power.disable_depth' is greater than zero) + bool pm_runtime_active(struct device *dev); + - return true if the device's runtime PM status is 'active' or its + 'power.disable_depth' field is not equal to zero, or false otherwise + bool pm_runtime_suspended(struct device *dev); - return true if the device's runtime PM status is 'suspended' and its 'power.disable_depth' field is equal to zero, or false otherwise diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index f271860c78d5..c785c215abfc 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -80,6 +80,12 @@ static inline bool pm_runtime_suspended(struct device *dev) && !dev->power.disable_depth; } +static inline bool pm_runtime_active(struct device *dev) +{ + return dev->power.runtime_status == RPM_ACTIVE + || dev->power.disable_depth; +} + static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; @@ -132,6 +138,7 @@ static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } static inline void device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } +static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } -- cgit v1.2.3-55-g7522 From 7e73c5ae6e7991a6c01f6d096ff8afaef4458c36 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 6 Feb 2013 13:00:36 +0100 Subject: PM: Introduce suspend state PM_SUSPEND_FREEZE PM_SUSPEND_FREEZE state is a general state that does not need any platform specific support, it equals frozen processes + suspended devices + idle processors. Compared with PM_SUSPEND_MEMORY, PM_SUSPEND_FREEZE saves less power because the system is still in a running state. PM_SUSPEND_FREEZE has less resume latency because it does not touch BIOS, and the processors are in idle state. Compared with RTPM/idle, PM_SUSPEND_FREEZE saves more power as 1. the processor has longer sleep time because processes are frozen. The deeper c-state the processor supports, more power saving we can get. 2. PM_SUSPEND_FREEZE uses system suspend code path, thus we can get more power saving from the devices that does not have good RTPM support. This state is useful for 1) platforms that do not have STR, or have a broken STR. 2) platforms that have an extremely low power idle state, which can be used to replace STR. The following describes how PM_SUSPEND_FREEZE state works. 1. echo freeze > /sys/power/state 2. the processes are frozen. 3. all the devices are suspended. 4. all the processors are blocked by a wait queue 5. all the processors idles and enters (Deep) c-state. 6. an interrupt fires. 7. a processor is woken up and handles the irq. 8. if it is a general event, a) the irq handler runs and quites. b) goto step 4. 9. if it is a real wake event, say, power button pressing, keyboard touch, mouse moving, a) the irq handler runs and activate the wakeup source b) wakeup_source_activate() notifies the wait queue. c) system starts resuming from PM_SUSPEND_FREEZE 10. all the devices are resumed. 11. all the processes are unfrozen. 12. system is back to working. Known Issue: The wakeup of this new PM_SUSPEND_FREEZE state may behave differently from the previous suspend state. Take ACPI platform for example, there are some GPEs that only enabled when the system is in sleep state, to wake the system backk from S3/S4. But we are not touching these GPEs during transition to PM_SUSPEND_FREEZE. This means we may lose some wake event. But on the other hand, as we do not disable all the Interrupts during PM_SUSPEND_FREEZE, we may get some extra "wakeup" Interrupts, that are not available for S3/S4. The patches has been tested on an old Sony laptop, and here are the results: Average Power: 1. RPTM/idle for half an hour: 14.8W, 12.6W, 14.1W, 12.5W, 14.4W, 13.2W, 12.9W 2. Freeze for half an hour: 11W, 10.4W, 9.4W, 11.3W 10.5W 3. RTPM/idle for three hours: 11.6W 4. Freeze for three hours: 10W 5. Suspend to Memory: 0.5~0.9W Average Resume Latency: 1. RTPM/idle with a black screen: (From pressing keyboard to screen back) Less than 0.2s 2. Freeze: (From pressing power button to screen back) 2.50s 3. Suspend to Memory: (From pressing power button to screen back) 4.33s >From the results, we can see that all the platforms should benefit from this patch, even if it does not have Low Power S0. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 6 ++++ include/linux/suspend.h | 6 +++- kernel/power/main.c | 2 +- kernel/power/suspend.c | 69 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 68 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index e6ee5e80e546..79715e7fa43e 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -382,6 +382,12 @@ static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; + /* + * active wakeup source should bring the system + * out of PM_SUSPEND_FREEZE state + */ + freeze_wake(); + ws->active = true; ws->active_count++; ws->last_time = ktime_get(); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0c808d7fa579..d4e3f16d5e89 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -34,8 +34,10 @@ static inline void pm_restore_console(void) typedef int __bitwise suspend_state_t; #define PM_SUSPEND_ON ((__force suspend_state_t) 0) -#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1) +#define PM_SUSPEND_FREEZE ((__force suspend_state_t) 1) +#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 2) #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) +#define PM_SUSPEND_MIN PM_SUSPEND_FREEZE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) enum suspend_stat_step { @@ -192,6 +194,7 @@ struct platform_suspend_ops { */ extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); +extern void freeze_wake(void); /** * arch_suspend_disable_irqs - disable IRQs for suspend @@ -217,6 +220,7 @@ extern int pm_suspend(suspend_state_t state); static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } +static inline void freeze_wake(void) {} #endif /* !CONFIG_SUSPEND */ /* struct pbe is used for creating lists of pages that should be restored diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c16f9167de1..b1c26a92ca9f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -313,7 +313,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND - suspend_state_t state = PM_SUSPEND_STANDBY; + suspend_state_t state = PM_SUSPEND_MIN; const char * const *s; #endif char *p; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c8b7446b27df..d4feda084a3a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -30,12 +30,38 @@ #include "power.h" const char *const pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_FREEZE] = "freeze", [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", }; static const struct platform_suspend_ops *suspend_ops; +static bool need_suspend_ops(suspend_state_t state) +{ + return !!(state > PM_SUSPEND_FREEZE); +} + +static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); +static bool suspend_freeze_wake; + +static void freeze_begin(void) +{ + suspend_freeze_wake = false; +} + +static void freeze_enter(void) +{ + wait_event(suspend_freeze_wait_head, suspend_freeze_wake); +} + +void freeze_wake(void) +{ + suspend_freeze_wake = true; + wake_up(&suspend_freeze_wait_head); +} +EXPORT_SYMBOL_GPL(freeze_wake); + /** * suspend_set_ops - Set the global suspend method table. * @ops: Suspend operations to use. @@ -50,8 +76,11 @@ EXPORT_SYMBOL_GPL(suspend_set_ops); bool valid_state(suspend_state_t state) { + if (state == PM_SUSPEND_FREEZE) + return true; /* - * All states need lowlevel support and need to be valid to the lowlevel + * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel + * support and need to be valid to the lowlevel * implementation, no valid callback implies that none are valid. */ return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); @@ -89,11 +118,11 @@ static int suspend_test(int level) * hibernation). Run suspend notifiers, allocate the "suspend" console and * freeze processes. */ -static int suspend_prepare(void) +static int suspend_prepare(suspend_state_t state) { int error; - if (!suspend_ops || !suspend_ops->enter) + if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter)) return -EPERM; pm_prepare_console(); @@ -137,7 +166,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) { int error; - if (suspend_ops->prepare) { + if (need_suspend_ops(state) && suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) goto Platform_finish; @@ -149,12 +178,23 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) goto Platform_finish; } - if (suspend_ops->prepare_late) { + if (need_suspend_ops(state) && suspend_ops->prepare_late) { error = suspend_ops->prepare_late(); if (error) goto Platform_wake; } + /* + * PM_SUSPEND_FREEZE equals + * frozen processes + suspended devices + idle processors. + * Thus we should invoke freeze_enter() soon after + * all the devices are suspended. + */ + if (state == PM_SUSPEND_FREEZE) { + freeze_enter(); + goto Platform_wake; + } + if (suspend_test(TEST_PLATFORM)) goto Platform_wake; @@ -182,13 +222,13 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) enable_nonboot_cpus(); Platform_wake: - if (suspend_ops->wake) + if (need_suspend_ops(state) && suspend_ops->wake) suspend_ops->wake(); dpm_resume_start(PMSG_RESUME); Platform_finish: - if (suspend_ops->finish) + if (need_suspend_ops(state) && suspend_ops->finish) suspend_ops->finish(); return error; @@ -203,11 +243,11 @@ int suspend_devices_and_enter(suspend_state_t state) int error; bool wakeup = false; - if (!suspend_ops) + if (need_suspend_ops(state) && !suspend_ops) return -ENOSYS; trace_machine_suspend(state); - if (suspend_ops->begin) { + if (need_suspend_ops(state) && suspend_ops->begin) { error = suspend_ops->begin(state); if (error) goto Close; @@ -226,7 +266,7 @@ int suspend_devices_and_enter(suspend_state_t state) do { error = suspend_enter(state, &wakeup); - } while (!error && !wakeup + } while (!error && !wakeup && need_suspend_ops(state) && suspend_ops->suspend_again && suspend_ops->suspend_again()); Resume_devices: @@ -236,13 +276,13 @@ int suspend_devices_and_enter(suspend_state_t state) ftrace_start(); resume_console(); Close: - if (suspend_ops->end) + if (need_suspend_ops(state) && suspend_ops->end) suspend_ops->end(); trace_machine_suspend(PWR_EVENT_EXIT); return error; Recover_platform: - if (suspend_ops->recover) + if (need_suspend_ops(state) && suspend_ops->recover) suspend_ops->recover(); goto Resume_devices; } @@ -278,12 +318,15 @@ static int enter_state(suspend_state_t state) if (!mutex_trylock(&pm_mutex)) return -EBUSY; + if (state == PM_SUSPEND_FREEZE) + freeze_begin(); + printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); printk("done.\n"); pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); - error = suspend_prepare(); + error = suspend_prepare(state); if (error) goto Unlock; -- cgit v1.2.3-55-g7522 From 89a22dadb8810983868f5bbbc5530b27bf714a60 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 4 Feb 2013 07:10:10 +0000 Subject: ACPI: enable ACPI SCI during suspend Enable ACPI SCI during suspend so that SCI can be used as wake events for PM_SUSPEND_FREEZE. For S3/S4 transition, We disable all GPEs in suspend_ops->prepare_late() to fix a problem that GPEs may trigger SCI before arch_suspend_disable_irqs() is run. So it is safe to leave the SCI enabled until arch_suspend_irq_disable() is run. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 3ff267861541..3adeb10ff3ec 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -787,7 +787,7 @@ acpi_os_install_interrupt_handler(u32 gsi, acpi_osd_handler handler, acpi_irq_handler = handler; acpi_irq_context = context; - if (request_irq(irq, acpi_irq, IRQF_SHARED, "acpi", acpi_irq)) { + if (request_irq(irq, acpi_irq, IRQF_SHARED | IRQF_NO_SUSPEND, "acpi", acpi_irq)) { printk(KERN_ERR PREFIX "SCI (IRQ%d) allocation failed\n", irq); acpi_irq_handler = NULL; return AE_NOT_ACQUIRED; -- cgit v1.2.3-55-g7522 From 957d1282bb8c07e682e142b9237cd9fcb8348a0b Mon Sep 17 00:00:00 2001 From: Li Fei Date: Fri, 1 Feb 2013 08:56:03 +0000 Subject: suspend: enable freeze timeout configuration through sys At present, the value of timeout for freezing is 20s, which is meaningless in case that one thread is frozen with mutex locked and another thread is trying to lock the mutex, as this time of freezing will fail unavoidably. And if there is no new wakeup event registered, the system will waste at most 20s for such meaningless trying of freezing. With this patch, the value of timeout can be configured to smaller value, so such meaningless trying of freezing will be aborted in earlier time, and later freezing can be also triggered in earlier time. And more power will be saved. In normal case on mobile phone, it costs real little time to freeze processes. On some platform, it only costs about 20ms to freeze user space processes and 10ms to freeze kernel freezable threads. Signed-off-by: Liu Chuansheng Signed-off-by: Li Fei Signed-off-by: Rafael J. Wysocki --- Documentation/power/freezing-of-tasks.txt | 5 +++++ include/linux/freezer.h | 5 +++++ kernel/power/main.c | 27 +++++++++++++++++++++++++++ kernel/power/process.c | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 6ec291ea1c78..85894d83b352 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt @@ -223,3 +223,8 @@ since they ask the freezer to skip freezing this task, since it is anyway only after the entire suspend/hibernation sequence is complete. So, to summarize, use [un]lock_system_sleep() instead of directly using mutex_[un]lock(&pm_mutex). That would prevent freezing failures. + +V. Miscellaneous +/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze +all user space processes or all freezable kernel threads, in unit of millisecond. +The default value is 20000, with range of unsigned integer. diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e4238ceaa4d6..e70df40d84f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -12,6 +12,11 @@ extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ +/* + * Timeout for stopping processes + */ +extern unsigned int freeze_timeout_msecs; + /* * Check if a process has been frozen */ diff --git a/kernel/power/main.c b/kernel/power/main.c index b1c26a92ca9f..d77663bfedeb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -553,6 +553,30 @@ power_attr(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ +#ifdef CONFIG_FREEZER +static ssize_t pm_freeze_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", freeze_timeout_msecs); +} + +static ssize_t pm_freeze_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + freeze_timeout_msecs = val; + return n; +} + +power_attr(pm_freeze_timeout); + +#endif /* CONFIG_FREEZER*/ + static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE @@ -575,6 +599,9 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP_DEBUG &pm_print_times_attr.attr, #endif +#endif +#ifdef CONFIG_FREEZER + &pm_freeze_timeout_attr.attr, #endif NULL, }; diff --git a/kernel/power/process.c b/kernel/power/process.c index d5a258b60c6f..98088e0e71e8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -21,7 +21,7 @@ /* * Timeout for stopping processes */ -#define TIMEOUT (20 * HZ) +unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { @@ -36,7 +36,7 @@ static int try_to_freeze_tasks(bool user_only) do_gettimeofday(&start); - end_time = jiffies + TIMEOUT; + end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); if (!user_only) freeze_workqueues_begin(); -- cgit v1.2.3-55-g7522