summaryrefslogtreecommitdiffstats
path: root/arch/arm64/lib
diff options
context:
space:
mode:
authorJulien Thierry2017-10-13 15:32:56 +0200
committerWill Deacon2017-10-13 19:56:15 +0200
commit7b77452ec524fa845efdec28ae16e2202cf797dc (patch)
treefa5da4cbe17b85d94be06f1e704eff52fe88f1f9 /arch/arm64/lib
parentarm_arch_timer: Expose event stream status (diff)
downloadkernel-qcow2-linux-7b77452ec524fa845efdec28ae16e2202cf797dc.tar.gz
kernel-qcow2-linux-7b77452ec524fa845efdec28ae16e2202cf797dc.tar.xz
kernel-qcow2-linux-7b77452ec524fa845efdec28ae16e2202cf797dc.zip
arm64: use WFE for long delays
The current delay implementation uses the yield instruction, which is a hint that it is beneficial to schedule another thread. As this is a hint, it may be implemented as a NOP, causing all delays to be busy loops. This is the case for many existing CPUs. Taking advantage of the generic timer sending periodic events to all cores, we can use WFE during delays to reduce power consumption. This is beneficial only for delays longer than the period of the timer event stream. If timer event stream is not enabled, delays will behave as yield/busy loops. Signed-off-by: Julien Thierry <julien.thierry@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/lib')
-rw-r--r--arch/arm64/lib/delay.c23
1 files changed, 19 insertions, 4 deletions
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index dad4ec9bbfd1..e48ac402e7be 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -24,10 +24,28 @@
#include <linux/module.h>
#include <linux/timex.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define USECS_TO_CYCLES(time_usecs) \
+ xloops_to_cycles((time_usecs) * 0x10C7UL)
+
+static inline unsigned long xloops_to_cycles(unsigned long xloops)
+{
+ return (xloops * loops_per_jiffy * HZ) >> 32;
+}
+
void __delay(unsigned long cycles)
{
cycles_t start = get_cycles();
+ if (arch_timer_evtstrm_available()) {
+ const cycles_t timer_evt_period =
+ USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
+
+ while ((get_cycles() - start + timer_evt_period) < cycles)
+ wfe();
+ }
+
while ((get_cycles() - start) < cycles)
cpu_relax();
}
@@ -35,10 +53,7 @@ EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
- unsigned long loops;
-
- loops = xloops * loops_per_jiffy * HZ;
- __delay(loops >> 32);
+ __delay(xloops_to_cycles(xloops));
}
EXPORT_SYMBOL(__const_udelay);