summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller2017-06-13 00:44:11 +0200
committerDavid S. Miller2017-06-13 00:44:11 +0200
commit9300d9c4ad29b0e79c55c0ba7a16cfc4c13442b8 (patch)
tree46e76b92d42fe97e75fe59c77c2cca8e5f7713d6
parentMerge branch 'sparc64-LDC-changes-for-porting-VCC-driver-into-upstream-kernel' (diff)
parentsparc64: optimize functions that access tick (diff)
downloadkernel-qcow2-linux-9300d9c4ad29b0e79c55c0ba7a16cfc4c13442b8.tar.gz
kernel-qcow2-linux-9300d9c4ad29b0e79c55c0ba7a16cfc4c13442b8.tar.xz
kernel-qcow2-linux-9300d9c4ad29b0e79c55c0ba7a16cfc4c13442b8.zip
Merge branch 'sparc64-early-boot-timestamp'
Pavel Tatashin says: ==================== sparc64: Early boot timestamp Changelog: v2 - v3: - __aligned(64) -> __cacheline_aligned - Replaced in sched_clock() wmb() with barrier() v1 - v2: - Early boot timestamps are now available on all 64-bit sparc processors - New hot-patched get_tick() function. This patch set: - enables early boot timestamps on SPARC, - adds offset so we count time from zero, the same as it is done on other platforms - improves the performance by inling, hot patching, and combining loads into the same cacheline. (and few other optimizations). So, the final performance of sched_clock() is faster than now: the fewer number of loads, and all of them are coming from the same cacheline. Loads can run while we are reading tick value, and we do not do function call. Current sched_clock(): sethi %hi(0xb9b400), %g1 ldx [ %g1 + 0x250 ], %g1 ldx [ %g1 ], %g1 call %g1 nop sethi %hi(0xb9b400), %g1 ldx [ %g1 + 0x300 ], %g1 mulx %o0, %g1, %g1 rett %i7 + 8 srlx %g1, 0xa, %o0 Final sched_clock(): sethi %hi(0xb9b400), %g1 ldx [ %g1 + 0x2c8 ], %g2 ldx [ %g1 + 0x2c0 ], %g1 b 42f638 <sched_clock+0x44> rd %asr24, %i0 ... sllx %i0, 1, %i0 srlx %i0, 1, %i0 mulx %i0, %g1, %i0 srlx %i0, 0xa, %i0 rett %i7 + 8 sub %o0, %g2, %o0 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/sparc/include/asm/setup.h2
-rw-r--r--arch/sparc/include/asm/timer_64.h67
-rw-r--r--arch/sparc/kernel/kernel.h3
-rw-r--r--arch/sparc/kernel/setup_64.c7
-rw-r--r--arch/sparc/kernel/time_64.c155
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S5
6 files changed, 181 insertions, 58 deletions
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 3fae200dd251..8b32538084f7 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -1,5 +1,5 @@
/*
- * Just a place holder.
+ * Just a place holder.
*/
#ifndef _SPARC_SETUP_H
#define _SPARC_SETUP_H
diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h
index fce415034000..51bc3bc54bfe 100644
--- a/arch/sparc/include/asm/timer_64.h
+++ b/arch/sparc/include/asm/timer_64.h
@@ -9,7 +9,12 @@
#include <linux/types.h>
#include <linux/init.h>
+/* The most frequently accessed fields should be first,
+ * to fit into the same cacheline.
+ */
struct sparc64_tick_ops {
+ unsigned long ticks_per_nsec_quotient;
+ unsigned long offset;
unsigned long long (*get_tick)(void);
int (*add_compare)(unsigned long);
unsigned long softint_mask;
@@ -17,6 +22,8 @@ struct sparc64_tick_ops {
void (*init_tick)(void);
unsigned long (*add_tick)(unsigned long);
+ unsigned long (*get_frequency)(void);
+ unsigned long frequency;
char *name;
};
@@ -27,4 +34,64 @@ unsigned long sparc64_get_clock_tick(unsigned int cpu);
void setup_sparc64_timer(void);
void __init time_init(void);
+#define TICK_PRIV_BIT BIT(63)
+#define TICKCMP_IRQ_BIT BIT(63)
+
+#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL
+#define HBIRD_STICK_ADDR 0x1fe0000f070UL
+
+#define GET_TICK_NINSTR 13
+struct get_tick_patch {
+ unsigned int addr;
+ unsigned int tick[GET_TICK_NINSTR];
+ unsigned int stick[GET_TICK_NINSTR];
+};
+
+extern struct get_tick_patch __get_tick_patch;
+extern struct get_tick_patch __get_tick_patch_end;
+
+static inline unsigned long get_tick(void)
+{
+ unsigned long tick, tmp1, tmp2;
+
+ __asm__ __volatile__(
+ /* read hbtick 13 instructions */
+ "661:\n"
+ " mov 0x1fe, %1\n"
+ " sllx %1, 0x20, %1\n"
+ " sethi %%hi(0xf000), %2\n"
+ " or %2, 0x70, %2\n"
+ " or %1, %2, %1\n" /* %1 = HBIRD_STICK_ADDR */
+ " add %1, 8, %2\n"
+ " ldxa [%2]%3, %0\n"
+ " ldxa [%1]%3, %1\n"
+ " ldxa [%2]%3, %2\n"
+ " sub %2, %0, %0\n" /* don't modify %xcc */
+ " brnz,pn %0, 661b\n" /* restart to save one register */
+ " sllx %2, 32, %2\n"
+ " or %2, %1, %0\n"
+ /* Common/not patched code */
+ " sllx %0, 1, %0\n"
+ " srlx %0, 1, %0\n" /* Clear TICK_PRIV_BIT */
+ /* Beginning of patch section */
+ " .section .get_tick_patch, \"ax\"\n"
+ " .word 661b\n"
+ /* read tick 2 instructions and 11 skipped */
+ " ba 1f\n"
+ " rd %%tick, %0\n"
+ " .skip 4 * (%4 - 2)\n"
+ "1:\n"
+ /* read stick 2 instructions and 11 skipped */
+ " ba 1f\n"
+ " rd %%asr24, %0\n"
+ " .skip 4 * (%4 - 2)\n"
+ "1:\n"
+ /* End of patch section */
+ " .previous\n"
+ : "=&r" (tick), "=&r" (tmp1), "=&r" (tmp2)
+ : "i" (ASI_PHYS_BYPASS_EC_E), "i" (GET_TICK_NINSTR));
+
+ return tick;
+}
+
#endif /* _SPARC64_TIMER_H */
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 6ae1e77be0bf..b625db4cfb78 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -52,6 +52,9 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs);
void do_signal32(struct pt_regs * regs);
asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp);
+/* time_64.c */
+void __init time_init_early(void);
+
/* compat_audit.c */
extern unsigned int sparc32_dir_class[];
extern unsigned int sparc32_chattr_class[];
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 422b17880955..4d9c3e13c150 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -95,7 +95,7 @@ static struct console prom_early_console = {
.index = -1,
};
-/*
+/*
* Process kernel command line switches that are specific to the
* SPARC or that require special low-level processing.
*/
@@ -365,6 +365,7 @@ void __init start_early_boot(void)
}
current_thread_info()->cpu = cpu;
+ time_init_early();
prom_init_report();
start_kernel();
}
@@ -639,7 +640,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
+ rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
#endif
task_thread_info(&init_task)->kregs = &fake_swapper_regs;
@@ -648,7 +649,7 @@ void __init setup_arch(char **cmdline_p)
if (!ic_set_manually) {
phandle chosen = prom_finddevice("/chosen");
u32 cl, sv, gw;
-
+
cl = prom_getintdefault (chosen, "client-ip", 0);
sv = prom_getintdefault (chosen, "server-ip", 0);
gw = prom_getintdefault (chosen, "gateway-ip", 0);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 98d05de8da66..a612a91cb9cd 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -47,14 +47,13 @@
#include <asm/cpudata.h>
#include <linux/uaccess.h>
#include <asm/irq_regs.h>
+#include <asm/cacheflush.h>
#include "entry.h"
+#include "kernel.h"
DEFINE_SPINLOCK(rtc_lock);
-#define TICK_PRIV_BIT (1UL << 63)
-#define TICKCMP_IRQ_BIT (1UL << 63)
-
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -164,13 +163,19 @@ static unsigned long tick_add_tick(unsigned long adj)
return new_tick;
}
-static struct sparc64_tick_ops tick_operations __read_mostly = {
+static unsigned long tick_get_frequency(void)
+{
+ return local_cpu_data().clock_tick;
+}
+
+static struct sparc64_tick_ops tick_operations __cacheline_aligned = {
.name = "tick",
.init_tick = tick_init_tick,
.disable_irq = tick_disable_irq,
.get_tick = tick_get_tick,
.add_tick = tick_add_tick,
.add_compare = tick_add_compare,
+ .get_frequency = tick_get_frequency,
.softint_mask = 1UL << 0,
};
@@ -250,6 +255,11 @@ static int stick_add_compare(unsigned long adj)
return ((long)(new_tick - (orig_tick+adj))) > 0L;
}
+static unsigned long stick_get_frequency(void)
+{
+ return prom_getint(prom_root_node, "stick-frequency");
+}
+
static struct sparc64_tick_ops stick_operations __read_mostly = {
.name = "stick",
.init_tick = stick_init_tick,
@@ -257,6 +267,7 @@ static struct sparc64_tick_ops stick_operations __read_mostly = {
.get_tick = stick_get_tick,
.add_tick = stick_add_tick,
.add_compare = stick_add_compare,
+ .get_frequency = stick_get_frequency,
.softint_mask = 1UL << 16,
};
@@ -277,9 +288,6 @@ static struct sparc64_tick_ops stick_operations __read_mostly = {
* 2) write high
* 3) write low
*/
-#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL
-#define HBIRD_STICK_ADDR 0x1fe0000f070UL
-
static unsigned long __hbird_read_stick(void)
{
unsigned long ret, tmp1, tmp2, tmp3;
@@ -381,6 +389,13 @@ static int hbtick_add_compare(unsigned long adj)
return ((long)(val2 - val)) > 0L;
}
+static unsigned long hbtick_get_frequency(void)
+{
+ struct device_node *dp = of_find_node_by_path("/");
+
+ return of_getintprop_default(dp, "stick-frequency", 0);
+}
+
static struct sparc64_tick_ops hbtick_operations __read_mostly = {
.name = "hbtick",
.init_tick = hbtick_init_tick,
@@ -388,11 +403,10 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = {
.get_tick = hbtick_get_tick,
.add_tick = hbtick_add_tick,
.add_compare = hbtick_add_compare,
+ .get_frequency = hbtick_get_frequency,
.softint_mask = 1UL << 0,
};
-static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
-
unsigned long cmos_regs;
EXPORT_SYMBOL(cmos_regs);
@@ -582,34 +596,17 @@ static int __init clock_init(void)
*/
fs_initcall(clock_init);
-/* This is gets the master TICK_INT timer going. */
-static unsigned long sparc64_init_timers(void)
+/* Return true if this is Hummingbird, aka Ultra-IIe */
+static bool is_hummingbird(void)
{
- struct device_node *dp;
- unsigned long freq;
+ unsigned long ver, manuf, impl;
- dp = of_find_node_by_path("/");
- if (tlb_type == spitfire) {
- unsigned long ver, manuf, impl;
-
- __asm__ __volatile__ ("rdpr %%ver, %0"
- : "=&r" (ver));
- manuf = ((ver >> 48) & 0xffff);
- impl = ((ver >> 32) & 0xffff);
- if (manuf == 0x17 && impl == 0x13) {
- /* Hummingbird, aka Ultra-IIe */
- tick_ops = &hbtick_operations;
- freq = of_getintprop_default(dp, "stick-frequency", 0);
- } else {
- tick_ops = &tick_operations;
- freq = local_cpu_data().clock_tick;
- }
- } else {
- tick_ops = &stick_operations;
- freq = of_getintprop_default(dp, "stick-frequency", 0);
- }
+ __asm__ __volatile__ ("rdpr %%ver, %0"
+ : "=&r" (ver));
+ manuf = ((ver >> 48) & 0xffff);
+ impl = ((ver >> 32) & 0xffff);
- return freq;
+ return (manuf == 0x17 && impl == 0x13);
}
struct freq_table {
@@ -671,12 +668,12 @@ core_initcall(register_sparc64_cpufreq_notifier);
static int sparc64_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- return tick_ops->add_compare(delta) ? -ETIME : 0;
+ return tick_operations.add_compare(delta) ? -ETIME : 0;
}
static int sparc64_timer_shutdown(struct clock_event_device *evt)
{
- tick_ops->disable_irq();
+ tick_operations.disable_irq();
return 0;
}
@@ -693,7 +690,7 @@ static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
void __irq_entry timer_interrupt(int irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- unsigned long tick_mask = tick_ops->softint_mask;
+ unsigned long tick_mask = tick_operations.softint_mask;
int cpu = smp_processor_id();
struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
@@ -728,7 +725,7 @@ void setup_sparc64_timer(void)
: "=r" (pstate)
: "i" (PSTATE_IE));
- tick_ops->init_tick();
+ tick_operations.init_tick();
/* Restore PSTATE_IE. */
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
@@ -755,12 +752,10 @@ static unsigned long tb_ticks_per_usec __read_mostly;
void __delay(unsigned long loops)
{
- unsigned long bclock, now;
+ unsigned long bclock = get_tick();
- bclock = tick_ops->get_tick();
- do {
- now = tick_ops->get_tick();
- } while ((now-bclock) < loops);
+ while ((get_tick() - bclock) < loops)
+ ;
}
EXPORT_SYMBOL(__delay);
@@ -772,26 +767,71 @@ EXPORT_SYMBOL(udelay);
static u64 clocksource_tick_read(struct clocksource *cs)
{
- return tick_ops->get_tick();
+ return get_tick();
+}
+
+static void __init get_tick_patch(void)
+{
+ unsigned int *addr, *instr, i;
+ struct get_tick_patch *p;
+
+ if (tlb_type == spitfire && is_hummingbird())
+ return;
+
+ for (p = &__get_tick_patch; p < &__get_tick_patch_end; p++) {
+ instr = (tlb_type == spitfire) ? p->tick : p->stick;
+ addr = (unsigned int *)(unsigned long)p->addr;
+ for (i = 0; i < GET_TICK_NINSTR; i++) {
+ addr[i] = instr[i];
+ /* ensure that address is modified before flush */
+ wmb();
+ flushi(&addr[i]);
+ }
+ }
+}
+
+static void init_tick_ops(struct sparc64_tick_ops *ops)
+{
+ unsigned long freq, quotient, tick;
+
+ freq = ops->get_frequency();
+ quotient = clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
+ tick = ops->get_tick();
+
+ ops->offset = (tick * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT;
+ ops->ticks_per_nsec_quotient = quotient;
+ ops->frequency = freq;
+ tick_operations = *ops;
+ get_tick_patch();
+}
+
+void __init time_init_early(void)
+{
+ if (tlb_type == spitfire) {
+ if (is_hummingbird())
+ init_tick_ops(&hbtick_operations);
+ else
+ init_tick_ops(&tick_operations);
+ } else {
+ init_tick_ops(&stick_operations);
+ }
}
void __init time_init(void)
{
- unsigned long freq = sparc64_init_timers();
+ unsigned long freq;
+ freq = tick_operations.frequency;
tb_ticks_per_usec = freq / USEC_PER_SEC;
- timer_ticks_per_nsec_quotient =
- clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
-
- clocksource_tick.name = tick_ops->name;
+ clocksource_tick.name = tick_operations.name;
clocksource_tick.read = clocksource_tick_read;
clocksource_register_hz(&clocksource_tick, freq);
printk("clocksource: mult[%x] shift[%d]\n",
clocksource_tick.mult, clocksource_tick.shift);
- sparc64_clockevent.name = tick_ops->name;
+ sparc64_clockevent.name = tick_operations.name;
clockevents_calc_mult_shift(&sparc64_clockevent, freq, 4);
sparc64_clockevent.max_delta_ns =
@@ -809,14 +849,21 @@ void __init time_init(void)
unsigned long long sched_clock(void)
{
- unsigned long ticks = tick_ops->get_tick();
+ unsigned long quotient = tick_operations.ticks_per_nsec_quotient;
+ unsigned long offset = tick_operations.offset;
+
+ /* Use barrier so the compiler emits the loads first and overlaps load
+ * latency with reading tick, because reading %tick/%stick is a
+ * post-sync instruction that will flush and restart subsequent
+ * instructions after it commits.
+ */
+ barrier();
- return (ticks * timer_ticks_per_nsec_quotient)
- >> SPARC64_NSEC_PER_CYC_SHIFT;
+ return ((get_tick() * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
}
int read_current_timer(unsigned long *timer_val)
{
- *timer_val = tick_ops->get_tick();
+ *timer_val = get_tick();
return 0;
}
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 572db686f845..03b3d65d1266 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -149,6 +149,11 @@ SECTIONS
*(.sun_m7_2insn_patch)
__sun_m7_2insn_patch_end = .;
}
+ .get_tick_patch : {
+ __get_tick_patch = .;
+ *(.get_tick_patch)
+ __get_tick_patch_end = .;
+ }
PERCPU_SECTION(SMP_CACHE_BYTES)
#ifdef CONFIG_JUMP_LABEL