From e8a923cc1fff6e627f906655ad52ee694ef2f6d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Oct 2013 15:32:10 +0200 Subject: perf/x86: Fix NMI measurements OK, so what I'm actually seeing on my WSM is that sched/clock.c is 'broken' for the purpose we're using it for. What triggered it is that my WSM-EP is broken :-( [ 0.001000] tsc: Fast TSC calibration using PIT [ 0.002000] tsc: Detected 2533.715 MHz processor [ 0.500180] TSC synchronization [CPU#0 -> CPU#6]: [ 0.505197] Measured 3 cycles TSC warp between CPUs, turning off TSC clock. [ 0.004000] tsc: Marking TSC unstable due to check_tsc_sync_source failed For some reason it consistently detects TSC skew, even though NHM+ should have a single clock domain for 'reasonable' systems. This marks sched_clock_stable=0, which means that we do fancy stuff to try and get a 'sane' clock. Part of this fancy stuff relies on the tick, clearly that's gone when NOHZ=y. So for idle cpus time gets stuck, until it either wakes up or gets kicked by another cpu. While this is perfectly fine for the scheduler -- it only cares about actually running stuff, and when we're running stuff we're obviously not idle. This does somewhat break down for perf which can trigger events just fine on an otherwise idle cpu. So I've got NMIs that get 'measured' as taking ~1ms, which actually don't last nearly that long: <idle>-0 [013] d.h. 886.311970: rcu_nmi_enter <-do_nmi ... <idle>-0 [013] d.h. 886.311997: perf_sample_event_took: HERE!!! : 1040990 So ftrace (which uses sched_clock(), not the fancy bits) only sees ~27us, but we measure ~1ms !! Now since all this measurement stuff lives in x86 code, we can actually fix it. 
Signed-off-by: Peter Zijlstra Cc: mingo@kernel.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Link: http://lkml.kernel.org/r/20131017133350.GG3364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- arch/x86/kernel/nmi.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d8449158cf9..8a87a3224121 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - int ret; u64 start_clock; u64 finish_clock; + int ret; if (!atomic_read(&active_events)) return NMI_DONE; - start_clock = local_clock(); + start_clock = sched_clock(); ret = x86_pmu.handle_irq(regs); - finish_clock = local_clock(); + finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc2c353..6fcb49ce50a1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 u64 before, delta, whole_msecs; int remainder_ns, decimal_msecs, thishandled; - before = local_clock(); + before = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = local_clock() - before; + delta = sched_clock() - before; trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) -- cgit v1.2.3-55-g7522 From d780a31271b2f455cb4b83eb018ecfb1c28ef5c1 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 29 Oct 2013 09:13:54 -0600 Subject: KVM: Fix modprobe failure for kvm_intel/kvm_amd The x86 specific kvm init creates a new conflicting debugfs directory which causes 
modprobe issues with kvm_intel and kvm_amd. For example, sudo modprobe kvm_amd modprobe: ERROR: could not insert 'kvm_amd': Bad address The simplest fix is to just rename the directory. The following KVM config options are set: CONFIG_KVM_GUEST=y CONFIG_KVM_DEBUG_FS=y CONFIG_HAVE_KVM=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQ_ROUTING=y CONFIG_HAVE_KVM_EVENTFD=y CONFIG_KVM_APIC_ARCHITECTURE=y CONFIG_KVM_MMIO=y CONFIG_KVM_ASYNC_PF=y CONFIG_HAVE_KVM_MSI=y CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y CONFIG_KVM=m CONFIG_KVM_INTEL=m CONFIG_KVM_AMD=m CONFIG_KVM_DEVICE_ASSIGNMENT=y Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Gleb Natapov Cc: Raghavendra K T Cc: Marcelo Tosatti Signed-off-by: Tim Gardner [Change debugfs directory name. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a0e2a8a80c94..b2046e4d0b59 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug; struct dentry *kvm_init_debugfs(void) { - d_kvm_debug = debugfs_create_dir("kvm", NULL); + d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); if (!d_kvm_debug) printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); -- cgit v1.2.3-55-g7522