From 6a5022a56ac37da7bffece043331a101ed0040b1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:16:51 +0900 Subject: kprobes/x86: Allow to handle reentered kprobe on single-stepping Since the NMI handlers(e.g. perf) can interrupt in the single stepping (or preparing the single stepping, do_debug etc.), we should consider a kprobe is hit in the NMI handler. Even in that case, the kprobe is allowed to be reentered as same as the kprobes hit in kprobe handlers (KPROBE_HIT_ACTIVE or KPROBE_HIT_SSDONE). The real issue will happen when a kprobe hit while another reentered kprobe is processing (KPROBE_REENTER), because we already consumed a saved-area for the previous kprobe. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Jiri Kosina Cc: Jonathan Lebon Link: http://lkml.kernel.org/r/20140417081651.26341.10593.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 61b17dc2c277..da7bdaa3ce15 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -531,10 +531,11 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SS: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; - case KPROBE_HIT_SS: + case KPROBE_REENTER: /* A probe has been hit in the codepath leading up to, or just * after, single-stepping of a probed instruction. This entire * codepath should strictly reside in .kprobes.text section. -- cgit v1.2.3-55-g7522 From be8f274323c26ddc7e6fd6c44254b7abcdbe6389 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:16:58 +0900 Subject: kprobes: Prohibit probing on .entry.text code .entry.text is a code area which is used for interrupt/syscall entries, which includes many sensitive code. Thus, it is better to prohibit probing on all of such code instead of a part of that. Since some symbols are already registered on kprobe blacklist, this also removes them from the blacklist. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: David S. 
Miller Cc: Frederic Weisbecker Cc: Jan Kiszka Cc: Jiri Kosina Cc: Jonathan Lebon Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081658.26341.57354.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 33 --------------------------------- arch/x86/kernel/entry_64.S | 20 -------------------- arch/x86/kernel/kprobes/core.c | 8 ++++++++ include/linux/kprobes.h | 1 + kernel/kprobes.c | 13 ++++++++----- 5 files changed, 17 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a2a4f4697889..0ca5bf1697bb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -314,10 +314,6 @@ ENTRY(ret_from_kernel_thread) CFI_ENDPROC ENDPROC(ret_from_kernel_thread) -/* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to @@ -372,10 +368,6 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -495,10 +487,6 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -691,10 +679,6 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection .macro FIXUP_ESPFIX_STACK /* @@ -781,10 +765,6 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -961,10 +941,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter @@ -1239,11 +1215,6 @@ return_to_handler: jmp *%ecx #endif -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - #ifdef CONFIG_TRACING ENTRY(trace_page_fault) RING0_EC_FRAME @@ -1453,7 +1424,3 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1e96c3628bf2..43bb38951660 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,8 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -517,7 +515,6 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) - .popsection /* * A newly forked process directly context switches into this address. @@ -975,10 +972,6 @@ END(interrupt) call \func .endm -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -1113,10 +1106,6 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) -/* - * End of kprobes section - */ - .popsection /* * APIC interrupts. 
@@ -1477,11 +1466,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK paranoiderrorentry stack_segment do_stack_segment @@ -1898,7 +1882,3 @@ ENTRY(ignore_sysret) CFI_ENDPROC END(ignore_sysret) -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index da7bdaa3ce15..7751b3dee53a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1065,6 +1065,14 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end); +} + int __init arch_init_kprobes(void) { return 0; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 925eaf28fca9..cdf9251f8249 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); +extern bool arch_within_kprobe_blacklist(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ceeadfcabb76..5b5ac76671e7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -96,9 +96,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, {"native_get_debugreg",}, - {"irq_entries_start",}, - {"common_interrupt",}, - {"mcount",}, /* mcount can be called from everywhere */ {NULL} /* Terminator */ }; @@ -1324,12 +1321,18 @@ out: return ret; } +bool __weak arch_within_kprobe_blacklist(unsigned long addr) +{ + /* The __kprobes marked functions and entry code must not be probed */ + return addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end; +} + static int __kprobes in_kprobes_functions(unsigned long addr) { struct kprobe_blackpoint *kb; - if (addr >= (unsigned long)__kprobes_text_start && - addr < (unsigned long)__kprobes_text_end) + if (arch_within_kprobe_blacklist(addr)) return -EINVAL; /* * If there exists a kprobe_blacklist, verify and -- cgit v1.2.3-55-g7522 From 376e242429bf8539ef39a080ac113c8799840b13 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:05 +0900 Subject: kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes blacklist Introduce NOKPROBE_SYMBOL() macro which builds a kprobes blacklist at kernel build time. The usage of this macro is similar to EXPORT_SYMBOL(), placed after the function definition: NOKPROBE_SYMBOL(function); Since this macro will inhibit inlining of static/inline functions, this patch also introduces a nokprobe_inline macro for static/inline functions. In this case, we must use NOKPROBE_SYMBOL() for the inline function caller. When CONFIG_KPROBES=y, the macro stores the given function address in the "_kprobe_blacklist" section. 
Since the data structures are not fully initialized by the macro (because there is no "size" information), those are re-initialized at boot time by using kallsyms. Signed-off-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp Cc: Alok Kataria Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: Christopher Li Cc: Chris Wright Cc: David S. Miller Cc: Jan-Simon Möller Cc: Jeremy Fitzhardinge Cc: Linus Torvalds Cc: Randy Dunlap Cc: Rusty Russell Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-sparse@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ingo Molnar --- Documentation/kprobes.txt | 16 +++++- arch/x86/include/asm/asm.h | 7 +++ arch/x86/kernel/paravirt.c | 4 ++ include/asm-generic/vmlinux.lds.h | 9 ++++ include/linux/compiler.h | 2 + include/linux/kprobes.h | 20 ++++++-- kernel/kprobes.c | 100 ++++++++++++++++++++------------------ kernel/sched/core.c | 1 + 8 files changed, 107 insertions(+), 52 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 0cfb00fd86ff..4bbeca8483ed 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface Kprobes enables you to dynamically break into any kernel routine and collect debugging and performance information non-disruptively. You -can trap at almost any kernel code address, specifying a handler +can trap at almost any kernel code address(*), specifying a handler routine to be invoked when the breakpoint is hit. +(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) There are currently three types of probes: kprobes, jprobes, and kretprobes (also called return probes). A kprobe can be inserted @@ -273,6 +274,19 @@ using one of the following techniques: or - Execute 'sysctl -w debug.kprobes_optimization=n' +1.5 Blacklist + +Kprobes can probe most of the kernel except itself. This means +that there are some functions where kprobes cannot probe. Probing +(trapping) such functions can cause a recursive trap (e.g. double +fault) or the nested probe handler may never be called. +Kprobes manages such functions as a blacklist. +If you want to add a function into the blacklist, you just need +to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro +to specify a blacklisted function. +Kprobes checks the given probe address against the blacklist and +rejects registering it, if the given address is in the blacklist. + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e1cd1a..7730c1c5c83a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@ .long (from) - . ; \ .long (to) - . + 0x7ffffff0 ; \ .popsection + +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -71,6 +77,7 @@ " .long (" #from ") - .\n" \ " .long (" #to ") - . 
+ 0x7ffffff0\n" \ " .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */ #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b10af835c31..e136869ae42e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -389,6 +390,9 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; +/* At this point, native_get_debugreg has a real function entry */ +NOKPROBE_SYMBOL(native_get_debugreg); + struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 146e4fffd710..40ceb3ceba79 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -109,6 +109,14 @@ #define BRANCH_PROFILE() #endif +#ifdef CONFIG_KPROBES +#define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ + *(_kprobe_blacklist) \ + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; +#else +#define KPROBE_BLACKLIST() +#endif + #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ @@ -507,6 +515,7 @@ *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ + KPROBE_BLACKLIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ee7239ea1583..0300c0f5c88b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -374,7 +374,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) +# define nokprobe_inline __always_inline #else # define __kprobes +# define nokprobe_inline inline #endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index cdf9251f8249..e059507c465d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -205,10 +205,10 @@ struct kretprobe_blackpoint { void *addr; }; -struct kprobe_blackpoint { - const char *name; +struct kprobe_blacklist_entry { + struct list_head list; unsigned long start_addr; - unsigned long range; + unsigned long end_addr; }; #ifdef CONFIG_KPROBES @@ -477,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifdef CONFIG_KPROBES +/* + * Blacklist ganerating macro. Specify functions which is not probed + * by using this macro. + */ +#define __NOKPROBE_SYMBOL(fname) \ +static unsigned long __used \ + __attribute__((section("_kprobe_blacklist"))) \ + _kbl_addr_##fname = (unsigned long)fname; +#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) +#else +#define NOKPROBE_SYMBOL(fname) +#endif + #endif /* _LINUX_KPROBES_H */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5b5ac76671e7..5ffc6875d2a7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -86,18 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } -/* - * Normally, functions that we'd want to prohibit kprobes in, are marked - * __kprobes. 
But, there are cases where such functions already belong to - * a different section (__sched for preempt_schedule) - * - * For such cases, we now have a blacklist - */ -static struct kprobe_blackpoint kprobe_blacklist[] = { - {"preempt_schedule",}, - {"native_get_debugreg",}, - {NULL} /* Terminator */ -}; +/* Blacklist -- list of struct kprobe_blacklist_entry */ +static LIST_HEAD(kprobe_blacklist); #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* @@ -1328,24 +1318,22 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__kprobes_text_end; } -static int __kprobes in_kprobes_functions(unsigned long addr) +static bool __kprobes within_kprobe_blacklist(unsigned long addr) { - struct kprobe_blackpoint *kb; + struct kprobe_blacklist_entry *ent; if (arch_within_kprobe_blacklist(addr)) - return -EINVAL; + return true; /* * If there exists a kprobe_blacklist, verify and * fail any probe registration in the prohibited area */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - if (kb->start_addr) { - if (addr >= kb->start_addr && - addr < (kb->start_addr + kb->range)) - return -EINVAL; - } + list_for_each_entry(ent, &kprobe_blacklist, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return true; } - return 0; + + return false; } /* @@ -1436,7 +1424,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || - in_kprobes_functions((unsigned long) p->addr) || + within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; goto out; @@ -2022,6 +2010,38 @@ void __kprobes dump_kprobe(struct kprobe *kp) kp->symbol_name, kp->addr, kp->offset); } +/* + * Lookup and populate the kprobe_blacklist. + * + * Unlike the kretprobe blacklist, we'll need to determine + * the range of addresses that belong to the said functions, + * since a kprobe need not necessarily be at the beginning + * of a function. + */ +static int __init populate_kprobe_blacklist(unsigned long *start, + unsigned long *end) +{ + unsigned long *iter; + struct kprobe_blacklist_entry *ent; + unsigned long offset = 0, size = 0; + + for (iter = start; iter < end; iter++) { + if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { + pr_err("Failed to find blacklist %p\n", (void *)*iter); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + ent->start_addr = *iter; + ent->end_addr = *iter + size; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &kprobe_blacklist); + } + return 0; +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2065,14 +2085,13 @@ static struct notifier_block kprobe_module_nb = { .priority = 0 }; +/* Markers of _kprobe_blacklist section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + static int __init init_kprobes(void) { int i, err = 0; - unsigned long offset = 0, size = 0; - char *modname, namebuf[KSYM_NAME_LEN]; - const char *symbol_name; - void *addr; - struct kprobe_blackpoint *kb; /* FIXME allocate the probe table, currently defined statically */ /* initialize all list heads */ @@ -2082,26 +2101,11 @@ static int __init init_kprobes(void) raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); } - /* - * Lookup and populate the kprobe_blacklist. 
- * - * Unlike the kretprobe blacklist, we'll need to determine - * the range of addresses that belong to the said functions, - * since a kprobe need not necessarily be at the beginning - * of a function. - */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - kprobe_lookup_name(kb->name, addr); - if (!addr) - continue; - - kb->start_addr = (unsigned long)addr; - symbol_name = kallsyms_lookup(kb->start_addr, - &size, &offset, &modname, namebuf); - if (!symbol_name) - kb->range = 0; - else - kb->range = size; + err = populate_kprobe_blacklist(__start_kprobe_blacklist, + __stop_kprobe_blacklist); + if (err) { + pr_err("kprobes: failed to populate blacklist: %d\n", err); + pr_err("Please take care of using kprobes.\n"); } if (kretprobe_blacklist_size) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..6863631e8cd0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2804,6 +2804,7 @@ asmlinkage void __sched notrace preempt_schedule(void) barrier(); } while (need_resched()); } +NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); #endif /* CONFIG_PREEMPT */ -- cgit v1.2.3-55-g7522 From 0f46efeb44e360b78f54a968b4d92e6877c35891 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:12 +0900 Subject: kprobes, x86: Prohibit probing on debug_stack_*() Prohibit probing on debug_stack_reset and debug_stack_set_zero. Since the both functions are called from TRACE_IRQS_ON/OFF_DEBUG macros which run in int3 ist entry, probing it may cause a soft lockup. This happens when the kernel built with CONFIG_DYNAMIC_FTRACE=y and CONFIG_TRACE_IRQFLAGS=y. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Borislav Petkov Cc: Jan Beulich Cc: Paul Gortmaker Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081712.26341.32994.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a135239badb7..5af696dddd1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1160,6 +1161,7 @@ int is_debug_stack(unsigned long addr) (addr <= __get_cpu_var(debug_stack_addr) && addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } +NOKPROBE_SYMBOL(is_debug_stack); DEFINE_PER_CPU(u32, debug_idt_ctr); @@ -1168,6 +1170,7 @@ void debug_stack_set_zero(void) this_cpu_inc(debug_idt_ctr); load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_set_zero); void debug_stack_reset(void) { @@ -1176,6 +1179,7 @@ void debug_stack_reset(void) if (this_cpu_dec_return(debug_idt_ctr) == 0) load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ -- cgit v1.2.3-55-g7522 From 8027197220e02d5cebbbfdff36c2827661fbc692 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:19 +0900 Subject: kprobes, x86: Prohibit probing on native_set_debugreg()/load_idt() Since the kprobes uses do_debug for single stepping, functions called from do_debug() before notify_die() must not be probed. And also native_load_idt() is called from paranoid_exit when returning int3, this also must not be probed. 
Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Alok Kataria Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20140417081719.26341.65542.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e136869ae42e..548d25f00c90 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -390,8 +390,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; -/* At this point, native_get_debugreg has a real function entry */ +/* At this point, native_get/set_debugreg has real function entries */ NOKPROBE_SYMBOL(native_get_debugreg); +NOKPROBE_SYMBOL(native_set_debugreg); +NOKPROBE_SYMBOL(native_load_idt); struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3-55-g7522 From 6f6343f53d133bae516caf3d254bce37d8774625 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:33 +0900 Subject: kprobes/x86: Call exception handlers directly from do_int3/do_debug To avoid a kernel crash by probing on lockdep code, call kprobe_int3_handler() and kprobe_debug_handler()(which was formerly called post_kprobe_handler()) directly from do_int3 and do_debug. Currently kprobes uses notify_die() to hook the int3/debug exceptoins. Since there is a locking code in notify_die, the lockdep code can be invoked. And because the lockdep involves printk() related things, theoretically, we need to prohibit probing on such code, which means much longer blacklist we'll have. Instead, hooking the int3/debug for kprobes before notify_die() can avoid this problem. Anyway, most of the int3 handlers in the kernel are already called from do_int3 directly, e.g. ftrace_int3_handler, poke_int3_handler, kgdb_ll_trap. Actually only kprobe_exceptions_notify is on the notifier_call_chain. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Andrew Morton Cc: Borislav Petkov Cc: Jiri Kosina Cc: Jonathan Lebon Cc: Kees Cook Cc: Rusty Russell Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081733.26341.24423.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 2 ++ arch/x86/kernel/kprobes/core.c | 24 +++--------------------- arch/x86/kernel/traps.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9454c167629f..53cdfb2857ab 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -116,4 +116,6 @@ struct kprobe_ctlblk { extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); +extern int kprobe_debug_handler(struct pt_regs *regs); #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7751b3dee53a..9b80aec1ea1a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -559,7 +559,7 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. 
*/ -static int __kprobes kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_int3_handler(struct pt_regs *regs) { kprobe_opcode_t *addr; struct kprobe *p; @@ -857,7 +857,7 @@ no_change: * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_debug_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -963,22 +963,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (args->regs && user_mode_vm(args->regs)) return ret; - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: + if (val == DIE_GPF) { /* * To be potentially processing a kprobe fault and to * trust the result from kprobe_running(), we have @@ -987,9 +972,6 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (!preemptible() && kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - break; - default: - break; } return ret; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 57409f6b8c62..e5d4a70814d7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -334,6 +334,11 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ +#ifdef CONFIG_KPROBES + if (kprobe_int3_handler(regs)) + return; +#endif + if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -440,6 +445,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; +#ifdef CONFIG_KPROBES + if (kprobe_debug_handler(regs)) + goto exit; +#endif + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) goto exit; -- cgit v1.2.3-55-g7522 From ecd50f714c421c759354632dd00f70c718c95b10 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:40 +0900 Subject: kprobes, x86: Call exception_enter after kprobes handled Move exception_enter() call after kprobes handler is done. Since the exception_enter() involves many other functions (like printk), it can cause recursive int3/break loop when kprobes probe such functions. 
Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Andrew Morton Cc: Borislav Petkov Cc: Jiri Kosina Cc: Kees Cook Cc: Rusty Russell Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081740.26341.10894.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e5d4a70814d7..ba9abe9cbce3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -327,7 +327,6 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -338,6 +337,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co if (kprobe_int3_handler(regs)) return; #endif + prev_state = exception_enter(); if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -415,8 +415,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); - get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -449,6 +447,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (kprobe_debug_handler(regs)) goto exit; #endif + prev_state = exception_enter(); if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) -- cgit v1.2.3-55-g7522 From 7ec8a97a990da8e3ba87175a757731e17f74072e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:47 +0900 Subject: kprobes/x86: Allow probe on some kprobe preparation functions There is no need to prohibit probing on the functions used in preparation phase. Those are safely probed because those are not invoked from breakpoint/fault/debug handlers, there is no chance to cause recursive exceptions. Following functions are now removed from the kprobes blacklist: can_boost can_probe can_optimize is_IF_modifier __copy_instruction copy_optimized_instructions arch_copy_kprobe arch_prepare_kprobe arch_arm_kprobe arch_disarm_kprobe arch_remove_kprobe arch_trampoline_kprobe arch_prepare_kprobe_ftrace arch_prepare_optimized_kprobe arch_check_optimized_kprobe arch_within_optimized_kprobe __arch_remove_optimized_kprobe arch_remove_optimized_kprobe arch_optimize_kprobes arch_unoptimize_kprobe I tested those functions by putting kprobes on all instructions in the functions with the bash script I sent to LKML. See: https://lkml.org/lkml/2014/3/27/33 Signed-off-by: Masami Hiramatsu Cc: Jiri Kosina Cc: Jonathan Lebon Link: http://lkml.kernel.org/r/20140417081747.26341.36065.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 20 ++++++++++---------- arch/x86/kernel/kprobes/ftrace.c | 2 +- arch/x86/kernel/kprobes/opt.c | 24 ++++++++++++------------ 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 9b80aec1ea1a..bd717137ae77 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -159,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) * Returns non-zero if opcode is boostable. 
* RIP relative instructions are adjusted at copying time in 64 bits mode */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -260,7 +260,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add } /* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) +static int can_probe(unsigned long paddr) { unsigned long addr, __addr, offset = 0; struct insn insn; @@ -299,7 +299,7 @@ static int __kprobes can_probe(unsigned long paddr) /* * Returns non-zero if opcode modifies the interrupt flag. */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) +static int is_IF_modifier(kprobe_opcode_t *insn) { /* Skip prefixes */ insn = skip_prefixes(insn); @@ -322,7 +322,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. */ -int __kprobes __copy_instruction(u8 *dest, u8 *src) +int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -365,7 +365,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) return insn.length; } -static int __kprobes arch_copy_kprobe(struct kprobe *p) +static int arch_copy_kprobe(struct kprobe *p) { int ret; @@ -392,7 +392,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p) return 0; } -int __kprobes arch_prepare_kprobe(struct kprobe *p) +int arch_prepare_kprobe(struct kprobe *p) { if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -407,17 +407,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return arch_copy_kprobe(p); } -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); @@ -1060,7 +1060,7 @@ int __init arch_init_kprobes(void) return 0; } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 23ef5c556f06..dcaa1310ccfd 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -85,7 +85,7 @@ end: local_irq_restore(flags); } -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +int arch_prepare_kprobe_ftrace(struct kprobe *p) { p->ainsn.insn = NULL; p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 898160b42e43..fba7fb075e8a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -77,7 +77,7 @@ found: } /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). 
*/ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { #ifdef CONFIG_X86_64 *addr++ = 0x48; @@ -169,7 +169,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_ local_irq_restore(flags); } -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src) { int len = 0, ret; @@ -189,7 +189,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) +static int insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -224,7 +224,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) } /* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) +static int can_optimize(unsigned long paddr) { unsigned long addr, size = 0, offset = 0; struct insn insn; @@ -275,7 +275,7 @@ static int __kprobes can_optimize(unsigned long paddr) } /* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +int arch_check_optimized_kprobe(struct optimized_kprobe *op) { int i; struct kprobe *p; @@ -290,15 +290,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) } /* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + op->optinsn.size > addr); } /* Free optimized instruction slot */ -static __kprobes +static void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) { if (op->optinsn.insn) { @@ -308,7 +308,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { __arch_remove_optimized_kprobe(op, 1); } @@ -318,7 +318,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Target instructions MUST be relocatable (checked inside) * This is called when new aggr(opt)probe is allocated or reused. */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) { u8 *buf; int ret; @@ -372,7 +372,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * Replace breakpoints (int3) with relative jumps. * Caller must call with locking kprobe_mutex and text_mutex. */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) +void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -398,7 +398,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist) } /* Replace a relative jump with a breakpoint (int3). 
*/ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buf[RELATIVEJUMP_SIZE]; -- cgit v1.2.3-55-g7522 From 9c54b6164eeb292a0eac86c6913bd8daaff35e62 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:18:07 +0900 Subject: kprobes, x86: Allow kprobes on text_poke/hw_breakpoint Allow kprobes on text_poke/hw_breakpoint because those are not related to the critical int3-debug recursive path of kprobes at this moment. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Andrew Morton Cc: Borislav Petkov Cc: Fengguang Wu Cc: Jiri Kosina Cc: Oleg Nesterov Cc: Paul Gortmaker Link: http://lkml.kernel.org/r/20140417081807.26341.73219.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 3 +-- arch/x86/kernel/hw_breakpoint.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df94598ad05a..703130f469ec 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * * Note: Must be called under text_mutex. */ -void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +void *text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; char *vaddr; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a67b47c31314..5f9cf20cdb68 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); * NOTIFY_STOP returned for all other cases * */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) +static int hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; @@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { if (val != DIE_DEBUG) -- cgit v1.2.3-55-g7522 From 9326638cbee2d36b051ed2a69f3e4e107e5f86bd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:18:14 +0900 Subject: kprobes, x86: Use NOKPROBE_SYMBOL() instead of __kprobes annotation Use NOKPROBE_SYMBOL macro for protecting functions from kprobes instead of __kprobes annotation under arch/x86. This applies nokprobe_inline annotation for some cases, because NOKPROBE_SYMBOL() will inhibit inlining by referring the symbol address. This just folds a bunch of previous NOKPROBE_SYMBOL() cleanup patches for x86 to one patch. Signed-off-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20140417081814.26341.51656.stgit@ltc230.yrl.intra.hitachi.co.jp Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Fernando Luis Vázquez Cao Cc: Gleb Natapov Cc: Jason Wang Cc: Jesper Nilsson Cc: Jiri Kosina Cc: Jiri Olsa Cc: Jiri Slaby Cc: Johannes Weiner Cc: Jonathan Lebon Cc: Kees Cook Cc: Matt Fleming Cc: Michel Lespinasse Cc: Paolo Bonzini Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Paul Mackerras Cc: Raghavendra K T Cc: Rusty Russell Cc: Seiji Aguchi Cc: Srivatsa Vaddagiri Cc: Tejun Heo Cc: Vineet Gupta Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 2 +- arch/x86/kernel/apic/hw_nmi.c | 3 +- arch/x86/kernel/cpu/perf_event.c | 3 +- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 3 +- arch/x86/kernel/dumpstack.c | 9 ++-- arch/x86/kernel/kprobes/core.c | 77 ++++++++++++++++++++------------ arch/x86/kernel/kprobes/ftrace.c | 15 ++++--- arch/x86/kernel/kprobes/opt.c | 8 ++-- arch/x86/kernel/kvm.c | 4 +- arch/x86/kernel/nmi.c | 18 +++++--- arch/x86/kernel/traps.c | 20 ++++++--- arch/x86/mm/fault.c | 29 +++++++----- 12 files changed, 122 insertions(+), 69 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 58d66fe06b61..ca32508c58c5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index a698d7165c96..73eb5b336f63 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void) smp_mb__after_clear_bit(); } -static int __kprobes +static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { int cpu; @@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; } +NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); static int __init register_trigger_all_cpu_backtrace(void) { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ae407f7226c8..5fc8771979ba 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1292,7 +1292,7 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static int __kprobes +static int perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { u64 start_clock; @@ -1310,6 +1310,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return ret; } +NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4c36bbe3173a..cbb1be3ed9e4 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -593,7 +593,7 @@ out: return 1; } -static int __kprobes +static int perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) { int handled = 0; @@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) return handled; } +NOKPROBE_SYMBOL(perf_ibs_nmi_handler); static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) { diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d9c12d3022a7..b74ebc7c4402 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = 
__ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; -unsigned __kprobes long oops_begin(void) +unsigned long oops_begin(void) { int cpu; unsigned long flags; @@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void) return flags; } EXPORT_SYMBOL_GPL(oops_begin); +NOKPROBE_SYMBOL(oops_begin); -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) crash_kexec(regs); @@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -int __kprobes __die(const char *str, struct pt_regs *regs, long err) +int __die(const char *str, struct pt_regs *regs, long err) { #ifdef CONFIG_X86_32 unsigned short ss; @@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif return 0; } +NOKPROBE_SYMBOL(__die); /* * This is gone through when something in the kernel has done something bad diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd717137ae77..7596df664901 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) +static nokprobe_inline void +__synthesize_relative_insn(void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; @@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); } +NOKPROBE_SYMBOL(synthesize_relcall); /* * Skip the prefixes of the instruction. */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) +static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) { insn_attr_t attr; @@ -154,6 +157,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) #endif return insn; } +NOKPROBE_SYMBOL(skip_prefixes); /* * Returns non-zero if opcode is boostable. 
@@ -425,7 +429,8 @@ void arch_remove_kprobe(struct kprobe *p) } } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; @@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline void +set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags @@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -static void __kprobes clear_btf(void) +static nokprobe_inline void clear_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -461,7 +468,7 @@ static void __kprobes clear_btf(void) } } -static void __kprobes restore_btf(void) +static nokprobe_inline void restore_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -471,8 +478,7 @@ static void __kprobes restore_btf(void) } } -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) +static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -519,14 +526,15 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k else regs->ip = (unsigned long)p->ainsn.insn; } +NOKPROBE_SYMBOL(setup_singlestep); /* * We have reentered the kprobe_handler(), since another probe was hit while * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: @@ -554,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +NOKPROBE_SYMBOL(reenter_kprobe); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. 
*/ -int __kprobes kprobe_int3_handler(struct pt_regs *regs) +int kprobe_int3_handler(struct pt_regs *regs) { kprobe_opcode_t *addr; struct kprobe *p; @@ -622,12 +631,13 @@ int __kprobes kprobe_int3_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 0; } +NOKPROBE_SYMBOL(kprobe_int3_handler); /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. */ -static void __used __kprobes kretprobe_trampoline_holder(void) +static void __used kretprobe_trampoline_holder(void) { asm volatile ( ".global kretprobe_trampoline\n" @@ -658,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) #endif " ret\n"); } +NOKPROBE_SYMBOL(kretprobe_trampoline_holder); +NOKPROBE_SYMBOL(kretprobe_trampoline); /* * Called from kretprobe_trampoline */ -__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) +__visible __used void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -748,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) } return (void *)orig_ret_address; } +NOKPROBE_SYMBOL(trampoline_handler); /* * Called after single-stepping. p->addr is the address of the @@ -776,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. */ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void resume_execution(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -852,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k no_change: restore_btf(); } +NOKPROBE_SYMBOL(resume_execution); /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled throughout this function. */ -int __kprobes kprobe_debug_handler(struct pt_regs *regs) +int kprobe_debug_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -892,8 +906,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_debug_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -950,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); /* * Wrapper routine for handling exceptions. 
*/ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -975,8 +991,9 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d } return ret; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -1000,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)(jp->entry); return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __kprobes jprobe_return(void) +void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -1017,8 +1035,10 @@ void __kprobes jprobe_return(void) " nop \n"::"b" (kcb->jprobe_saved_sp):"memory"); } +NOKPROBE_SYMBOL(jprobe_return); +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); @@ -1046,6 +1066,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } +NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index dcaa1310ccfd..717b02a22e67 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,8 +25,9 @@ #include "common.h" -static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { /* * Emulate singlestep (and also recover regs->ip) @@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, return 1; } -int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) return __skip_singlestep(p, regs, kcb); else return 0; } +NOKPROBE_SYMBOL(skip_singlestep); /* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -84,6 +86,7 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, end: local_irq_restore(flags); } +NOKPROBE_SYMBOL(kprobe_ftrace_handler); int arch_prepare_kprobe_ftrace(struct kprobe *p) { diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index fba7fb075e8a..f304773285ae 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -138,7 +138,8 @@ asm ( #define INT3_SIZE sizeof(kprobe_opcode_t) /* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +static void +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = 
get_kprobe_ctlblk(); unsigned long flags; @@ -168,6 +169,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_ } local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback); static int copy_optimized_instructions(u8 *dest, u8 *src) { @@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, } } -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) { struct optimized_kprobe *op; @@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) } return 0; } +NOKPROBE_SYMBOL(setup_detour_execution); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 0331cb389d68..d81abcbfe501 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void) return reason; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); +NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); -dotraplinkage void __kprobes +dotraplinkage void do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; @@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; } } +NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index b4872b999a71..c3e985d1751c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 /* return total number of NMI events handled */ return handled; } +NOKPROBE_SYMBOL(nmi_handle); int __register_nmi_handler(unsigned int type, struct nmiaction *action) { @@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) } EXPORT_SYMBOL_GPL(unregister_nmi_handler); -static __kprobes void +static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ @@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(pci_serr_error); -static __kprobes void +static void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason &= ~NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(io_check_error); -static __kprobes void +static void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; @@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Dazed and confused, but trying to continue\n"); } +NOKPROBE_SYMBOL(unknown_nmi_error); static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); -static __kprobes void default_do_nmi(struct pt_regs *regs) +static void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; @@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs 
*regs) else unknown_nmi_error(reason, regs); } +NOKPROBE_SYMBOL(default_do_nmi); /* * NMIs can hit breakpoints which will cause it to lose its @@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void) } #endif -dotraplinkage notrace __kprobes void +dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { nmi_nesting_preprocess(regs); @@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code) /* On i386, may loop back to preprocess */ nmi_nesting_postprocess(); } +NOKPROBE_SYMBOL(do_nmi); void stop_nmi(void) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ba9abe9cbce3..3c8ae7d83820 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -106,7 +106,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -static int __kprobes +static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) { @@ -136,7 +136,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } -static void __kprobes +static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { @@ -173,6 +173,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, else force_sig(signr, tsk); } +NOKPROBE_SYMBOL(do_trap); #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -263,7 +264,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif -dotraplinkage void __kprobes +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -309,9 +310,10 @@ do_general_protection(struct pt_regs *regs, long error_code) exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ -dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -355,6 +357,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* @@ -362,7 +365,7 @@ exit: * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -381,6 +384,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } +NOKPROBE_SYMBOL(sync_regs); #endif /* @@ -407,7 +411,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) * * May run on IST stack. 
*/ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) +dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; enum ctx_state prev_state; @@ -491,6 +495,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_debug); /* * Note that we play around with the 'TS' bit in an attempt to get @@ -662,7 +667,7 @@ void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); -dotraplinkage void __kprobes +dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -688,6 +693,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8e5722992677..f83bd0de5eef 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include /* oops_begin/end, ... */ #include /* search_exception_table */ #include /* max_low_pfn */ -#include /* __kprobes, ... */ +#include /* NOKPROBE_SYMBOL, ... */ #include /* kmmio_handler, ... */ #include /* perf_sw_event */ #include /* hstate_index_to_shift */ @@ -45,7 +45,7 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int __kprobes +static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) @@ -54,7 +54,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int __kprobes kprobes_fault(struct pt_regs *regs) +static nokprobe_inline int kprobes_fault(struct pt_regs *regs) { int ret = 0; @@ -261,7 +261,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -291,6 +291,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); /* * Did it hit the DOS screen memory VA from vm86 mode? @@ -358,7 +359,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -425,6 +426,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = @@ -927,7 +929,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline __kprobes int +static noinline int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; @@ -975,6 +977,7 @@ spurious_fault(unsigned long error_code, unsigned long address) return ret; } +NOKPROBE_SYMBOL(spurious_fault); int show_unhandled_signals = 1; @@ -1030,7 +1033,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * {,trace_}do_page_fault() have notrace on. Having this an actual function * guarantees there's a function trace entry. 
*/ -static void __kprobes noinline +static noinline void __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1253,8 +1256,9 @@ good_area: up_read(&mm->mmap_sem); } +NOKPROBE_SYMBOL(__do_page_fault); -dotraplinkage void __kprobes notrace +dotraplinkage void notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { unsigned long address = read_cr2(); /* Get the faulting address */ @@ -1272,10 +1276,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_page_fault); #ifdef CONFIG_TRACING -static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static nokprobe_inline void +trace_page_fault_entries(unsigned long address, struct pt_regs *regs, + unsigned long error_code) { if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); @@ -1283,7 +1289,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes notrace +dotraplinkage void notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { /* @@ -1300,4 +1306,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(trace_do_page_fault); #endif /* CONFIG_TRACING */ -- cgit v1.2.3-55-g7522 From 250bbd12c2fe1221ec96d8087d63e982d4f2180a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 24 Apr 2014 19:08:24 +0200 Subject: uprobes/x86: Refuse to attach uprobe to "word-sized" branch insns All branch insns on x86 can be prefixed with the operand-size override prefix, 0x66. It was only ever useful for performing jumps to 32-bit offsets in 16-bit code segments. In 32-bit code, such instructions are useless since they cause IP truncation to 16 bits, and in case of call insns, they save only 16 bits of return address and misalign the stack pointer as a "bonus". In 64-bit code, such instructions are treated differently by Intel and AMD CPUs: Intel ignores the prefix altogether, AMD treats them the same as in 32-bit mode. Before this patch, the emulation code would execute the instructions as if they have no 0x66 prefix. With this patch, we refuse to attach uprobes to such insns. Signed-off-by: Denys Vlasenko Acked-by: Jim Keniston Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ace22916ade3..3cf24a218196 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -583,6 +583,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + int i; /* has the side-effect of processing the entire instruction */ insn_get_length(insn); @@ -612,6 +613,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return -ENOSYS; } + /* + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. + * No one uses these insns, reject any branch insns with such prefix. 
+ */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x66) + return -ENOTSUPP; + } + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; -- cgit v1.2.3-55-g7522 From 73175d0d19657ec132cc24e8cf0e341e73c54868 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 12:34:02 +0200 Subject: uprobes/x86: Add uprobe_init_insn(), kill validate_insn_{32,64}bits() validate_insn_32bits() and validate_insn_64bits() are very similar, turn them into the single uprobe_init_insn() which has the additional "bool x86_64" argument which can be passed to insn_init() and used to choose between good_insns_64/good_insns_32. Also kill UPROBE_FIX_NONE, it has no users. Note: the current code doesn't use ifdef's consistently, good_insns_64 depends on CONFIG_X86_64 but good_insns_32 is unconditional. This patch removes ifdef around good_insns_64, we will add it back later along with the similar one for good_insns_32. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3cf24a218196..b4aff6a70f4d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -32,9 +32,6 @@ /* Post-execution fixups. */ -/* No fixup needed */ -#define UPROBE_FIX_NONE 0x0 - /* Adjust IP back to vicinity of actual insn */ #define UPROBE_FIX_IP 0x1 @@ -114,7 +111,6 @@ static volatile u32 good_2byte_insns[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -#ifdef CONFIG_X86_64 /* Good-instruction tables for 64-bit apps */ static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ @@ -138,7 +134,6 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -#endif #undef W /* @@ -209,16 +204,22 @@ static bool is_prefix_bad(struct insn *insn) return false; } -static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) { - insn_init(insn, auprobe->insn, false); + u32 volatile *good_insns; + + insn_init(insn, auprobe->insn, x86_64); - /* Skip good instruction prefixes; reject "bad" ones. */ insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + if (x86_64) + good_insns = good_insns_64; + else + good_insns = good_insns_32; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) return 0; if (insn->opcode.nbytes == 2) { @@ -355,30 +356,10 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long * } } -static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) -{ - insn_init(insn, auprobe->insn, true); - - /* Skip good instruction prefixes; reject "bad" ones. 
*/ - insn_get_opcode(insn); - if (is_prefix_bad(insn)) - return -ENOTSUPP; - - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) - return 0; - - if (insn->opcode.nbytes == 2) { - if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) - return 0; - } - return -ENOTSUPP; -} - static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) { - if (mm->context.ia32_compat) - return validate_insn_32bits(auprobe, insn); - return validate_insn_64bits(auprobe, insn); + bool x86_64 = !mm->context.ia32_compat; + return uprobe_init_insn(auprobe, insn, x86_64); } #else /* 32-bit: */ /* @@ -398,7 +379,7 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs * static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) { - return validate_insn_32bits(auprobe, insn); + return uprobe_init_insn(auprobe, insn, false); } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3-55-g7522 From 2ae1f49ae1978fedb6ad607e1f8b084aa9752f95 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 14:03:05 +0200 Subject: uprobes/x86: Add is_64bit_mm(), kill validate_insn_bits() 1. Extract the ->ia32_compat check from 64bit validate_insn_bits() into the new helper, is_64bit_mm(), it will have more users. TODO: this checks is actually wrong if mm owner is X32 task, we need another fix which changes set_personality_ia32(). TODO: even worse, the whole 64-or-32-bit logic is very broken and the fix is not simple, we need the nontrivial changes in the core uprobes code. 2. Kill validate_insn_bits() and change its single caller to use uprobe_init_insn(is_64bit_mm(mm). Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b4aff6a70f4d..b3b25ddc04fb 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -231,6 +231,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !mm->context.ia32_compat; +} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. 
Otherwise, rewrite the instruction so that it accesses @@ -355,13 +360,11 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long * *correction += 4; } } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +#else /* 32-bit: */ +static inline bool is_64bit_mm(struct mm_struct *mm) { - bool x86_64 = !mm->context.ia32_compat; - return uprobe_init_insn(auprobe, insn, x86_64); + return false; } -#else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit */ @@ -376,11 +379,6 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs * long *correction) { } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) -{ - return uprobe_init_insn(auprobe, insn, false); -} #endif /* CONFIG_X86_64 */ struct uprobe_xol_ops { @@ -625,7 +623,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, bool fix_ip = true, fix_call = false; int ret; - ret = validate_insn_bits(auprobe, mm, &insn); + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret; -- cgit v1.2.3-55-g7522 From ff261964cfcfe49d73690ca29b0ba2853d9497e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 14:15:27 +0200 Subject: uprobes/x86: Shift "insn_complete" from branch_setup_xol_ops() to uprobe_init_insn() Change uprobe_init_insn() to make insn_complete() == T, this makes other insn_get_*() calls unnecessary. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b3b25ddc04fb..98d7db50f425 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -209,8 +209,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool u32 volatile *good_insns; insn_init(insn, auprobe->insn, x86_64); + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; @@ -283,8 +286,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is the immediate operand. */ cursor = auprobe->insn + insn_offset_modrm(insn); - insn_get_length(insn); - /* * Convert from rip-relative addressing to indirect addressing * via a scratch register. Change the r/m field from 0x5 (%rip) @@ -564,11 +565,6 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) u8 opc1 = OPCODE1(insn); int i; - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; - switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ @@ -654,7 +650,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, fix_ip = false; break; case 0xff: - insn_get_modrm(&insn); switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ fix_call = true; -- cgit v1.2.3-55-g7522 From 8dbacad93a2a12adebcc717e6055b1bcc1739ab8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 16:07:15 +0200 Subject: uprobes/x86: Make good_insns_* depend on CONFIG_X86_* Add the suitable ifdef's around good_insns_* arrays. 
We do not want to add the ugly ifdef's into their only user, uprobe_init_insn(), so the "#else" branch simply defines them as NULL. This doesn't generate the extra code, gcc is smart enough, although the code is fine even if it could not detect that (without CONFIG_IA32_EMULATION) is_64bit_mm() is __builtin_constant_p(). The patch looks more complicated because it also moves good_insns_64 up close to good_insns_32. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 56 +++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 98d7db50f425..892975b3c99c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -64,6 +64,7 @@ * to keep gcc from statically optimizing it out, as variable_test_bit makes * some versions of gcc to think only *(unsigned long*) is used. */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static volatile u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -86,32 +87,12 @@ static volatile u32 good_insns_32[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; - -/* Using this for both 64-bit and 32-bit apps */ -static volatile u32 good_2byte_insns[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ - W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ - /* ---------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; +#else +#define good_insns_32 NULL +#endif /* Good-instruction tables for 64-bit apps */ +#if defined(CONFIG_X86_64) static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -134,6 +115,33 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_64 NULL +#endif + +/* Using this for both 64-bit and 32-bit apps */ +static volatile u32 good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 
10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ + W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; #undef W /* -- cgit v1.2.3-55-g7522 From b24dc8dace74708fd849312722090169c5da97d3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 18:10:09 +0200 Subject: uprobes/x86: Fix is_64bit_mm() with CONFIG_X86_X32 is_64bit_mm() assumes that mm->context.ia32_compat means the 32-bit instruction set, this is not true if the task is TIF_X32. Change set_personality_ia32() to initialize mm->context.ia32_compat by TIF_X32 or TIF_IA32 instead of 1. This allows to fix is_64bit_mm() without affecting other users, they all treat ia32_compat as "bool". TIF_ in ->ia32_compat looks a bit strange, but this is grep-friendly and avoids the new define's. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/process_64.c | 7 ++++--- arch/x86/kernel/uprobes.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9c0280f93d05..9b53940981b7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. 
*/ - if (current->mm) - current->mm->context.ia32_compat = 1; - if (x32) { clear_thread_flag(TIF_IA32); set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* is_compat_task() uses the presence of the x32 syscall bit flag to determine compat status */ @@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 892975b3c99c..ecbffd16d090 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -245,7 +245,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool static inline bool is_64bit_mm(struct mm_struct *mm) { return !config_enabled(CONFIG_IA32_EMULATION) || - !mm->context.ia32_compat; + !(mm->context.ia32_compat == TIF_IA32); } /* * If arch_uprobe->insn doesn't use rip-relative addressing, return -- cgit v1.2.3-55-g7522 From dd91016dfc9ba9236cb0149984da3f0434278b49 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Apr 2014 15:20:07 +0200 Subject: uprobes/x86: Don't change the task's state if ->pre_xol() fails Currently this doesn't matter, the only ->pre_xol() hook can't fail, but we need to fix arch_uprobe_pre_xol() anyway. If ->pre_xol() fails we should not change regs->ip/flags, we should just return the error to make restart actually possible. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ecbffd16d090..f4464b1b9435 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -687,6 +687,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + if (auprobe->ops->pre_xol) { + int err = auprobe->ops->pre_xol(auprobe, regs); + if (err) + return err; + } + regs->ip = utask->xol_vaddr; utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; @@ -696,8 +702,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - if (auprobe->ops->pre_xol) - return auprobe->ops->pre_xol(auprobe, regs); return 0; } -- cgit v1.2.3-55-g7522 From 588fbd613c3d8fa73e96720761d49f1d40d34d4c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 16:58:17 +0200 Subject: uprobes/x86: Introduce uprobe_xol_ops->abort() and default_abort_op() arch_uprobe_abort_xol() calls handle_riprel_post_xol() even if auprobe->ops != default_xol_ops. This is fine correctness wise, only default_pre_xol_op() can set UPROBE_FIX_RIP_AX|UPROBE_FIX_RIP_CX and otherwise handle_riprel_post_xol() is nop. But this doesn't look clean and this doesn't allow us to move ->fixups into the union in arch_uprobe. Move this handle_riprel_post_xol() call into the new default_abort_op() hook and change arch_uprobe_abort_xol() accordingly. 
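
The hook-based cleanup this change introduces can be illustrated with a small user-space model. The names below (fake_regs, xol_ops, abort_xol) are invented for the sketch; it only mirrors the idea of an optional per-ops ->abort() callback plus a generic restart of the instruction pointer, not the actual uprobes code.

/* Toy model: per-instruction ops with an optional abort hook. */
#include <stdio.h>

struct fake_regs { unsigned long ax, cx, ip; };

struct xol_ops {
	int  (*pre_xol)(struct fake_regs *);
	int  (*post_xol)(struct fake_regs *);
	void (*abort)(struct fake_regs *);	/* undo pre_xol side effects */
};

static void default_abort(struct fake_regs *regs)
{
	regs->ax = 0;	/* e.g. restore a scratch register clobbered by pre_xol */
}

static const struct xol_ops default_ops = { .abort = default_abort };

static void abort_xol(const struct xol_ops *ops, struct fake_regs *regs,
		      unsigned long probed_vaddr)
{
	if (ops->abort)			/* only ops that set state clean it up */
		ops->abort(regs);
	regs->ip = probed_vaddr;	/* restart from the probed address */
}

int main(void)
{
	struct fake_regs r = { .ax = 42, .ip = 0x1000 };
	abort_xol(&default_ops, &r, 0x400080);
	printf("ip=%#lx ax=%lu\n", r.ip, r.ax);
	return 0;
}
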
Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index f4464b1b9435..b3c2a92cce6c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -394,6 +394,7 @@ struct uprobe_xol_ops { bool (*emulate)(struct arch_uprobe *, struct pt_regs *); int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); int (*post_xol)(struct arch_uprobe *, struct pt_regs *); + void (*abort)(struct arch_uprobe *, struct pt_regs *); }; static inline int sizeof_long(void) @@ -444,9 +445,15 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs return 0; } +static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + handle_riprel_post_xol(auprobe, regs, NULL); +} + static struct uprobe_xol_ops default_xol_ops = { .pre_xol = default_pre_xol_op, .post_xol = default_post_xol_op, + .abort = default_abort_op, }; static bool branch_is_call(struct arch_uprobe *auprobe) @@ -820,10 +827,11 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - handle_riprel_post_xol(auprobe, regs, NULL); - instruction_pointer_set(regs, utask->vaddr); + if (auprobe->ops->abort) + auprobe->ops->abort(auprobe, regs); + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->ip = utask->vaddr; /* clear TF if it was set by us in arch_uprobe_pre_xol() */ if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; -- cgit v1.2.3-55-g7522 From 6ded5f3848bfd3227ee208aa38f8bf8d7209d4e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 18:28:02 +0200 Subject: uprobes/x86: Don't use arch_uprobe_abort_xol() in arch_uprobe_post_xol() 014940bad8e4 "uprobes/x86: Send SIGILL if arch_uprobe_post_xol() fails" changed arch_uprobe_post_xol() to use arch_uprobe_abort_xol() if ->post_xol fails. This was correct and helped to avoid the additional complications, we need to clear X86_EFLAGS_TF in this case. However, now that we have uprobe_xol_ops->abort() hook it would be better to avoid arch_uprobe_abort_xol() here. ->post_xol() should likely do what ->abort() does anyway, we should not do the same work twice. Currently only handle_riprel_post_xol() can be called twice, this is unnecessary but safe. Still this is not clean and can lead to the problems in future. Change arch_uprobe_post_xol() to clear X86_EFLAGS_TF and restore ->ip by hand and avoid arch_uprobe_abort_xol(). This temporary uglifies the usage of autask.saved_tf, we will cleanup this later. 
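
The error convention described above (a failing ->post_xol() restores the probed ip itself, and -ERESTART means retry rather than failure) can be sketched as a stand-alone model. Everything here, FAKE_TF, FAKE_ERESTART and finish_single_step included, is an invented stand-in rather than the kernel code.

#include <stdbool.h>

struct fake_regs { unsigned long ip, flags; };

#define FAKE_TF        0x100UL
#define FAKE_ERESTART  512	/* stand-in for the kernel's internal -ERESTART */

static int finish_single_step(int (*post_xol)(struct fake_regs *),
			      struct fake_regs *regs,
			      unsigned long probed_vaddr, bool saved_tf)
{
	int err = 0;

	if (post_xol) {
		err = post_xol(regs);
		if (err) {
			if (!saved_tf)
				regs->flags &= ~FAKE_TF;	/* we set TF, so we clear it */
			regs->ip = probed_vaddr;		/* restart or post-mortem ip */
			if (err == -FAKE_ERESTART)
				err = 0;			/* a restart is not a failure */
		}
	}
	return err;
}

static int failing_post_xol(struct fake_regs *regs)
{
	(void)regs;
	return -FAKE_ERESTART;		/* ask the caller to restart the insn */
}

int main(void)
{
	struct fake_regs r = { .ip = 0x2000, .flags = FAKE_TF };
	return finish_single_step(failing_post_xol, &r, 0x400080, false);
}
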
Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b3c2a92cce6c..2efb93f96030 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -759,22 +759,24 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) struct uprobe_task *utask = current->utask; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { int err = auprobe->ops->post_xol(auprobe, regs); if (err) { - arch_uprobe_abort_xol(auprobe, regs); + if (!utask->autask.saved_tf) + regs->flags &= ~X86_EFLAGS_TF; /* - * Restart the probed insn. ->post_xol() must ensure - * this is really possible if it returns -ERESTART. + * Restore ->ip for restart or post mortem analysis. + * ->post_xol() must not return -ERESTART unless this + * is really possible. */ + regs->ip = utask->vaddr; if (err == -ERESTART) return 0; return err; } } - - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need @@ -819,9 +821,8 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. - * Reset the instruction pointer to its probed address for the potential - * restart or for post mortem analysis. + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { -- cgit v1.2.3-55-g7522 From 220ef8dc9a7a63fe202aacd3fc61e5104f6dd98c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 20:39:56 +0200 Subject: uprobes/x86: Move UPROBE_FIX_SETF logic from arch_uprobe_post_xol() to default_post_xol_op() UPROBE_FIX_SETF is only needed to handle "popf" correctly but it is processed by the generic arch_uprobe_post_xol() code. This doesn't allows us to make ->fixups private for default_xol_ops. 1 Change default_post_xol_op(UPROBE_FIX_SETF) to set ->saved_tf = T. "popf" always reads the flags from stack, it doesn't matter if TF was set or not before single-step. Ignoring the naming, this is even more logical, "saved_tf" means "owned by application" and we do not own this flag after "popf". 2. Change arch_uprobe_post_xol() to save ->saved_tf into the local "bool send_sigtrap" before ->post_xol(). 3. Change arch_uprobe_post_xol() to ignore UPROBE_FIX_SETF and just check ->saved_tf after ->post_xol(). With this patch ->fixups and ->rip_rela_target_address are only used by default_xol_ops hooks, we are ready to remove them from the common part of arch_uprobe. 
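
The "popf hands TF back to the application" rule, and the snapshot of saved_tf taken before ->post_xol() runs, can be shown with a toy model; the names are illustrative only.

#include <stdbool.h>
#include <stdio.h>

struct xol_state { bool saved_tf; };

/* "popf" reloads the flags from the stack, so after it TF belongs to the
 * application again and must not be cleared behind its back. */
static void post_xol_popf(struct xol_state *st)
{
	st->saved_tf = true;
}

int main(void)
{
	struct xol_state st = { .saved_tf = false };
	bool send_sigtrap = st.saved_tf;	/* snapshot before ->post_xol() runs */

	post_xol_popf(&st);

	printf("send SIGTRAP: %d, clear TF: %d\n", send_sigtrap, !st.saved_tf);
	return 0;
}
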
Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2efb93f96030..b2bca293fc57 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -441,6 +441,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs return -ERESTART; } } + /* popf; tell the caller to not touch TF */ + if (auprobe->fixups & UPROBE_FIX_SETF) + utask->autask.saved_tf = true; return 0; } @@ -757,15 +760,15 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + bool send_sigtrap = utask->autask.saved_tf; + int err = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { - int err = auprobe->ops->post_xol(auprobe, regs); + err = auprobe->ops->post_xol(auprobe, regs); if (err) { - if (!utask->autask.saved_tf) - regs->flags &= ~X86_EFLAGS_TF; /* * Restore ->ip for restart or post mortem analysis. * ->post_xol() must not return -ERESTART unless this @@ -773,8 +776,8 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) */ regs->ip = utask->vaddr; if (err == -ERESTART) - return 0; - return err; + err = 0; + send_sigtrap = false; } } /* @@ -782,12 +785,13 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * so we can get an extra SIGTRAP if we do not clear TF. We need * to examine the opcode to make it right. */ - if (utask->autask.saved_tf) + if (send_sigtrap) send_sig(SIGTRAP, current, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + + if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; - return 0; + return err; } /* callback routine for handling exceptions. */ -- cgit v1.2.3-55-g7522 From 97aa5cddbe9e01521137f337624469374e3cbde5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Apr 2014 16:20:55 +0200 Subject: uprobes/x86: Move default_xol_ops's data into arch_uprobe->def Finally we can move arch_uprobe->fixups/rip_rela_target_address into the new "def" struct and place this struct in the union, they are only used by default_xol_ops paths. The patch also renames rip_rela_target_address to riprel_target just to make this name shorter. 
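
Why a union is enough here can be shown outside the kernel: the branch and def paths are never active for the same probe, so their state can share storage. The struct names and field sizes in this sketch are only illustrative.

#include <stdio.h>
#include <stdint.h>

struct probe_separate {
	struct { int32_t offs; uint8_t ilen, opc1; } branch;
	struct { long riprel_target; uint16_t fixups; } def;
};

struct probe_shared {
	union {	/* branch and def are mutually exclusive, so they overlap */
		struct { int32_t offs; uint8_t ilen, opc1; } branch;
		struct { long riprel_target; uint16_t fixups; } def;
	};
};

int main(void)
{
	printf("separate: %zu bytes, union: %zu bytes\n",
	       sizeof(struct probe_separate), sizeof(struct probe_shared));
	return 0;
}
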
Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/include/asm/uprobes.h | 12 +++++++----- arch/x86/kernel/uprobes.c | 43 +++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 93bee7b93854..72caff7afbde 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -41,18 +41,20 @@ struct arch_uprobe { u8 ixol[MAX_UINSN_BYTES]; }; - u16 fixups; const struct uprobe_xol_ops *ops; union { -#ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; -#endif struct { s32 offs; u8 ilen; u8 opc1; - } branch; + } branch; + struct { +#ifdef CONFIG_X86_64 + long riprel_target; +#endif + u16 fixups; + } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b2bca293fc57..7824ce248f8f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -251,10 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address - * accordingly. (The contents of the scratch register will be saved - * before we single-step the modified instruction, and restored - * afterward.) + * def->fixups and def->riprel_target accordingly. (The contents of the + * scratch register will be saved before we single-step the modified + * instruction, and restored afterward). * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -308,18 +307,18 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is NOT the register operand, so we use %rcx (register * #1) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_CX; + auprobe->def.fixups = UPROBE_FIX_RIP_CX; /* Change modrm from 00 000 101 to 00 000 001. */ *cursor = 0x1; } else { /* Use %rax (register #0) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_AX; + auprobe->def.fixups = UPROBE_FIX_RIP_AX; /* Change modrm from 00 xxx 101 to 00 xxx 000 */ *cursor = (reg << 3); } /* Target address = address of next instruction + (signed) offset */ - auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; + auprobe->def.riprel_target = (long)insn->length + insn->displacement.value; /* Displacement field is gone; slide immediate field (if any) over. 
*/ if (insn->immediate.nbytes) { @@ -336,25 +335,25 @@ static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { autask->saved_scratch_register = regs->ax; regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { + regs->ax += auprobe->def.riprel_target; + } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { autask->saved_scratch_register = regs->cx; regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; + regs->cx += auprobe->def.riprel_target; } } static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { - if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { struct arch_uprobe_task *autask; autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) regs->ax = autask->saved_scratch_register; else regs->cx = autask->saved_scratch_register; @@ -432,17 +431,17 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = (long)(utask->vaddr - utask->xol_vaddr); handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) + if (auprobe->def.fixups & UPROBE_FIX_IP) regs->ip += correction; - if (auprobe->fixups & UPROBE_FIX_CALL) { + if (auprobe->def.fixups & UPROBE_FIX_CALL) { if (adjust_ret_addr(regs->sp, correction)) { regs->sp += sizeof_long(); return -ERESTART; } } /* popf; tell the caller to not touch TF */ - if (auprobe->fixups & UPROBE_FIX_SETF) + if (auprobe->def.fixups & UPROBE_FIX_SETF) utask->autask.saved_tf = true; return 0; @@ -646,13 +645,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; /* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups - * is either zero or it reflects rip-related fixups. + * Figure out which fixups default_post_xol_op() will need to perform, + * and annotate def->fixups accordingly. To start with, ->fixups is + * either zero or it reflects rip-related fixups. */ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; + auprobe->def.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: @@ -680,9 +679,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; + auprobe->def.fixups |= UPROBE_FIX_IP; if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->def.fixups |= UPROBE_FIX_CALL; auprobe->ops = &default_xol_ops; return 0; -- cgit v1.2.3-55-g7522 From 78d9af4cd375880a574327210eb9dab572618364 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 18:52:37 +0200 Subject: uprobes/x86: Cleanup the usage of arch_uprobe->def.fixups, make it u8 handle_riprel_insn() assumes that nobody else could modify ->fixups before. This is correct but fragile, change it to use "|=". Also make ->fixups u8, we are going to add the new members into the union. It is not clear why UPROBE_FIX_RIP_.X lived in the upper byte, redefine them so that they can fit into u8. 
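
That the redefined flags really fit into a u8 is easy to check with a compile-time assertion; the macro names below are stand-ins for the UPROBE_FIX_* constants.

#include <stdint.h>

#define FIX_IP      0x01
#define FIX_CALL    0x02
#define FIX_SETF    0x04
#define FIX_RIP_AX  0x08	/* moved down from the old 0x8000 */
#define FIX_RIP_CX  0x10	/* moved down from the old 0x4000 */

_Static_assert((FIX_IP | FIX_CALL | FIX_SETF | FIX_RIP_AX | FIX_RIP_CX) <= UINT8_MAX,
	       "all fixup flags must fit into a u8");

int main(void) { return 0; }
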
Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 2 +- arch/x86/kernel/uprobes.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 72caff7afbde..9ce25ce04fee 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -53,7 +53,7 @@ struct arch_uprobe { #ifdef CONFIG_X86_64 long riprel_target; #endif - u16 fixups; + u8 fixups; } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7824ce248f8f..a8e1d7e47001 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -33,16 +33,16 @@ /* Post-execution fixups. */ /* Adjust IP back to vicinity of actual insn */ -#define UPROBE_FIX_IP 0x1 +#define UPROBE_FIX_IP 0x01 /* Adjust the return address of a call insn */ -#define UPROBE_FIX_CALL 0x2 +#define UPROBE_FIX_CALL 0x02 /* Instruction will modify TF, don't change it */ -#define UPROBE_FIX_SETF 0x4 +#define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x8000 -#define UPROBE_FIX_RIP_CX 0x4000 +#define UPROBE_FIX_RIP_AX 0x08 +#define UPROBE_FIX_RIP_CX 0x10 #define UPROBE_TRAP_NR UINT_MAX @@ -307,12 +307,12 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is NOT the register operand, so we use %rcx (register * #1) for the scratch register. */ - auprobe->def.fixups = UPROBE_FIX_RIP_CX; + auprobe->def.fixups |= UPROBE_FIX_RIP_CX; /* Change modrm from 00 000 101 to 00 000 001. */ *cursor = 0x1; } else { /* Use %rax (register #0) for the scratch register. */ - auprobe->def.fixups = UPROBE_FIX_RIP_AX; + auprobe->def.fixups |= UPROBE_FIX_RIP_AX; /* Change modrm from 00 xxx 101 to 00 xxx 000 */ *cursor = (reg << 3); } -- cgit v1.2.3-55-g7522 From 2b82cadffc4154a25c25d88a63c7fb3397cda9d6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 19:21:38 +0200 Subject: uprobes/x86: Introduce push_ret_address() Extract the "push return address" code from branch_emulate_op() into the new simple helper, push_ret_address(). It will have more users. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index a8e1d7e47001..df75913acfc0 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -407,6 +407,17 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return 0; } +static int push_ret_address(struct pt_regs *regs, unsigned long ip) +{ + unsigned long new_sp = regs->sp - sizeof_long(); + + if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) + return -EFAULT; + + regs->sp = new_sp; + return 0; +} + /* * Adjust the return address pushed by a call insn executed out of line. */ @@ -517,7 +528,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long offs = (long)auprobe->branch.offs; if (branch_is_call(auprobe)) { - unsigned long new_sp = regs->sp - sizeof_long(); /* * If it fails we execute this (mangled, see the comment in * branch_clear_offset) insn out-of-line. In the likely case @@ -527,9 +537,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). 
*/ - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + if (push_ret_address(regs, new_ip)) return false; - regs->sp = new_sp; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; } -- cgit v1.2.3-55-g7522 From 1dc76e6eacef271230d9ff6fd0f91824bda03f44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Apr 2014 18:06:19 +0200 Subject: uprobes/x86: Kill adjust_ret_addr(), simplify UPROBE_FIX_CALL logic The only insn which could have both UPROBE_FIX_IP and UPROBE_FIX_CALL was 0xe8 "call relative", and now it is handled by branch_xol_ops. So we can change default_post_xol_op(UPROBE_FIX_CALL) to simply push the address of next insn == utask->vaddr + insn.length, just we need to record insn.length into the new auprobe->def.ilen member. Note: if/when we teach branch_xol_ops to support jcxz/loopz we can remove the "correction" logic, UPROBE_FIX_IP can use the same address. Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 1 + arch/x86/kernel/uprobes.c | 24 +++--------------------- 2 files changed, 4 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 9ce25ce04fee..a040d493a4f9 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -54,6 +54,7 @@ struct arch_uprobe { long riprel_target; #endif u8 fixups; + u8 ilen; } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index df75913acfc0..5bcce852628a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -418,24 +418,6 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip) return 0; } -/* - * Adjust the return address pushed by a call insn executed out of line. - */ -static int adjust_ret_addr(unsigned long sp, long correction) -{ - int rasize = sizeof_long(); - long ra; - - if (copy_from_user(&ra, (void __user *)sp, rasize)) - return -EFAULT; - - ra += correction; - if (copy_to_user((void __user *)sp, &ra, rasize)) - return -EFAULT; - - return 0; -} - static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; @@ -446,10 +428,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs regs->ip += correction; if (auprobe->def.fixups & UPROBE_FIX_CALL) { - if (adjust_ret_addr(regs->sp, correction)) { - regs->sp += sizeof_long(); + regs->sp += sizeof_long(); + if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen)) return -ERESTART; - } } /* popf; tell the caller to not touch TF */ if (auprobe->def.fixups & UPROBE_FIX_SETF) @@ -687,6 +668,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, handle_riprel_insn(auprobe, &insn); } + auprobe->def.ilen = insn.length; if (fix_ip) auprobe->def.fixups |= UPROBE_FIX_IP; if (fix_call) -- cgit v1.2.3-55-g7522 From 83cd591485e558ab70aed45ce7261ce3f5ee8746 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Apr 2014 18:53:32 +0200 Subject: uprobes/x86: Cleanup the usage of UPROBE_FIX_IP/UPROBE_FIX_CALL Now that UPROBE_FIX_IP/UPROBE_FIX_CALL are mutually exclusive we can use a single "fix_ip_or_call" enum instead of 2 fix_* booleans. This way the logic looks more understandable and clean to me. While at it, join "case 0xea" with other "ip is correct" ret/lret cases. Also change default_post_xol_op() to use "else if" for the same reason. 
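
The single fix_ip_or_call value replacing the two booleans can be modelled with a small decoder; only a few opcodes are shown and classify() is an invented name, not the kernel function.

#include <stdint.h>
#include <stdio.h>

#define FIX_IP   0x01	/* adjust ip by (vaddr - xol_vaddr) after single-step */
#define FIX_CALL 0x02	/* push the correct return address instead */

static uint8_t classify(uint8_t opc1)
{
	uint8_t fix_ip_or_call = FIX_IP;	/* default: relative insn, fix ip */

	switch (opc1) {
	case 0xc3: case 0xcb: case 0xc2: case 0xca:	/* ret/lret */
	case 0xea:					/* jmp absolute */
		fix_ip_or_call = 0;			/* ip is already correct */
		break;
	case 0x9a:					/* call absolute */
		fix_ip_or_call = FIX_CALL;		/* fix return addr, not ip */
		break;
	}
	return fix_ip_or_call;
}

int main(void)
{
	printf("0xc3 -> %u, 0x9a -> %u, 0x90 -> %u\n",
	       classify(0xc3), classify(0x9a), classify(0x90));
	return 0;
}
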
Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 5bcce852628a..d2792e884d54 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -424,10 +424,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = (long)(utask->vaddr - utask->xol_vaddr); handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->def.fixups & UPROBE_FIX_IP) + if (auprobe->def.fixups & UPROBE_FIX_IP) { regs->ip += correction; - - if (auprobe->def.fixups & UPROBE_FIX_CALL) { + } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { regs->sp += sizeof_long(); if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen)) return -ERESTART; @@ -623,7 +622,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { struct insn insn; - bool fix_ip = true, fix_call = false; + u8 fix_ip_or_call = UPROBE_FIX_IP; int ret; ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); @@ -647,21 +646,20 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, case 0xcb: case 0xc2: case 0xca: - fix_ip = false; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; + fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ - fix_call = true; + fix_ip_or_call = UPROBE_FIX_CALL; + break; case 4: case 5: /* jmp or ljmp, indirect */ - fix_ip = false; + fix_ip_or_call = 0; + break; } /* fall through */ default: @@ -669,10 +667,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } auprobe->def.ilen = insn.length; - if (fix_ip) - auprobe->def.fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->def.fixups |= UPROBE_FIX_CALL; + auprobe->def.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; -- cgit v1.2.3-55-g7522 From 1475ee7fadafc6d0c194f2f4cbdae10ed04b9580 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 16:31:59 +0200 Subject: uprobes/x86: Rename *riprel* helpers to make the naming consistent handle_riprel_insn(), pre_xol_rip_insn() and handle_riprel_post_xol() look confusing and inconsistent. Rename them into riprel_analyze(), riprel_pre_xol(), and riprel_post_xol() respectively. No changes in compiled code. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index d2792e884d54..187be0e15e1d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -268,8 +268,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * - There's never a SIB byte. * - The displacement is always 4 bytes. 
*/ -static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; @@ -331,8 +330,7 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { @@ -346,8 +344,8 @@ pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, } } -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, + long *correction) { if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { struct arch_uprobe_task *autask; @@ -376,14 +374,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm) /* * No RIP-relative addressing on 32-bit */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { } -static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { } @@ -403,7 +401,7 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs, ¤t->utask->autask); return 0; } @@ -423,7 +421,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs struct uprobe_task *utask = current->utask; long correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); + riprel_post_xol(auprobe, regs, &correction); if (auprobe->def.fixups & UPROBE_FIX_IP) { regs->ip += correction; } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { @@ -440,7 +438,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - handle_riprel_post_xol(auprobe, regs, NULL); + riprel_post_xol(auprobe, regs, NULL); } static struct uprobe_xol_ops default_xol_ops = { @@ -663,7 +661,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } /* fall through */ default: - handle_riprel_insn(auprobe, &insn); + riprel_analyze(auprobe, &insn); } auprobe->def.ilen = insn.length; -- cgit v1.2.3-55-g7522 From 7f55e82bacaaa2c41b8e14d6bc78129b096b67b8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 17:00:46 +0200 Subject: uprobes/x86: Kill the "autask" arg of riprel_pre_xol() default_pre_xol_op() passes ¤t->utask->autask to riprel_pre_xol() and this is just ugly because it still needs to load current->utask to read ->vaddr. Remove this argument, change riprel_pre_xol() to use current->utask. 
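
The shape of this simplification, one task pointer in and the nested state derived from it, can be illustrated with a minimal sketch using made-up types.

#include <stdio.h>

struct autask { unsigned long saved_scratch_register; };
struct utask  { unsigned long vaddr; struct autask autask; };

/* One pointer in, nested state derived from it: the caller no longer has
 * to pass &utask->autask separately. */
static void pre_xol(struct utask *utask)
{
	utask->autask.saved_scratch_register = 0;
	/* ... the target address comes from utask->vaddr directly ... */
}

int main(void)
{
	struct utask u = { .vaddr = 0x400000 };
	pre_xol(&u);
	printf("vaddr=%#lx\n", u.vaddr);
	return 0;
}
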
Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 187be0e15e1d..5df1bca7c2bc 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -330,16 +330,17 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ -static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { + struct uprobe_task *utask = current->utask; + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; + utask->autask.saved_scratch_register = regs->ax; + regs->ax = utask->vaddr; regs->ax += auprobe->def.riprel_target; } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; + utask->autask.saved_scratch_register = regs->cx; + regs->cx = utask->vaddr; regs->cx += auprobe->def.riprel_target; } } @@ -377,8 +378,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm) static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, @@ -401,7 +401,7 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - riprel_pre_xol(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs); return 0; } -- cgit v1.2.3-55-g7522 From c90a6950120a7e45f31a22653fe6543507ae64d0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 18:13:31 +0200 Subject: uprobes/x86: Simplify riprel_{pre,post}_xol() and make them similar Ignoring the "correction" logic riprel_pre_xol() and riprel_post_xol() are very similar but look quite differently. 1. Add the "UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX" check at the start of riprel_pre_xol(), like the same check in riprel_post_xol(). 2. Add the trivial scratch_reg() helper which returns the address of scratch register pre_xol/post_xol need to change. 3. Change these functions to use the new helper and avoid copy-and-paste under if/else branches. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 5df1bca7c2bc..2ebadb252093 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -326,22 +326,24 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) } } +static inline unsigned long * +scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? ®s->ax : ®s->cx; +} + /* * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. 
*/ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - struct uprobe_task *utask = current->utask; + if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { - utask->autask.saved_scratch_register = regs->ax; - regs->ax = utask->vaddr; - regs->ax += auprobe->def.riprel_target; - } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { - utask->autask.saved_scratch_register = regs->cx; - regs->cx = utask->vaddr; - regs->cx += auprobe->def.riprel_target; + utask->autask.saved_scratch_register = *sr; + *sr = utask->vaddr + auprobe->def.riprel_target; } } @@ -349,14 +351,10 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { - struct arch_uprobe_task *autask; - - autask = ¤t->utask->autask; - if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); + *sr = utask->autask.saved_scratch_register; /* * The original instruction includes a displacement, and so * is 4 bytes longer than what we've just single-stepped. -- cgit v1.2.3-55-g7522 From 50204c6f6dd01b5bce1b53e0b003d01849455512 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 1 May 2014 16:52:46 +0200 Subject: uprobes/x86: Simplify rip-relative handling It is possible to replace rip-relative addressing mode with addressing mode of the same length: (reg+disp32). This eliminates the need to fix up immediate and correct for changing instruction length. And we can kill arch_uprobe->def.riprel_target. Signed-off-by: Denys Vlasenko Reviewed-by: Jim Keniston Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 3 -- arch/x86/kernel/uprobes.c | 71 ++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 44 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index a040d493a4f9..7be3c079e389 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -50,9 +50,6 @@ struct arch_uprobe { u8 opc1; } branch; struct { -#ifdef CONFIG_X86_64 - long riprel_target; -#endif u8 fixups; u8 ilen; } def; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2ebadb252093..31dcb4d5ea46 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -251,9 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * def->fixups and def->riprel_target accordingly. (The contents of the - * scratch register will be saved before we single-step the modified - * instruction, and restored afterward). + * def->fixups accordingly. (The contents of the scratch register + * will be saved before we single-step the modified instruction, + * and restored afterward). 
* * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -264,9 +264,12 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * * Some useful facts about rip-relative instructions: * - * - There's always a modrm byte. + * - There's always a modrm byte with bit layout "00 reg 101". * - There's never a SIB byte. * - The displacement is always 4 bytes. + * - REX.B=1 bit in REX prefix, which normally extends r/m field, + * has no effect on rip-relative mode. It doesn't make modrm byte + * with r/m=101 refer to register 1101 = R13. */ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { @@ -293,9 +296,8 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) */ cursor = auprobe->insn + insn_offset_modrm(insn); /* - * Convert from rip-relative addressing to indirect addressing - * via a scratch register. Change the r/m field from 0x5 (%rip) - * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. + * Convert from rip-relative addressing + * to register-relative addressing via a scratch register. */ reg = MODRM_REG(insn); if (reg == 0) { @@ -307,22 +309,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) * #1) for the scratch register. */ auprobe->def.fixups |= UPROBE_FIX_RIP_CX; - /* Change modrm from 00 000 101 to 00 000 001. */ - *cursor = 0x1; + /* + * Change modrm from "00 000 101" to "10 000 001". Example: + * 89 05 disp32 mov %eax,disp32(%rip) becomes + * 89 81 disp32 mov %eax,disp32(%rcx) + */ + *cursor = 0x81; } else { /* Use %rax (register #0) for the scratch register. */ auprobe->def.fixups |= UPROBE_FIX_RIP_AX; - /* Change modrm from 00 xxx 101 to 00 xxx 000 */ - *cursor = (reg << 3); - } - - /* Target address = address of next instruction + (signed) offset */ - auprobe->def.riprel_target = (long)insn->length + insn->displacement.value; - - /* Displacement field is gone; slide immediate field (if any) over. */ - if (insn->immediate.nbytes) { - cursor++; - memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); + /* + * Change modrm from "00 reg 101" to "10 reg 000". Example: + * 89 1d disp32 mov %edx,disp32(%rip) becomes + * 89 98 disp32 mov %edx,disp32(%rax) + */ + *cursor = (reg << 3) | 0x80; } } @@ -343,26 +344,17 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long *sr = scratch_reg(auprobe, regs); utask->autask.saved_scratch_register = *sr; - *sr = utask->vaddr + auprobe->def.riprel_target; + *sr = utask->vaddr + auprobe->def.ilen; } } -static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { struct uprobe_task *utask = current->utask; unsigned long *sr = scratch_reg(auprobe, regs); *sr = utask->autask.saved_scratch_register; - /* - * The original instruction includes a displacement, and so - * is 4 bytes longer than what we've just single-stepped. - * Caller may need to apply other fixups to handle stuff - * like "jmpq *...(%rip)" and "callq *...(%rip)". 
- */ - if (correction) - *correction += 4; } } #else /* 32-bit: */ @@ -379,8 +371,7 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } -static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } #endif /* CONFIG_X86_64 */ @@ -417,10 +408,10 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip) static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - long correction = (long)(utask->vaddr - utask->xol_vaddr); - riprel_post_xol(auprobe, regs, &correction); + riprel_post_xol(auprobe, regs); if (auprobe->def.fixups & UPROBE_FIX_IP) { + long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { regs->sp += sizeof_long(); @@ -436,7 +427,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - riprel_post_xol(auprobe, regs, NULL); + riprel_post_xol(auprobe, regs); } static struct uprobe_xol_ops default_xol_ops = { @@ -732,11 +723,9 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * * If the original instruction was a rip-relative instruction such as * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent - * instruction using a scratch register -- e.g., "movl %edx,(%rax)". - * We need to restore the contents of the scratch register and adjust - * the ip, keeping in mind that the instruction we executed is 4 bytes - * shorter than the original instruction (since we squeezed out the offset - * field). (FIX_RIP_AX or FIX_RIP_CX) + * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)". + * We need to restore the contents of the scratch register + * (FIX_RIP_AX or FIX_RIP_CX). */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { -- cgit v1.2.3-55-g7522 From 1ea30fb64598bd3a6ba43d874bb53c55878eaef5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 May 2014 17:04:00 +0200 Subject: uprobes/x86: Fix scratch register selection for rip-relative fixups Before this patch, instructions such as div, mul, shifts with count in CL, cmpxchg are mishandled. This patch adds vex prefix handling. In particular, it avoids colliding with register operand encoded in vex.vvvv field. Since we need to avoid two possible register operands, the selection of scratch register needs to be from at least three registers. After looking through a lot of CPU docs, it looks like the safest choice is SI,DI,BX. Selecting BX needs care to not collide with implicit use of BX by cmpxchg8b. Test-case: #include static const char *const pass[] = { "FAIL", "pass" }; long two = 2; void test1(void) { long ax = 0, dx = 0; asm volatile("\n" " xor %%edx,%%edx\n" " lea 2(%%edx),%%eax\n" // We divide 2 by 2. Result (in eax) should be 1: " probe1: .globl probe1\n" " divl two(%%rip)\n" // If we have a bug (eax mangled on entry) the result will be 2, // because eax gets restored by probe machinery. 
: "=a" (ax), "=d" (dx) /*out*/ : "0" (ax), "1" (dx) /*in*/ : "memory" /*clobber*/ ); dprintf(2, "%s: %s\n", __func__, pass[ax == 1] ); } long val2 = 0; void test2(void) { long old_val = val2; long ax = 0, dx = 0; asm volatile("\n" " mov val2,%%eax\n" // eax := val2 " lea 1(%%eax),%%edx\n" // edx := eax+1 // eax is equal to val2. cmpxchg should store edx to val2: " probe2: .globl probe2\n" " cmpxchg %%edx,val2(%%rip)\n" // If we have a bug (eax mangled on entry), val2 will stay unchanged : "=a" (ax), "=d" (dx) /*out*/ : "0" (ax), "1" (dx) /*in*/ : "memory" /*clobber*/ ); dprintf(2, "%s: %s\n", __func__, pass[val2 == old_val + 1] ); } long val3[2] = {0,0}; void test3(void) { long old_val = val3[0]; long ax = 0, dx = 0; asm volatile("\n" " mov val3,%%eax\n" // edx:eax := val3 " mov val3+4,%%edx\n" " mov %%eax,%%ebx\n" // ecx:ebx := edx:eax + 1 " mov %%edx,%%ecx\n" " add $1,%%ebx\n" " adc $0,%%ecx\n" // edx:eax is equal to val3. cmpxchg8b should store ecx:ebx to val3: " probe3: .globl probe3\n" " cmpxchg8b val3(%%rip)\n" // If we have a bug (edx:eax mangled on entry), val3 will stay unchanged. // If ecx:edx in mangled, val3 will get wrong value. : "=a" (ax), "=d" (dx) /*out*/ : "0" (ax), "1" (dx) /*in*/ : "cx", "bx", "memory" /*clobber*/ ); dprintf(2, "%s: %s\n", __func__, pass[val3[0] == old_val + 1 && val3[1] == 0] ); } int main(int argc, char **argv) { test1(); test2(); test3(); return 0; } Before this change all tests fail if probe{1,2,3} are probed. Signed-off-by: Denys Vlasenko Reviewed-by: Jim Keniston Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 176 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 125 insertions(+), 51 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 31dcb4d5ea46..159ca520ef5b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,8 +41,11 @@ /* Instruction will modify TF, don't change it */ #define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x08 -#define UPROBE_FIX_RIP_CX 0x10 +#define UPROBE_FIX_RIP_SI 0x08 +#define UPROBE_FIX_RIP_DI 0x10 +#define UPROBE_FIX_RIP_BX 0x20 +#define UPROBE_FIX_RIP_MASK \ + (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX) #define UPROBE_TRAP_NR UINT_MAX @@ -275,20 +278,109 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; + u8 reg2; if (!insn_rip_relative(insn)) return; /* - * insn_rip_relative() would have decoded rex_prefix, modrm. + * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm. * Clear REX.b bit (extension of MODRM.rm field): - * we want to encode rax/rcx, not r8/r9. + * we want to encode low numbered reg, not r8+. */ if (insn->rex_prefix.nbytes) { cursor = auprobe->insn + insn_offset_rex_prefix(insn); - *cursor &= 0xfe; /* Clearing REX.B bit */ + /* REX byte has 0100wrxb layout, clearing REX.b bit */ + *cursor &= 0xfe; } + /* + * Similar treatment for VEX3 prefix. 
+ * TODO: add XOP/EVEX treatment when insn decoder supports them + */ + if (insn->vex_prefix.nbytes == 3) { + /* + * vex2: c5 rvvvvLpp (has no b bit) + * vex3/xop: c4/8f rxbmmmmm wvvvvLpp + * evex: 62 rxbR00mm wvvvv1pp zllBVaaa + * (evex will need setting of both b and x since + * in non-sib encoding evex.x is 4th bit of MODRM.rm) + * Setting VEX3.b (setting because it has inverted meaning): + */ + cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; + *cursor |= 0x20; + } + + /* + * Convert from rip-relative addressing to register-relative addressing + * via a scratch register. + * + * This is tricky since there are insns with modrm byte + * which also use registers not encoded in modrm byte: + * [i]div/[i]mul: implicitly use dx:ax + * shift ops: implicitly use cx + * cmpxchg: implicitly uses ax + * cmpxchg8/16b: implicitly uses dx:ax and bx:cx + * Encoding: 0f c7/1 modrm + * The code below thinks that reg=1 (cx), chooses si as scratch. + * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m. + * First appeared in Haswell (BMI2 insn). It is vex-encoded. + * Example where none of bx,cx,dx can be used as scratch reg: + * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx + * [v]pcmpistri: implicitly uses cx, xmm0 + * [v]pcmpistrm: implicitly uses xmm0 + * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0 + * [v]pcmpestrm: implicitly uses ax, dx, xmm0 + * Evil SSE4.2 string comparison ops from hell. + * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination. + * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm. + * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi). + * AMD says it has no 3-operand form (vex.vvvv must be 1111) + * and that it can have only register operands, not mem + * (its modrm byte must have mode=11). + * If these restrictions will ever be lifted, + * we'll need code to prevent selection of di as scratch reg! + * + * Summary: I don't know any insns with modrm byte which + * use SI register implicitly. DI register is used only + * by one insn (maskmovq) and BX register is used + * only by one too (cmpxchg8b). + * BP is stack-segment based (may be a problem?). + * AX, DX, CX are off-limits (many implicit users). + * SP is unusable (it's stack pointer - think about "pop mem"; + * also, rsp+disp32 needs sib encoding -> insn length change). + */ + reg = MODRM_REG(insn); /* Fetch modrm.reg */ + reg2 = 0xff; /* Fetch vex.vvvv */ + if (insn->vex_prefix.nbytes == 2) + reg2 = insn->vex_prefix.bytes[1]; + else if (insn->vex_prefix.nbytes == 3) + reg2 = insn->vex_prefix.bytes[2]; + /* + * TODO: add XOP, EXEV vvvv reading. + * + * vex.vvvv field is in bits 6-3, bits are inverted. + * But in 32-bit mode, high-order bit may be ignored. + * Therefore, let's consider only 3 low-order bits. + */ + reg2 = ((reg2 >> 3) & 0x7) ^ 0x7; + /* + * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15. + * + * Choose scratch reg. Order is important: must not select bx + * if we can use si (cmpxchg8b case!) + */ + if (reg != 6 && reg2 != 6) { + reg2 = 6; + auprobe->def.fixups |= UPROBE_FIX_RIP_SI; + } else if (reg != 7 && reg2 != 7) { + reg2 = 7; + auprobe->def.fixups |= UPROBE_FIX_RIP_DI; + /* TODO (paranoia): force maskmovq to not use di */ + } else { + reg2 = 3; + auprobe->def.fixups |= UPROBE_FIX_RIP_BX; + } /* * Point cursor at the modrm byte. The next 4 bytes are the * displacement. 
Beyond the displacement, for some instructions, @@ -296,41 +388,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) */ cursor = auprobe->insn + insn_offset_modrm(insn); /* - * Convert from rip-relative addressing - * to register-relative addressing via a scratch register. + * Change modrm from "00 reg 101" to "10 reg reg2". Example: + * 89 05 disp32 mov %eax,disp32(%rip) becomes + * 89 86 disp32 mov %eax,disp32(%rsi) */ - reg = MODRM_REG(insn); - if (reg == 0) { - /* - * The register operand (if any) is either the A register - * (%rax, %eax, etc.) or (if the 0x4 bit is set in the - * REX prefix) %r8. In any case, we know the C register - * is NOT the register operand, so we use %rcx (register - * #1) for the scratch register. - */ - auprobe->def.fixups |= UPROBE_FIX_RIP_CX; - /* - * Change modrm from "00 000 101" to "10 000 001". Example: - * 89 05 disp32 mov %eax,disp32(%rip) becomes - * 89 81 disp32 mov %eax,disp32(%rcx) - */ - *cursor = 0x81; - } else { - /* Use %rax (register #0) for the scratch register. */ - auprobe->def.fixups |= UPROBE_FIX_RIP_AX; - /* - * Change modrm from "00 reg 101" to "10 reg 000". Example: - * 89 1d disp32 mov %edx,disp32(%rip) becomes - * 89 98 disp32 mov %edx,disp32(%rax) - */ - *cursor = (reg << 3) | 0x80; - } + *cursor = 0x80 | (reg << 3) | reg2; } static inline unsigned long * scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) { - return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? ®s->ax : ®s->cx; + if (auprobe->def.fixups & UPROBE_FIX_RIP_SI) + return ®s->si; + if (auprobe->def.fixups & UPROBE_FIX_RIP_DI) + return ®s->di; + return ®s->bx; } /* @@ -339,7 +411,7 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) */ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) { struct uprobe_task *utask = current->utask; unsigned long *sr = scratch_reg(auprobe, regs); @@ -350,7 +422,7 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) { struct uprobe_task *utask = current->utask; unsigned long *sr = scratch_reg(auprobe, regs); @@ -405,6 +477,23 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip) return 0; } +/* + * We have to fix things up as follows: + * + * Typically, the new ip is relative to the copied instruction. We need + * to make it relative to the original instruction (FIX_IP). Exceptions + * are return instructions and absolute or indirect jump or call instructions. + * + * If the single-stepped instruction was a call, the return address that + * is atop the stack is the address following the copied instruction. We + * need to make it the address following the original instruction (FIX_CALL). + * + * If the original instruction was a rip-relative instruction such as + * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent + * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)". + * We need to restore the contents of the scratch register + * (FIX_RIP_reg). 
+ */ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; @@ -711,21 +800,6 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * single-step, we single-stepped a copy of the instruction. * * This function prepares to resume execution after the single-step. - * We have to fix things up as follows: - * - * Typically, the new ip is relative to the copied instruction. We need - * to make it relative to the original instruction (FIX_IP). Exceptions - * are return instructions and absolute or indirect jump or call instructions. - * - * If the single-stepped instruction was a call, the return address that - * is atop the stack is the address following the copied instruction. We - * need to make it the address following the original instruction (FIX_CALL). - * - * If the original instruction was a rip-relative instruction such as - * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent - * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rax)". - * We need to restore the contents of the scratch register - * (FIX_RIP_AX or FIX_RIP_CX). */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { -- cgit v1.2.3-55-g7522 From 5e1b05beeca8139204324581a5b1ffb53d057f96 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 May 2014 20:34:00 +0200 Subject: x86/traps: Make math_error() static Trivial, make math_error() static. Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/traps.h | 1 - arch/x86/kernel/traps.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 58d66fe06b61..a7b212db9e04 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -98,7 +98,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; -void math_error(struct pt_regs *, int, int); void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 57409f6b8c62..8eddd628326e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,7 @@ exit: * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(struct pt_regs *regs, int error_code, int trapnr) +static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; siginfo_t info; -- cgit v1.2.3-55-g7522 From 38cad57be9800e46c52a3612fb9d963eee4fd9c3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 7 May 2014 16:47:09 +0200 Subject: x86/traps: Use SEND_SIG_PRIV instead of force_sig() force_sig() is just force_sig_info(SEND_SIG_PRIV). Imho it should die, we have too many ugly "send signal" helpers. And do_trap() looks just ugly because it uses force_sig_info() or force_sig() depending on info != NULL. 
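The collapsed call in the diff below leans on GCC's conditional operator with an omitted middle operand: "x ?: y" yields x if it is non-zero and y otherwise, evaluating x only once. A minimal stand-alone sketch of that idiom, with made-up strings standing in for siginfo:

#include <stdio.h>

/* NULL stands in for "no detailed siginfo, fall back to the private default". */
static const char *describe(const char *info)
{
	/*
	 * GNU C: "info ?: fallback" is "info ? info : fallback" with info
	 * evaluated only once.  This is what lets do_trap() fold the old
	 * force_sig_info()/force_sig() branches into a single call.
	 */
	return info ?: "SEND_SIG_PRIV (default)";
}

int main(void)
{
	printf("%s\n", describe("BUS_ADRALN at 0x0"));
	printf("%s\n", describe(NULL));
	return 0;
}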
Signed-off-by: Oleg Nesterov --- arch/x86/kernel/traps.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8eddd628326e..2cd429117a41 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -168,10 +168,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); + force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -305,7 +302,7 @@ do_general_protection(struct pt_regs *regs, long error_code) pr_cont("\n"); } - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); exit: exception_exit(prev_state); } @@ -645,7 +642,7 @@ void math_state_restore(void) */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); return; } -- cgit v1.2.3-55-g7522 From dff0796e53c29147c9bd1f5567a261dcf0e528bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 7 May 2014 17:21:34 +0200 Subject: x86/traps: Introduce do_error_trap() Move the common code from DO_ERROR() and DO_ERROR_INFO() into the new helper, do_error_trap(). This simplifies define's and shaves 527 bytes from traps.o. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/traps.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2cd429117a41..ab8dad719b9e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -171,41 +171,37 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } +static void do_error_trap(struct pt_regs *regs, long error_code, char *str, + unsigned long trapnr, int signr, siginfo_t *info) +{ + enum ctx_state prev_state = exception_enter(); + + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != + NOTIFY_STOP) { + conditional_sti(regs); + do_trap(trapnr, signr, str, regs, error_code, info); + } + + exception_exit(prev_state); +} + #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - enum ctx_state prev_state; \ - \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(prev_state); \ + do_error_trap(regs, error_code, str, trapnr, signr, NULL); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ - enum ctx_state prev_state; \ \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(prev_state); \ + \ + do_error_trap(regs, error_code, str, trapnr, signr, &info); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) -- cgit v1.2.3-55-g7522 From 958d3d729802f7d741cbe8400e69b89baae580ee 
Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 7 May 2014 17:59:39 +0200 Subject: x86/traps: Introduce fill_trap_info(), simplify DO_ERROR_INFO() Extract the fill-siginfo code from DO_ERROR_INFO() into the new helper, fill_trap_info(). It can calculate si_code and si_addr looking at trapnr, so we can remove these arguments from DO_ERROR_INFO() and simplify the source code. The generated code is the same, __builtin_constant_p(trapnr) == T. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/traps.c | 53 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ab8dad719b9e..1cf8a4c409b6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -136,6 +136,33 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } +static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr, + siginfo_t *info) +{ + unsigned long siaddr; + int sicode; + + switch (trapnr) { + case X86_TRAP_DE: + sicode = FPE_INTDIV; + siaddr = regs->ip; + break; + case X86_TRAP_UD: + sicode = ILL_ILLOPN; + siaddr = regs->ip; + break; + case X86_TRAP_AC: + sicode = BUS_ADRALN; + siaddr = 0; + break; + } + + info->si_signo = signr; + info->si_errno = 0; + info->si_code = sicode; + info->si_addr = (void __user *)siaddr; +} + static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) @@ -191,30 +218,26 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR_INFO(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - \ + fill_trap_info(regs, signr, trapnr, &info); \ do_error_trap(regs, error_code, str, trapnr, signr, &info); \ } -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) -DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) -DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) -DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) -DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) -DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) +DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error) +DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) +DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) +DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) +DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -- cgit v1.2.3-55-g7522 From 1c326c4dfe182a1c4c1e39f2c00f04c380d11692 Mon 
Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 May 2014 20:04:11 +0200 Subject: x86/traps: Shift fill_trap_info() from DO_ERROR_INFO() to do_error_trap() Move the callsite of fill_trap_info() into do_error_trap() and remove the "siginfo_t *info" argument. This obviously breaks DO_ERROR() which passed info == NULL, we simply change fill_trap_info() to return "siginfo_t *" and add the "default" case which returns SEND_SIG_PRIV. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/traps.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1cf8a4c409b6..c9dd74124038 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -136,13 +136,16 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } -static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr, - siginfo_t *info) +static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, + siginfo_t *info) { unsigned long siaddr; int sicode; switch (trapnr) { + default: + return SEND_SIG_PRIV; + case X86_TRAP_DE: sicode = FPE_INTDIV; siaddr = regs->ip; @@ -161,6 +164,7 @@ static void fill_trap_info(struct pt_regs *regs, int signr, int trapnr, info->si_errno = 0; info->si_code = sicode; info->si_addr = (void __user *)siaddr; + return info; } static void __kprobes @@ -199,14 +203,16 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } static void do_error_trap(struct pt_regs *regs, long error_code, char *str, - unsigned long trapnr, int signr, siginfo_t *info) + unsigned long trapnr, int signr) { enum ctx_state prev_state = exception_enter(); + siginfo_t info; if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { conditional_sti(regs); - do_trap(trapnr, signr, str, regs, error_code, info); + do_trap(trapnr, signr, str, regs, error_code, + fill_trap_info(regs, signr, trapnr, &info)); } exception_exit(prev_state); @@ -215,16 +221,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - do_error_trap(regs, error_code, str, trapnr, signr, NULL); \ + do_error_trap(regs, error_code, str, trapnr, signr); \ } #define DO_ERROR_INFO(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - siginfo_t info; \ - \ - fill_trap_info(regs, signr, trapnr, &info); \ - do_error_trap(regs, error_code, str, trapnr, signr, &info); \ + do_error_trap(regs, error_code, str, trapnr, signr); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error) -- cgit v1.2.3-55-g7522 From 0eb14833d5b1ea1accfeffb71be5de5929f85da9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 May 2014 20:12:24 +0200 Subject: x86/traps: Kill DO_ERROR_INFO() Now that DO_ERROR_INFO() doesn't differ from DO_ERROR() we can remove it and use DO_ERROR() instead. 
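For readers unfamiliar with the do_##name token pasting, here is a stand-alone sketch (user-space types standing in for the kernel's) of how a single DO_ERROR()-style macro stamps out one thin trap handler per use, all funnelling into the common helper:

#include <stdio.h>
#include <signal.h>

struct pt_regs { unsigned long ip; };

static void do_error_trap(struct pt_regs *regs, long error_code,
			  const char *str, unsigned long trapnr, int signr)
{
	printf("trap %lu (%s): signal %d, ip=%#lx, err=%ld\n",
	       trapnr, str, signr, regs->ip, error_code);
}

/* Each use of DO_ERROR() expands to a full function definition named do_<name>. */
#define DO_ERROR(trapnr, signr, str, name)				\
void do_##name(struct pt_regs *regs, long error_code)			\
{									\
	do_error_trap(regs, error_code, str, trapnr, signr);		\
}

DO_ERROR(0, SIGFPE, "divide error",   divide_error)
DO_ERROR(6, SIGILL, "invalid opcode", invalid_op)

int main(void)
{
	struct pt_regs regs = { .ip = 0x400080 };

	do_divide_error(&regs, 0);
	do_invalid_op(&regs, 0);
	return 0;
}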
Signed-off-by: Oleg Nesterov --- arch/x86/kernel/traps.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c9dd74124038..73b3ea32245a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -224,23 +224,17 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_error_trap(regs, error_code, str, trapnr, signr); \ -} - -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error) -DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow) -DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) -DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) +DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) +DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -- cgit v1.2.3-55-g7522 From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 May 2014 18:24:45 +0200 Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap If the probed insn triggers a trap, ->si_addr = regs->ip is technically correct, but this is not what the signal handler wants; we need to pass the address of the probed insn, not the address of xol slot. Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in uprobes.h uses a macro to avoid include hell and ensure that it can be compiled even if an architecture doesn't define instruction_pointer(). Test-case: #include #include #include extern void probe_div(void); void sigh(int sig, siginfo_t *info, void *c) { int passed = (info->si_addr == probe_div); printf(passed ? "PASS\n" : "FAIL\n"); _exit(!passed); } int main(void) { struct sigaction sa = { .sa_sigaction = sigh, .sa_flags = SA_SIGINFO, }; sigaction(SIGFPE, &sa, NULL); asm ( "xor %ecx,%ecx\n" ".globl probe_div; probe_div:\n" "idiv %ecx\n" ); return 0; } it fails if probe_div() is probed. Note: show_unhandled_signals users should probably use this helper too, but we need to cleanup them first. 
Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu --- arch/x86/kernel/traps.c | 7 ++++--- include/linux/uprobes.h | 4 ++++ kernel/events/uprobes.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 73b3ea32245a..3fdb20548c4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, case X86_TRAP_DE: sicode = FPE_INTDIV; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_UD: sicode = ILL_ILLOPN; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_AC: sicode = BUS_ADRALN; @@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index edff2b97b864..88c3b7e8b384 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r #else /* !CONFIG_UPROBES */ struct uprobes_state { }; + +#define uprobe_get_trap_addr(regs) instruction_pointer(regs) + static inline int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a13251e8bfa4..3b02c72938a8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1351,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } +unsigned long uprobe_get_trap_addr(struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (unlikely(utask && utask->active_uprobe)) + return utask->vaddr; + + return instruction_pointer(regs); +} + /* * Called with no locks held. * Called in context of a exiting or a exec-ing thread. -- cgit v1.2.3-55-g7522 From c184c980de30dc5f6fec4b281928aa6743708da9 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 16 May 2014 17:18:07 -0400 Subject: perf/x86: Use common PMU interrupt disabled code Make the x86 perf code use the new common PMU interrupt disabled code. Typically most x86 machines have working PMU interrupts, although some older p6-class machines had this problem. 
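Seen from user space, the point is that a sampling event (one with a non-zero sample_period) needs a working PMU interrupt, so on hardware flagged with PERF_PMU_CAP_NO_INTERRUPT the perf core, rather than the x86 code, now refuses it. A minimal perf_event_open() sketch; whether the open succeeds naturally depends on the machine, and the wrapper below is only the usual raw-syscall shim:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* glibc ships no perf_event_open() wrapper, so call the syscall directly. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;	/* sampling: needs a PMU interrupt */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* e.g. PMU without interrupts */
	else
		puts("sampling event opened");
	return 0;
}

Before this series the same failure came out of x86_setup_perfctr() as -EOPNOTSUPP; with the capability flag, the generic code can report it for any architecture that sets it.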
Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1405161715560.11099@vincent-weaver-1.umelst.maine.edu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 32029e35f2b9..2bdfbff8a4f6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; } if (attr->type == PERF_TYPE_RAW) @@ -1367,6 +1358,15 @@ static void __init pmu_check_apic(void) x86_pmu.apic = 0; pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } static struct attribute_group x86_pmu_format_group = { -- cgit v1.2.3-55-g7522 From 37548914fbfcd56e1955a9b7e55dc3b84a3e9e25 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 May 2014 12:50:09 +0530 Subject: perf/x86: Add conditional branch filtering support This patch adds conditional branch filtering support, enabling it for PERF_SAMPLE_BRANCH_COND in perf branch stack sampling framework by utilizing an available software filter X86_BR_JCC. 
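On the user side the new filter is requested through perf_event_attr.branch_sample_type. A small sketch of the attribute setup only (assuming kernel headers recent enough to define PERF_SAMPLE_BRANCH_COND); opening and reading the event works as in the previous example:

#include <string.h>
#include <linux/perf_event.h>

/*
 * Ask for a branch stack containing only conditional branches taken in
 * user space; on x86 this now maps to the X86_BR_JCC software filter
 * (and LBR_JCC where the hardware filter exists).
 */
static void setup_cond_branch_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 100000;
	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				   PERF_SAMPLE_BRANCH_COND;
}

int main(void)
{
	struct perf_event_attr attr;

	setup_cond_branch_attr(&attr);
	return 0;
}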
Signed-off-by: Anshuman Khandual Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: mpe@ellerman.id.au Cc: benh@kernel.crashing.org Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1400743210-32289-3-git-send-email-khandual@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d82d155aca8c..9dd2459a4c73 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_NO_TX) mask |= X86_BR_NO_TX; + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { @@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; /* core */ -- cgit v1.2.3-55-g7522 From 5cdb76d6f0b657c1140de74ed5af7cc8c5ed5faf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 1 Jun 2014 21:13:46 +0200 Subject: uprobes/x86: Rename arch_uprobe->def to ->defparam, minor comment updates Purely cosmetic, no changes in .o, 1. As Jim pointed out arch_uprobe->def looks ambiguous, rename it to ->defparam. 2. Add the comment into default_post_xol_op() to explain "regs->sp +=". 3. Remove the stale part of the comment in arch_uprobe_analyze_insn(). Suggested-by: Jim Keniston Reviewed-by: Masami Hiramatsu Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 2 +- arch/x86/kernel/uprobes.c | 37 ++++++++++++++++++------------------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 7be3c079e389..74f4c2ff6427 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -52,7 +52,7 @@ struct arch_uprobe { struct { u8 fixups; u8 ilen; - } def; + } defparam; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 159ca520ef5b..5d1cbfe4ae58 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -254,7 +254,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * def->fixups accordingly. (The contents of the scratch register + * defparam->fixups accordingly. (The contents of the scratch register * will be saved before we single-step the modified instruction, * and restored afterward). 
* @@ -372,14 +372,14 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) */ if (reg != 6 && reg2 != 6) { reg2 = 6; - auprobe->def.fixups |= UPROBE_FIX_RIP_SI; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI; } else if (reg != 7 && reg2 != 7) { reg2 = 7; - auprobe->def.fixups |= UPROBE_FIX_RIP_DI; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI; /* TODO (paranoia): force maskmovq to not use di */ } else { reg2 = 3; - auprobe->def.fixups |= UPROBE_FIX_RIP_BX; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX; } /* * Point cursor at the modrm byte. The next 4 bytes are the @@ -398,9 +398,9 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) static inline unsigned long * scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->def.fixups & UPROBE_FIX_RIP_SI) + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI) return ®s->si; - if (auprobe->def.fixups & UPROBE_FIX_RIP_DI) + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI) return ®s->di; return ®s->bx; } @@ -411,18 +411,18 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) */ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) { + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { struct uprobe_task *utask = current->utask; unsigned long *sr = scratch_reg(auprobe, regs); utask->autask.saved_scratch_register = *sr; - *sr = utask->vaddr + auprobe->def.ilen; + *sr = utask->vaddr + auprobe->defparam.ilen; } } static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->def.fixups & UPROBE_FIX_RIP_MASK) { + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { struct uprobe_task *utask = current->utask; unsigned long *sr = scratch_reg(auprobe, regs); @@ -499,16 +499,16 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs struct uprobe_task *utask = current->utask; riprel_post_xol(auprobe, regs); - if (auprobe->def.fixups & UPROBE_FIX_IP) { + if (auprobe->defparam.fixups & UPROBE_FIX_IP) { long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; - } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { - regs->sp += sizeof_long(); - if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen)) + } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { + regs->sp += sizeof_long(); /* Pop incorrect return address */ + if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; } /* popf; tell the caller to not touch TF */ - if (auprobe->def.fixups & UPROBE_FIX_SETF) + if (auprobe->defparam.fixups & UPROBE_FIX_SETF) utask->autask.saved_tf = true; return 0; @@ -711,12 +711,11 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, /* * Figure out which fixups default_post_xol_op() will need to perform, - * and annotate def->fixups accordingly. To start with, ->fixups is - * either zero or it reflects rip-related fixups. + * and annotate defparam->fixups accordingly. 
*/ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->def.fixups |= UPROBE_FIX_SETF; + auprobe->defparam.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: @@ -742,8 +741,8 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, riprel_analyze(auprobe, &insn); } - auprobe->def.ilen = insn.length; - auprobe->def.fixups |= fix_ip_or_call; + auprobe->defparam.ilen = insn.length; + auprobe->defparam.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; -- cgit v1.2.3-55-g7522
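To make the "Pop incorrect return address" comment above concrete, here is a stand-alone sketch of the UPROBE_FIX_CALL fixup with a simulated stack and illustrative addresses: the single-stepped copy of a call pushed a return address inside the XOL slot, and default_post_xol_op() replaces it with the address just past the original instruction.

#include <stdio.h>

#define XOL_SLOT   0x7f0000001000UL	/* where the copied insn was stepped */
#define ORIG_VADDR 0x000000400a2bUL	/* where the probed call really lives */
#define CALL_ILEN  5UL			/* defparam.ilen of the probed call */

int main(void)
{
	unsigned long stack[16];
	unsigned int sp = 16;		/* empty, descending stack (in longs) */

	/*
	 * Single-stepping the copied "call" pushed a return address that
	 * points just past the copy in the XOL slot, which is useless to
	 * the traced program.
	 */
	stack[--sp] = XOL_SLOT + CALL_ILEN;
	printf("after xol step: retaddr = %#lx\n", stack[sp]);

	/* UPROBE_FIX_CALL: pop the bogus return address ... */
	sp++;					/* regs->sp += sizeof_long() */
	/* ... and push the address following the original instruction. */
	stack[--sp] = ORIG_VADDR + CALL_ILEN;	/* push_ret_address() */

	printf("after fixup   : retaddr = %#lx\n", stack[sp]);
	return 0;
}

With that in place, the eventual ret returns to the instruction after the probed call in the original text, exactly as if the call had been executed in place.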