From 1aab16c28a0232d898d6f56f5a56019472296ee7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Jan 2017 11:25:33 +0100 Subject: cpu-exec: unify icount_decr and tcg_exit_req The icount interrupt flag and tcg_exit_req serve almost the same purpose, let's make them completely the same. The former TB_EXIT_REQUESTED and TB_EXIT_ICOUNT_EXPIRED cases are unified, since we can distinguish them from the value of the interrupt flag. Signed-off-by: Paolo Bonzini --- include/exec/gen-icount.h | 53 ++++++++++++++++++++++------------------------- include/qom/cpu.h | 15 +++++++------- 2 files changed, 32 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 050de59b38..62d462e494 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -6,58 +6,55 @@ /* Helpers for instruction counting code generation. */ static int icount_start_insn_idx; -static TCGLabel *icount_label; static TCGLabel *exitreq_label; static inline void gen_tb_start(TranslationBlock *tb) { - TCGv_i32 count, flag, imm; + TCGv_i32 count, imm; exitreq_label = gen_new_label(); - flag = tcg_temp_new_i32(); - tcg_gen_ld_i32(flag, cpu_env, - offsetof(CPUState, tcg_exit_req) - ENV_OFFSET); - tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label); - tcg_temp_free_i32(flag); - - if (!(tb->cflags & CF_USE_ICOUNT)) { - return; + if (tb->cflags & CF_USE_ICOUNT) { + count = tcg_temp_local_new_i32(); + } else { + count = tcg_temp_new_i32(); } - icount_label = gen_new_label(); - count = tcg_temp_local_new_i32(); tcg_gen_ld_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); - imm = tcg_temp_new_i32(); - /* We emit a movi with a dummy immediate argument. Keep the insn index - * of the movi so that we later (when we know the actual insn count) - * can update the immediate argument with the actual insn count. 
*/ - icount_start_insn_idx = tcg_op_buf_count(); - tcg_gen_movi_i32(imm, 0xdeadbeef); + if (tb->cflags & CF_USE_ICOUNT) { + imm = tcg_temp_new_i32(); + /* We emit a movi with a dummy immediate argument. Keep the insn index + * of the movi so that we later (when we know the actual insn count) + * can update the immediate argument with the actual insn count. */ + icount_start_insn_idx = tcg_op_buf_count(); + tcg_gen_movi_i32(imm, 0xdeadbeef); + + tcg_gen_sub_i32(count, count, imm); + tcg_temp_free_i32(imm); + } + + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label); - tcg_gen_sub_i32(count, count, imm); - tcg_temp_free_i32(imm); + if (tb->cflags & CF_USE_ICOUNT) { + tcg_gen_st16_i32(count, cpu_env, + -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); + } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label); - tcg_gen_st16_i32(count, cpu_env, - -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); tcg_temp_free_i32(count); } static void gen_tb_end(TranslationBlock *tb, int num_insns) { - gen_set_label(exitreq_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); - if (tb->cflags & CF_USE_ICOUNT) { /* Update the num_insn immediate parameter now that we know * the actual insn count. */ tcg_set_insn_param(icount_start_insn_idx, 1, num_insns); - gen_set_label(icount_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED); } + gen_set_label(exitreq_label); + tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); + /* Terminate the linked list. */ tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0; } diff --git a/include/qom/cpu.h b/include/qom/cpu.h index f69b2407ea..1bc3ad230a 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -275,11 +275,11 @@ struct qemu_work_item; * @stopped: Indicates the CPU has been artificially stopped. * @unplug: Indicates a pending CPU unplug request. 
* @crash_occurred: Indicates the OS reported a crash (panic) for this CPU - * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this - * CPU and return to its top level loop. * @singlestep_enabled: Flags for single-stepping. * @icount_extra: Instructions until next timer event. - * @icount_decr: Number of cycles left, with interrupt flag in high bit. + * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode. + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this + * CPU and return to its top level loop (even in non-icount mode). * This allows a single read-compare-cbranch-write sequence to test * for both decrementer underflow and exceptions. * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution @@ -381,10 +381,6 @@ struct CPUState { /* TODO Move common fields from CPUArchState here. */ int cpu_index; /* used by alpha TCG */ uint32_t halted; /* used by alpha, cris, ppc TCG */ - union { - uint32_t u32; - icount_decr_u16 u16; - } icount_decr; uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ @@ -397,7 +393,10 @@ struct CPUState { offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code size, especially for hosts without large memory offsets. */ - uint32_t tcg_exit_req; + union { + uint32_t u32; + icount_decr_u16 u16; + } icount_decr; bool hax_vcpu_dirty; struct hax_vcpu_state *hax_vcpu; -- cgit v1.2.3-55-g7522 From d98d407234713d05b77114237f839c43a8152089 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2017 13:22:12 +0100 Subject: cpus: remove ugly cast on sigbus_handler The cast is there because sigbus_handler is invoked via sigfd_handler. But it feels just wrong to use struct qemu_signalfd_siginfo in the prototype of a function that is passed to sigaction. Instead, do a simple-minded conversion of qemu_signalfd_siginfo to siginfo_t. 
Signed-off-by: Paolo Bonzini --- cpus.c | 12 +++--------- include/qemu/compatfd.h | 42 ------------------------------------------ include/qemu/osdep.h | 28 ++++++++++++++++++++++++++++ util/compatfd.c | 1 - util/main-loop.c | 5 +---- util/oslib-posix.c | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 56 deletions(-) delete mode 100644 include/qemu/compatfd.h (limited to 'include') diff --git a/cpus.c b/cpus.c index 8200ac6b75..a628cde232 100644 --- a/cpus.c +++ b/cpus.c @@ -51,10 +51,6 @@ #include "hw/nmi.h" #include "sysemu/replay.h" -#ifndef _WIN32 -#include "qemu/compatfd.h" -#endif - #ifdef CONFIG_LINUX #include @@ -924,11 +920,9 @@ static void sigbus_reraise(void) abort(); } -static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, - void *ctx) +static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) { - if (kvm_on_sigbus(siginfo->ssi_code, - (void *)(intptr_t)siginfo->ssi_addr)) { + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { sigbus_reraise(); } } @@ -939,7 +933,7 @@ static void qemu_init_sigbus(void) memset(&action, 0, sizeof(action)); action.sa_flags = SA_SIGINFO; - action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + action.sa_sigaction = sigbus_handler; sigaction(SIGBUS, &action, NULL); prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); diff --git a/include/qemu/compatfd.h b/include/qemu/compatfd.h deleted file mode 100644 index aa12ee9364..0000000000 --- a/include/qemu/compatfd.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * signalfd/eventfd compatibility - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. 
- * - */ - -#ifndef QEMU_COMPATFD_H -#define QEMU_COMPATFD_H - - -struct qemu_signalfd_siginfo { - uint32_t ssi_signo; /* Signal number */ - int32_t ssi_errno; /* Error number (unused) */ - int32_t ssi_code; /* Signal code */ - uint32_t ssi_pid; /* PID of sender */ - uint32_t ssi_uid; /* Real UID of sender */ - int32_t ssi_fd; /* File descriptor (SIGIO) */ - uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ - uint32_t ssi_band; /* Band event (SIGIO) */ - uint32_t ssi_overrun; /* POSIX timer overrun count */ - uint32_t ssi_trapno; /* Trap number that caused signal */ - int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ - int32_t ssi_int; /* Integer sent by sigqueue(2) */ - uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ - uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ - uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ - uint64_t ssi_addr; /* Address that generated signal - (for hardware-generated signals) */ - uint8_t pad[48]; /* Pad size to 128 bytes (allow for - additional fields in the future) */ -}; - -int qemu_signalfd(const sigset_t *mask); - -#endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 56c9e22405..6932709e4e 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -297,6 +297,34 @@ void qemu_anon_ram_free(void *ptr, size_t size); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#ifdef CONFIG_POSIX +struct qemu_signalfd_siginfo { + uint32_t ssi_signo; /* Signal number */ + int32_t ssi_errno; /* Error number (unused) */ + int32_t ssi_code; /* Signal code */ + uint32_t ssi_pid; /* PID of sender */ + uint32_t ssi_uid; /* Real UID of sender */ + int32_t ssi_fd; /* File descriptor (SIGIO) */ + uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ + uint32_t ssi_band; /* Band event (SIGIO) */ + uint32_t ssi_overrun; /* POSIX timer overrun count */ + uint32_t ssi_trapno; /* Trap number that caused signal */ + int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ + int32_t ssi_int; /* 
Integer sent by sigqueue(2) */ + uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ + uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ + uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ + uint64_t ssi_addr; /* Address that generated signal + (for hardware-generated signals) */ + uint8_t pad[48]; /* Pad size to 128 bytes (allow for + additional fields in the future) */ +}; + +int qemu_signalfd(const sigset_t *mask); +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info); +#endif + int qemu_madvise(void *addr, size_t len, int advice); int qemu_open(const char *name, int flags, ...); diff --git a/util/compatfd.c b/util/compatfd.c index 9a43042ae6..980bd33e52 100644 --- a/util/compatfd.c +++ b/util/compatfd.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "qemu/compatfd.h" #include "qemu/thread.h" #include diff --git a/util/main-loop.c b/util/main-loop.c index ad10bca211..ca7bb072f9 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -34,8 +34,6 @@ #ifndef _WIN32 -#include "qemu/compatfd.h" - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. 
@@ -63,8 +61,7 @@ static void sigfd_handler(void *opaque) sigaction(info.ssi_signo, NULL, &action); if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { - action.sa_sigaction(info.ssi_signo, - (siginfo_t *)&info, NULL); + sigaction_invoke(&action, &info); } else if (action.sa_handler) { action.sa_handler(info.ssi_signo); } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f63146407f..cd686aae3d 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -603,3 +603,36 @@ void qemu_free_stack(void *stack, size_t sz) munmap(stack, sz); } + +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info) +{ + siginfo_t si = { 0 }; + si.si_signo = info->ssi_signo; + si.si_errno = info->ssi_errno; + si.si_code = info->ssi_code; + + /* Convert the minimal set of fields defined by POSIX. + * Positive si_code values are reserved for kernel-generated + * signals, where the valid siginfo fields are determined by + * the signal number. But according to POSIX, it is unspecified + * whether SI_USER and SI_QUEUE have values less than or equal to + * zero. + */ + if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE || + info->ssi_code <= 0) { + /* SIGTERM, etc. 
*/ + si.si_pid = info->ssi_pid; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE || + info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) { + si.si_addr = (void *)(uintptr_t)info->ssi_addr; + } else if (info->ssi_signo == SIGCHLD) { + si.si_pid = info->ssi_pid; + si.si_status = info->ssi_status; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGIO) { + si.si_band = info->ssi_band; + } + action->sa_sigaction(info->ssi_signo, &si, NULL); +} -- cgit v1.2.3-55-g7522 From a16fc07ebd58da51d5e1c2928069879c40a26f59 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Feb 2017 09:50:02 +0100 Subject: cpus: reorganize signal handling code Move the KVM "eat signals" code under CONFIG_LINUX, in preparation for moving it to kvm-all.c; reraise non-MCE SIGBUS immediately, without passing it to KVM. Signed-off-by: Paolo Bonzini --- cpus.c | 63 ++++++++++++++++++++++++++-------------------------- include/qemu/osdep.h | 9 ++++++++ target/i386/kvm.c | 15 ++----------- 3 files changed, 43 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/cpus.c b/cpus.c index a628cde232..399e2713b8 100644 --- a/cpus.c +++ b/cpus.c @@ -922,6 +922,10 @@ static void sigbus_reraise(void) static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) { + if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { + sigbus_reraise(); + } + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { sigbus_reraise(); } @@ -939,6 +943,30 @@ static void qemu_init_sigbus(void) prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } +static void dummy_signal(int sig) +{ +} + +static void qemu_kvm_init_cpu_signals(CPUState *cpu) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); + sigdelset(&set, SIG_IPI); + sigdelset(&set, SIGBUS); + r = 
kvm_set_signal_mask(cpu, &set); + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +} + static void qemu_kvm_eat_signals(CPUState *cpu) { struct timespec ts = { 0, 0 }; @@ -960,6 +988,9 @@ static void qemu_kvm_eat_signals(CPUState *cpu) switch (r) { case SIGBUS: + if (siginfo.si_code != BUS_MCEERR_AO && siginfo.si_code != BUS_MCEERR_AR) { + sigbus_reraise(); + } if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { sigbus_reraise(); } @@ -975,9 +1006,7 @@ static void qemu_kvm_eat_signals(CPUState *cpu) } } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); } - #else /* !CONFIG_LINUX */ - static void qemu_init_sigbus(void) { } @@ -985,39 +1014,11 @@ static void qemu_init_sigbus(void) static void qemu_kvm_eat_signals(CPUState *cpu) { } -#endif /* !CONFIG_LINUX */ - -#ifndef _WIN32 -static void dummy_signal(int sig) -{ -} - -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - int r; - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); - sigdelset(&set, SIGBUS); - r = kvm_set_signal_mask(cpu, &set); - if (r) { - fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); - exit(1); - } -} -#else /* _WIN32 */ static void qemu_kvm_init_cpu_signals(CPUState *cpu) { - abort(); } -#endif /* _WIN32 */ +#endif /* !CONFIG_LINUX */ static QemuMutex qemu_global_mutex; diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 6932709e4e..af37195fef 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -284,6 +284,15 @@ void qemu_anon_ram_free(void *ptr, size_t size); #endif +#if defined(CONFIG_LINUX) +#ifndef BUS_MCEERR_AR +#define BUS_MCEERR_AR 4 +#endif +#ifndef BUS_MCEERR_AO +#define BUS_MCEERR_AO 5 +#endif +#endif + #if defined(__linux__) && \ (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)) /* Use 2 
MiB alignment so transparent hugepages can be used by KVM. diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 0c48dfdae5..f49a786c98 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -64,13 +64,6 @@ * 255 kvm_msr_entry structs */ #define MSR_BUF_SIZE 4096 -#ifndef BUS_MCEERR_AR -#define BUS_MCEERR_AR 4 -#endif -#ifndef BUS_MCEERR_AO -#define BUS_MCEERR_AO 5 -#endif - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_INFO(SET_TSS_ADDR), KVM_CAP_INFO(EXT_CPUID), @@ -469,9 +462,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) ram_addr_t ram_addr; hwaddr paddr; - if (code != BUS_MCEERR_AR && code != BUS_MCEERR_AO) { - return 1; - } + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); /* Because the MCE happened while running the VCPU, KVM could have * injected action required MCEs too. Action optional MCEs should @@ -504,9 +495,7 @@ int kvm_arch_on_sigbus(int code, void *addr) { X86CPU *cpu = X86_CPU(first_cpu); - if (code != BUS_MCEERR_AR && code != BUS_MCEERR_AO) { - return 1; - } + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); if (code == BUS_MCEERR_AR) { hardware_memory_error(); -- cgit v1.2.3-55-g7522 From 4d39892cca86a9162beaa3944057d118ef42edcd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Feb 2017 10:04:34 +0100 Subject: KVM: remove kvm_arch_on_sigbus Build it on kvm_arch_on_sigbus_vcpu instead. They do the same for "action optional" SIGBUSes, and the main thread should never get "action required" SIGBUSes because it blocks the signal. 
Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 1 - kvm-all.c | 9 ++++++++- target/arm/kvm.c | 5 ----- target/i386/kvm.c | 40 +++++----------------------------------- target/mips/kvm.c | 6 ------ target/ppc/kvm.c | 5 ----- target/s390x/kvm.c | 5 ----- 7 files changed, 13 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 3045ee7678..6ecb61cdef 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -358,7 +358,6 @@ bool kvm_vcpu_id_is_valid(int vcpu_id); unsigned long kvm_arch_vcpu_id(CPUState *cpu); int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); -int kvm_arch_on_sigbus(int code, void *addr); void kvm_arch_init_irq_routing(KVMState *s); diff --git a/kvm-all.c b/kvm-all.c index 0c94637c46..a433ad3090 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -2391,6 +2391,7 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } + int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return kvm_arch_on_sigbus_vcpu(cpu, code, addr); @@ -2398,7 +2399,13 @@ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) int kvm_on_sigbus(int code, void *addr) { - return kvm_arch_on_sigbus(code, addr); + /* Action required MCE kills the process if SIGBUS is blocked. Because + * that's what happens in the I/O thread, where we handle MCE via signalfd, + * we can only get action optional here. 
+ */ + assert(code != BUS_MCEERR_AR); + kvm_arch_on_sigbus_vcpu(first_cpu, code, addr); + return 0; } int kvm_create_device(KVMState *s, uint64_t type, bool test) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 395e986973..e5218f6e5d 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -565,11 +565,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) return 1; } -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - /* The #ifdef protections are until 32bit headers are imported and can * be removed once both 32 and 64 bit reach feature parity. */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index f49a786c98..2adf992c84 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -462,14 +462,13 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) ram_addr_t ram_addr; hwaddr paddr; + /* If we get an action required MCE, it has been injected by KVM + * while the VM was running. An action optional MCE instead should + * be coming from the main thread, which qemu_init_sigbus identifies + * as the "early kill" thread. + */ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); - /* Because the MCE happened while running the VCPU, KVM could have - * injected action required MCEs too. Action optional MCEs should - * be delivered to the main thread, which qemu_init_sigbus identifies - * as the "early kill" thread, but if we get one for whatever reason - * we just handle it just like the main thread would. 
- */ if ((env->mcg_cap & MCG_SER_P) && addr) { ram_addr = qemu_ram_addr_from_host(addr); if (ram_addr != RAM_ADDR_INVALID && @@ -491,35 +490,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) return 0; } -int kvm_arch_on_sigbus(int code, void *addr) -{ - X86CPU *cpu = X86_CPU(first_cpu); - - assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); - - if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } - - /* Hope we are lucky for AO MCE */ - if ((cpu->env.mcg_cap & MCG_SER_P) && addr) { - ram_addr_t ram_addr; - hwaddr paddr; - - ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr != RAM_ADDR_INVALID && - kvm_physical_memory_addr_from_host(first_cpu->kvm_state, - addr, &paddr)) { - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(X86_CPU(first_cpu), paddr, code); - } - - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!: %p\n", addr); - } - return 0; -} - static int kvm_inject_mce_oldstyle(X86CPU *cpu) { CPUX86State *env = &cpu->env; diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 998c3412c3..3e686e73a5 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -186,12 +186,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) return 1; } -int kvm_arch_on_sigbus(int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index acc40ece65..75598cd779 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2587,11 +2587,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) return 1; } -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 5ec050cf89..e7eea6ddb6 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2145,11 +2145,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) return 1; } -int 
kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr, uint32_t io_int_parm, uint32_t io_int_word) -- cgit v1.2.3-55-g7522 From 2ae41db262e02743b27719fe085e749d957613c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2017 12:48:54 +0100 Subject: KVM: do not use sigtimedwait to catch SIGBUS Call kvm_on_sigbus_vcpu asynchronously from the VCPU thread. Information for the SIGBUS can be stored in thread-local variables and processed later in kvm_cpu_exec. Signed-off-by: Paolo Bonzini --- cpus.c | 31 +++++++++++++------------------ include/sysemu/kvm.h | 5 ++++- kvm-all.c | 35 ++++++++++++++++++++++++++++++++++- target/arm/kvm.c | 5 ----- target/i386/kvm.c | 5 ++--- target/mips/kvm.c | 6 ------ target/ppc/kvm.c | 5 ----- target/s390x/kvm.c | 5 ----- 8 files changed, 53 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/cpus.c b/cpus.c index 399e2713b8..56b1338c87 100644 --- a/cpus.c +++ b/cpus.c @@ -926,8 +926,16 @@ static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) sigbus_reraise(); } - if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { - sigbus_reraise(); + if (current_cpu) { + /* Called asynchronously in VCPU thread. */ + if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } else { + /* Called synchronously (via signalfd) in main thread. 
*/ + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } } } @@ -958,8 +966,9 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) sigaction(SIG_IPI, &sigact, NULL); pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); sigdelset(&set, SIGBUS); + pthread_sigmask(SIG_SETMASK, &set, NULL); + sigdelset(&set, SIG_IPI); r = kvm_set_signal_mask(cpu, &set); if (r) { fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); @@ -977,7 +986,6 @@ static void qemu_kvm_eat_signals(CPUState *cpu) sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); - sigaddset(&waitset, SIGBUS); do { r = sigtimedwait(&waitset, &siginfo, &ts); @@ -986,25 +994,12 @@ static void qemu_kvm_eat_signals(CPUState *cpu) exit(1); } - switch (r) { - case SIGBUS: - if (siginfo.si_code != BUS_MCEERR_AO && siginfo.si_code != BUS_MCEERR_AR) { - sigbus_reraise(); - } - if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { - sigbus_reraise(); - } - break; - default: - break; - } - r = sigpending(&chkset); if (r == -1) { perror("sigpending"); exit(1); } - } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); + } while (sigismember(&chkset, SIG_IPI)); } #else /* !CONFIG_LINUX */ static void qemu_init_sigbus(void) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6ecb61cdef..a1b019da6f 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -357,7 +357,10 @@ bool kvm_vcpu_id_is_valid(int vcpu_id); /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned long kvm_arch_vcpu_id(CPUState *cpu); -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#ifdef TARGET_I386 +#define KVM_HAVE_MCE_INJECTION 1 +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif void kvm_arch_init_irq_routing(KVMState *s); diff --git a/kvm-all.c b/kvm-all.c index a433ad3090..0baa193763 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1893,6 +1893,12 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu) 
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } +#ifdef KVM_HAVE_MCE_INJECTION +static __thread void *pending_sigbus_addr; +static __thread int pending_sigbus_code; +static __thread bool have_sigbus_pending; +#endif + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -1930,6 +1936,16 @@ int kvm_cpu_exec(CPUState *cpu) attrs = kvm_arch_post_run(cpu, run); +#ifdef KVM_HAVE_MCE_INJECTION + if (unlikely(have_sigbus_pending)) { + qemu_mutex_lock_iothread(); + kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, + pending_sigbus_addr); + have_sigbus_pending = false; + qemu_mutex_unlock_iothread(); + } +#endif + if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { DPRINTF("io window exit\n"); @@ -2392,13 +2408,27 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } +/* Called asynchronously in VCPU thread. */ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - return kvm_arch_on_sigbus_vcpu(cpu, code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + if (have_sigbus_pending) { + return 1; + } + have_sigbus_pending = true; + pending_sigbus_addr = addr; + pending_sigbus_code = code; + atomic_set(&cpu->exit_request, 1); + return 0; +#else + return 1; +#endif } +/* Called synchronously (via signalfd) in main thread. */ int kvm_on_sigbus(int code, void *addr) { +#ifdef KVM_HAVE_MCE_INJECTION /* Action required MCE kills the process if SIGBUS is blocked. Because * that's what happens in the I/O thread, where we handle MCE via signalfd, * we can only get action optional here. 
@@ -2406,6 +2436,9 @@ int kvm_on_sigbus(int code, void *addr) assert(code != BUS_MCEERR_AR); kvm_arch_on_sigbus_vcpu(first_cpu, code, addr); return 0; +#else + return 1; +#endif } int kvm_create_device(KVMState *s, uint64_t type, bool test) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index e5218f6e5d..45554682f2 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -560,11 +560,6 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - return 1; -} - /* The #ifdef protections are until 32bit headers are imported and can * be removed once both 32 and 64 bit reach feature parity. */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 2adf992c84..7698421ae7 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -455,7 +455,7 @@ static void hardware_memory_error(void) exit(1); } -int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { X86CPU *cpu = X86_CPU(c); CPUX86State *env = &cpu->env; @@ -475,7 +475,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { kvm_hwpoison_page_add(ram_addr); kvm_mce_inject(cpu, paddr, code); - return 0; + return; } fprintf(stderr, "Hardware memory error for memory used by " @@ -487,7 +487,6 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) } /* Hope we are lucky for AO MCE */ - return 0; } static int kvm_inject_mce_oldstyle(X86CPU *cpu) diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 3e686e73a5..0982e874bb 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -180,12 +180,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 
75598cd779..03f5097eab 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2582,11 +2582,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index e7eea6ddb6..ac47154b83 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2140,11 +2140,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - void kvm_s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr, uint32_t io_int_parm, uint32_t io_int_word) -- cgit v1.2.3-55-g7522 From 18268b6016930efe76c77ae590e244d42d9671ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Feb 2017 09:41:14 +0100 Subject: KVM: move SIG_IPI handling to kvm-all.c This lets us remove a bunch of CONFIG_LINUX defines. Signed-off-by: Paolo Bonzini --- cpus.c | 62 +--------------------------------------------------- include/sysemu/kvm.h | 5 ++--- kvm-all.c | 60 +++++++++++++++++++++++++++++++++++++++++++++----- kvm-stub.c | 12 +++++----- 4 files changed, 63 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/cpus.c b/cpus.c index 56b1338c87..c857ad2957 100644 --- a/cpus.c +++ b/cpus.c @@ -950,69 +950,10 @@ static void qemu_init_sigbus(void) prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } - -static void dummy_signal(int sig) -{ -} - -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - int r; - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIGBUS); - pthread_sigmask(SIG_SETMASK, &set, NULL); - sigdelset(&set, SIG_IPI); - r = kvm_set_signal_mask(cpu, &set); - if (r) { - fprintf(stderr, "kvm_set_signal_mask: 
%s\n", strerror(-r)); - exit(1); - } -} - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ - struct timespec ts = { 0, 0 }; - siginfo_t siginfo; - sigset_t waitset; - sigset_t chkset; - int r; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - - do { - r = sigtimedwait(&waitset, &siginfo, &ts); - if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { - perror("sigtimedwait"); - exit(1); - } - - r = sigpending(&chkset); - if (r == -1) { - perror("sigpending"); - exit(1); - } - } while (sigismember(&chkset, SIG_IPI)); -} #else /* !CONFIG_LINUX */ static void qemu_init_sigbus(void) { } - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ -} - -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ -} #endif /* !CONFIG_LINUX */ static QemuMutex qemu_global_mutex; @@ -1089,7 +1030,6 @@ static void qemu_kvm_wait_io_event(CPUState *cpu) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - qemu_kvm_eat_signals(cpu); qemu_wait_io_event_common(cpu); } @@ -1112,7 +1052,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) exit(1); } - qemu_kvm_init_cpu_signals(cpu); + kvm_init_cpu_signals(cpu); /* signal CPU creation */ cpu->created = true; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a1b019da6f..24281fc7f8 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -238,9 +238,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr, target_ulong len, int type); void kvm_remove_all_breakpoints(CPUState *cpu); int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset); -#endif int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_on_sigbus(int code, void *addr); @@ -463,6 +460,8 @@ void kvm_cpu_synchronize_state(CPUState *cpu); void kvm_cpu_synchronize_post_reset(CPUState *cpu); void kvm_cpu_synchronize_post_init(CPUState *cpu); +void kvm_init_cpu_signals(CPUState *cpu); + /** * kvm_irqchip_add_msi_route - Add MSI route for 
specific vector * @s: KVM state diff --git a/kvm-all.c b/kvm-all.c index 0baa193763..1d7fc6c1e8 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1899,6 +1899,32 @@ static __thread int pending_sigbus_code; static __thread bool have_sigbus_pending; #endif +static void kvm_eat_signals(CPUState *cpu) +{ + struct timespec ts = { 0, 0 }; + siginfo_t siginfo; + sigset_t waitset; + sigset_t chkset; + int r; + + sigemptyset(&waitset); + sigaddset(&waitset, SIG_IPI); + + do { + r = sigtimedwait(&waitset, &siginfo, &ts); + if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { + perror("sigtimedwait"); + exit(1); + } + + r = sigpending(&chkset); + if (r == -1) { + perror("sigpending"); + exit(1); + } + } while (sigismember(&chkset, SIG_IPI)); +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -1949,6 +1975,7 @@ int kvm_cpu_exec(CPUState *cpu) if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { DPRINTF("io window exit\n"); + kvm_eat_signals(cpu); ret = EXCP_INTERRUPT; break; } @@ -2388,16 +2415,12 @@ void kvm_remove_all_breakpoints(CPUState *cpu) } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) +static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) { KVMState *s = kvm_state; struct kvm_signal_mask *sigmask; int r; - if (!sigset) { - return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL); - } - sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset)); sigmask->len = s->sigmask_len; @@ -2408,6 +2431,33 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } +static void dummy_signal(int sig) +{ +} + +void kvm_init_cpu_signals(CPUState *cpu) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); +#if defined KVM_HAVE_MCE_INJECTION + sigdelset(&set, SIGBUS); + pthread_sigmask(SIG_SETMASK, &set, NULL); 
+#endif + sigdelset(&set, SIG_IPI); + r = kvm_set_signal_mask(cpu, &set); + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +} + /* Called asynchronously in VCPU thread. */ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { diff --git a/kvm-stub.c b/kvm-stub.c index b1b6b96c96..ef0c7346af 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -95,13 +95,6 @@ void kvm_remove_all_breakpoints(CPUState *cpu) { } -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) -{ - abort(); -} -#endif - int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return 1; @@ -157,4 +150,9 @@ bool kvm_has_free_slot(MachineState *ms) { return false; } + +void kvm_init_cpu_signals(CPUState *cpu) +{ + abort(); +} #endif -- cgit v1.2.3-55-g7522 From c99a29e702528698c0ce2590f06ca7ff239f7c39 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Mon, 27 Feb 2017 12:52:44 +0800 Subject: memory: Introduce DEVICE_HOST_ENDIAN for ram device At the moment ram device's memory regions are DEVICE_NATIVE_ENDIAN. It's incorrect. This memory region is backed by a MMIO area in host, so the uint64_t data that MemoryRegionOps read from/write to this area should be host-endian rather than target-endian. Hence, current code does not work when target and host endianness are different which is the most common case on PPC64. To fix it, this introduces DEVICE_HOST_ENDIAN for the ram device. This has been tested on PPC64 BE/LE host/guest in all possible combinations including TCG. 
Suggested-by: Paolo Bonzini Signed-off-by: Yongji Xie Message-Id: <1488171164-28319-1-git-send-email-xyjxie@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- include/exec/cpu-common.h | 6 ++++++ memory.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 8c305aa4fa..b62f0d82e4 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -36,6 +36,12 @@ enum device_endian { DEVICE_LITTLE_ENDIAN, }; +#if defined(HOST_WORDS_BIGENDIAN) +#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN +#else +#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN +#endif + /* address in the RAM (different from a physical address) */ #if defined(CONFIG_XEN_BACKEND) typedef uint64_t ram_addr_t; diff --git a/memory.c b/memory.c index d61caee867..573fa6e5f6 100644 --- a/memory.c +++ b/memory.c @@ -1182,7 +1182,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, static const MemoryRegionOps ram_device_mem_ops = { .read = memory_region_ram_device_read, .write = memory_region_ram_device_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_HOST_ENDIAN, .valid = { .min_access_size = 1, .max_access_size = 8, -- cgit v1.2.3-55-g7522 From fc3a1fd74fac0e3233060aaaf923fe8ec104b48f Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Thu, 23 Feb 2017 13:34:41 +0000 Subject: x86: Work around SMI migration breakages Migration from a 2.3.0 qemu results in a reboot on the receiving QEMU due to a disagreement about SM (System management) interrupts. 2.3.0 didn't have much SMI support, but it did set CPU_INTERRUPT_SMI and this gets into the migration stream, but on 2.3.0 it never got delivered. ~2.4.0 SMI interrupt support was added but was broken - so that when a 2.3.0 stream was received it cleared the CPU_INTERRUPT_SMI but never actually caused an interrupt. 
The SMI delivery was recently fixed by 68c6efe07a, but the effect now is that an incoming 2.3.0 stream takes the interrupt it had flagged but its BIOS can't actually handle it (I think partly due to the original interrupt not being taken during boot?). The consequence is a triple(?) fault and a reboot. Tested from: 2.3.1 -M 2.3.0 2.7.0 -M 2.3.0 2.8.0 -M 2.3.0 2.8.0 -M 2.8.0 This corresponds to RH bugzilla entry 1420679. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20170223133441.16010-1-dgilbert@redhat.com> Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 4 ++++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 3 +++ target/i386/kvm.c | 7 ++++++- 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d1f45540a1..ab303c7fee 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -623,6 +623,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ .property = "xlevel",\ .value = stringify(0x8000000a),\ + },{\ + .driver = TYPE_X86_CPU,\ + .property = "kvm-no-smi-migration",\ + .value = "on",\ }, #define PC_COMPAT_2_2 \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index aec5d9daf8..fba92125ab 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3983,6 +3983,8 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), + DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration, + false), DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 12a39d590f..ac2ad6d443 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1255,6 +1255,9 @@ struct X86CPU { /* if true override the phys_bits value with a value read from the host */ bool
host_phys_bits; + /* Stop SMI delivery for migration compatibility with old machines */ + bool kvm_no_smi_migration; + /* Number of physical address bits supported */ uint32_t phys_bits; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7698421ae7..887a81268f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2492,7 +2492,12 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.smi.pending = 0; events.smi.latched_init = 0; } - events.flags |= KVM_VCPUEVENT_VALID_SMM; + /* Stop SMI delivery on old machine types to avoid a reboot + * on an inward migration of an old VM. + */ + if (!cpu->kvm_no_smi_migration) { + events.flags |= KVM_VCPUEVENT_VALID_SMM; + } } if (level >= KVM_PUT_RESET_STATE) { -- cgit v1.2.3-55-g7522