130 files changed, 2175 insertions, 1605 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index db8d83b137..eecd8031cf 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -469,6 +469,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->kvm_fd = ret; cpu->kvm_state = s; cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -743,6 +744,7 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu) count++; } cpu->kvm_fetch_index = fetch; + cpu->dirty_pages += count; return count; } @@ -2296,6 +2298,11 @@ bool kvm_vcpu_id_is_valid(int vcpu_id) return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s); } +bool kvm_dirty_ring_enabled(void) +{ + return kvm_state->kvm_dirty_ring_size ? true : false; +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 5b1d00a222..5319573e00 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -147,4 +147,9 @@ bool kvm_arm_supports_user_irq(void) { return false; } + +bool kvm_dirty_ring_enabled(void) +{ + return false; +} #endif diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index c9764c1325..bba4672632 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -462,6 +462,7 @@ void cpu_exec_step_atomic(CPUState *cpu) * memory. */ #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { @@ -471,7 +472,6 @@ void cpu_exec_step_atomic(CPUState *cpu) qemu_plugin_disable_mem_helpers(cpu); } - /* * As we start the exclusive region before codegen we must still * be in the region if we longjump out of either the codegen or @@ -916,6 +916,7 @@ int cpu_exec(CPUState *cpu) #endif #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index e6bb29b42d..1528a21fad 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -27,48 +27,18 @@ #include "exec/helper-proto.h" #include "qemu/atomic128.h" #include "trace/trace-root.h" +#include "tcg/tcg-ldst.h" #include "internal.h" -#undef EAX -#undef ECX -#undef EDX -#undef EBX -#undef ESP -#undef EBP -#undef ESI -#undef EDI -#undef EIP -#ifdef __linux__ -#include <sys/ucontext.h> -#endif - __thread uintptr_t helper_retaddr; //#define DEBUG_SIGNAL -/* exit the current TB from a signal handler. The host registers are - restored in a state compatible with the CPU emulator +/* + * Adjust the pc to pass to cpu_restore_state; return the memop type. */ -static void QEMU_NORETURN cpu_exit_tb_from_sighandler(CPUState *cpu, - sigset_t *old_set) -{ - /* XXX: use siglongjmp ? */ - sigprocmask(SIG_SETMASK, old_set, NULL); - cpu_loop_exit_noexc(cpu); -} - -/* 'pc' is the host PC at which the exception was raised. 'address' is - the effective address of the memory exception. 'is_write' is 1 if a - write caused the exception and otherwise 0'. 'old_set' is the - signal set which should be restored */ -static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, - int is_write, sigset_t *old_set) +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) { - CPUState *cpu = current_cpu; - CPUClass *cc; - unsigned long address = (unsigned long)info->si_addr; - MMUAccessType access_type = is_write ? 
MMU_DATA_STORE : MMU_DATA_LOAD; - switch (helper_retaddr) { default: /* @@ -77,7 +47,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * pointer into the generated code that will unwind to the * correct guest pc. */ - pc = helper_retaddr; + *pc = helper_retaddr; break; case 0: @@ -97,7 +67,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * Therefore, adjust to compensate for what will be done later * by cpu_restore_state_from_tb. */ - pc += GETPC_ADJ; + *pc += GETPC_ADJ; break; case 1: @@ -113,118 +83,97 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * * Like tb_gen_code, release the memory lock before cpu_loop_exit. */ - pc = 0; - access_type = MMU_INST_FETCH; mmap_unlock(); - break; + *pc = 0; + return MMU_INST_FETCH; } - /* For synchronous signals we expect to be coming from the vCPU - * thread (so current_cpu should be valid) and either from running - * code or during translation which can fault as we cross pages. - * - * If neither is true then something has gone wrong and we should - * abort rather than try and restart the vCPU execution. - */ - if (!cpu || !cpu->running) { - printf("qemu:%s received signal outside vCPU context @ pc=0x%" - PRIxPTR "\n", __func__, pc); - abort(); - } + return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; +} -#if defined(DEBUG_SIGNAL) - printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", - pc, address, is_write, *(unsigned long *)old_set); -#endif - /* XXX: locking issue */ - /* Note that it is important that we don't call page_unprotect() unless - * this is really a "write to nonwriteable page" fault, because - * page_unprotect() assumes that if it is called for an access to - * a page that's writeable this means we had two threads racing and - * another thread got there first and already made the page writeable; - * so we will retry the access. If we were to call page_unprotect() - * for some other kind of fault that should really be passed to the - * guest, we'd end up in an infinite loop of retrying the faulting - * access. - */ - if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && - h2g_valid(address)) { - switch (page_unprotect(h2g(address), pc)) { - case 0: - /* Fault not caused by a page marked unwritable to protect - * cached translations, must be the guest binary's problem. - */ - break; - case 1: - /* Fault caused by protection of cached translation; TBs - * invalidated, so resume execution. Retain helper_retaddr - * for a possible second fault. - */ - return 1; - case 2: - /* Fault caused by protection of cached translation, and the - * currently executing TB was modified and must be exited - * immediately. Clear helper_retaddr for next execution. - */ - clear_helper_retaddr(); - cpu_exit_tb_from_sighandler(cpu, old_set); - /* NORETURN */ - - default: - g_assert_not_reached(); - } +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @guest_addr: the guest address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. 
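
For orientation, a minimal sketch of how a user-mode SIGSEGV handler can chain the new pieces together; the handler name is hypothetical, while host_signal_pc() and host_signal_write() are the per-host helpers added later in this patch:

static void sigsegv_handler_sketch(int sig, siginfo_t *info, void *puc)
{
    ucontext_t *uc = puc;
    CPUState *cpu = current_cpu;
    uintptr_t pc = host_signal_pc(uc);
    bool is_write = host_signal_write(info, uc);
    MMUAccessType access_type = adjust_signal_pc(&pc, is_write);
    uintptr_t host_addr = (uintptr_t)info->si_addr;

    /* Only retry writes that faulted on a page QEMU write-protected. */
    if (access_type == MMU_DATA_STORE
        && info->si_code == SEGV_ACCERR
        && h2g_valid(host_addr)
        && handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask, pc,
                                       h2g(host_addr))) {
        return;
    }

    /* Everything else is delivered to the guest as SIGSEGV. */
    cpu_loop_exit_sigsegv(cpu, h2g_nocheck(host_addr), access_type,
                          info->si_code == SEGV_MAPERR, pc);
}
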
+ * + * Note that it is important that we don't call page_unprotect() unless + * this is really a "write to nonwriteable page" fault, because + * page_unprotect() assumes that if it is called for an access to + * a page that's writeable this means we had two threads racing and + * another thread got there first and already made the page writeable; + * so we will retry the access. If we were to call page_unprotect() + * for some other kind of fault that should really be passed to the + * guest, we'd end up in an infinite loop of retrying the faulting access. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr) +{ + switch (page_unprotect(guest_addr, host_pc)) { + case 0: + /* + * Fault not caused by a page marked unwritable to protect + * cached translations, must be the guest binary's problem. + */ + return false; + case 1: + /* + * Fault caused by protection of cached translation; TBs + * invalidated, so resume execution. + */ + return true; + case 2: + /* + * Fault caused by protection of cached translation, and the + * currently executing TB was modified and must be exited immediately. + */ + sigprocmask(SIG_SETMASK, old_set, NULL); + cpu_loop_exit_noexc(cpu); + /* NORETURN */ + default: + g_assert_not_reached(); } - - /* Convert forcefully to guest address space, invalid addresses - are still valid segv ones */ - address = h2g_nocheck(address); - - /* - * There is no way the target can handle this other than raising - * an exception. Undo signal and retaddr state prior to longjmp. - */ - sigprocmask(SIG_SETMASK, old_set, NULL); - clear_helper_retaddr(); - - cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, address, 0, access_type, - MMU_USER_IDX, false, pc); - g_assert_not_reached(); } static int probe_access_internal(CPUArchState *env, target_ulong addr, int fault_size, MMUAccessType access_type, bool nonfault, uintptr_t ra) { - int flags; + int acc_flag; + bool maperr; switch (access_type) { case MMU_DATA_STORE: - flags = PAGE_WRITE; + acc_flag = PAGE_WRITE_ORG; break; case MMU_DATA_LOAD: - flags = PAGE_READ; + acc_flag = PAGE_READ; break; case MMU_INST_FETCH: - flags = PAGE_EXEC; + acc_flag = PAGE_EXEC; break; default: g_assert_not_reached(); } - if (!guest_addr_valid_untagged(addr) || - page_check_range(addr, 1, flags) < 0) { - if (nonfault) { - return TLB_INVALID_MASK; - } else { - CPUState *cpu = env_cpu(env); - CPUClass *cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, addr, fault_size, access_type, - MMU_USER_IDX, false, ra); - g_assert_not_reached(); + if (guest_addr_valid_untagged(addr)) { + int page_flags = page_get_flags(addr); + if (page_flags & acc_flag) { + return 0; /* success */ } + maperr = !(page_flags & PAGE_VALID); + } else { + maperr = true; } - return 0; + + if (nonfault) { + return TLB_INVALID_MASK; + } + + cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); } int probe_access_flags(CPUArchState *env, target_ulong addr, @@ -250,640 +199,6 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, return size ? 
g2h(env_cpu(env), addr) : NULL; } -#if defined(__i386__) - -#if defined(__NetBSD__) -#include <ucontext.h> -#include <machine/trap.h> - -#define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include <ucontext.h> -#include <machine/trap.h> - -#define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#elif defined(__OpenBSD__) -#include <machine/trap.h> -#define EIP_sig(context) ((context)->sc_eip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#else -#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP 0xe -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - int trapno; - -#ifndef REG_EIP -/* for glibc 2.1 */ -#define REG_EIP EIP -#define REG_ERR ERR -#define REG_TRAPNO TRAPNO -#endif - pc = EIP_sig(uc); - trapno = TRAP_sig(uc); - return handle_cpu_signal(pc, info, - trapno == PAGE_FAULT_TRAP ? 
- (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(__x86_64__) - -#ifdef __NetBSD__ -#include <machine/trap.h> -#define PC_sig(context) _UC_MACHINE_PC(context) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#elif defined(__OpenBSD__) -#include <machine/trap.h> -#define PC_sig(context) ((context)->sc_rip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include <ucontext.h> -#include <machine/trap.h> - -#define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP T_PAGEFLT -#else -#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#define PAGE_FAULT_TRAP 0xe -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - unsigned long pc; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - - pc = PC_sig(uc); - return handle_cpu_signal(pc, info, - TRAP_sig(uc) == PAGE_FAULT_TRAP ? 
- (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(_ARCH_PPC) - -/*********************************************************************** - * signal context platform-specific definitions - * From Wine - */ -#ifdef linux -/* All Registers access - only for local access */ -#define REG_sig(reg_name, context) \ - ((context)->uc_mcontext.regs->reg_name) -/* Gpr Registers access */ -#define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context) -/* Program counter */ -#define IAR_sig(context) REG_sig(nip, context) -/* Machine State Register (Supervisor) */ -#define MSR_sig(context) REG_sig(msr, context) -/* Count register */ -#define CTR_sig(context) REG_sig(ctr, context) -/* User's integer exception register */ -#define XER_sig(context) REG_sig(xer, context) -/* Link register */ -#define LR_sig(context) REG_sig(link, context) -/* Condition register */ -#define CR_sig(context) REG_sig(ccr, context) - -/* Float Registers access */ -#define FLOAT_sig(reg_num, context) \ - (((double *)((char *)((context)->uc_mcontext.regs + 48 * 4)))[reg_num]) -#define FPSCR_sig(context) \ - (*(int *)((char *)((context)->uc_mcontext.regs + (48 + 32 * 2) * 4))) -/* Exception Registers access */ -#define DAR_sig(context) REG_sig(dar, context) -#define DSISR_sig(context) REG_sig(dsisr, context) -#define TRAP_sig(context) REG_sig(trap, context) -#endif /* linux */ - -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include <ucontext.h> -#define IAR_sig(context) ((context)->uc_mcontext.mc_srr0) -#define MSR_sig(context) ((context)->uc_mcontext.mc_srr1) -#define CTR_sig(context) ((context)->uc_mcontext.mc_ctr) -#define XER_sig(context) ((context)->uc_mcontext.mc_xer) -#define LR_sig(context) ((context)->uc_mcontext.mc_lr) -#define CR_sig(context) ((context)->uc_mcontext.mc_cr) -/* Exception Registers access */ -#define DAR_sig(context) ((context)->uc_mcontext.mc_dar) -#define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_exc) -#endif /* __FreeBSD__|| __FreeBSD_kernel__ */ - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - ucontext_t *uc = puc; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - int is_write; - - pc = IAR_sig(uc); - is_write = 0; -#if 0 - /* ppc 4xx case */ - if (DSISR_sig(uc) & 0x00800000) { - is_write = 1; - } -#else - if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) { - is_write = 1; - } -#endif - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__alpha__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uint32_t *pc = uc->uc_mcontext.sc_pc; - uint32_t insn = *pc; - int is_write = 0; - - /* XXX: need kernel patch to get write flag faster */ - switch (insn >> 26) { - case 0x0d: /* stw */ - case 0x0e: /* stb */ - case 0x0f: /* stq_u */ - case 0x24: /* stf */ - case 0x25: /* stg */ - case 0x26: /* sts */ - case 0x27: /* stt */ - case 0x2c: /* stl */ - case 0x2d: /* stq */ - case 0x2e: /* stl_c */ - case 0x2f: /* stq_c */ - is_write = 1; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} -#elif defined(__sparc__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - int is_write; - uint32_t insn; -#if !defined(__arch64__) || defined(CONFIG_SOLARIS) - uint32_t *regs = (uint32_t *)(info + 1); - void *sigmask = (regs + 
20); - /* XXX: is there a standard glibc define ? */ - unsigned long pc = regs[1]; -#else -#ifdef __linux__ - struct sigcontext *sc = puc; - unsigned long pc = sc->sigc_regs.tpc; - void *sigmask = (void *)sc->sigc_mask; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; - unsigned long pc = uc->sc_pc; - void *sigmask = (void *)(long)uc->sc_mask; -#elif defined(__NetBSD__) - ucontext_t *uc = puc; - unsigned long pc = _UC_MACHINE_PC(uc); - void *sigmask = (void *)&uc->uc_sigmask; -#endif -#endif - - /* XXX: need kernel patch to get write flag faster */ - is_write = 0; - insn = *(uint32_t *)pc; - if ((insn >> 30) == 3) { - switch ((insn >> 19) & 0x3f) { - case 0x05: /* stb */ - case 0x15: /* stba */ - case 0x06: /* sth */ - case 0x16: /* stha */ - case 0x04: /* st */ - case 0x14: /* sta */ - case 0x07: /* std */ - case 0x17: /* stda */ - case 0x0e: /* stx */ - case 0x1e: /* stxa */ - case 0x24: /* stf */ - case 0x34: /* stfa */ - case 0x27: /* stdf */ - case 0x37: /* stdfa */ - case 0x26: /* stqf */ - case 0x36: /* stqfa */ - case 0x25: /* stfsr */ - case 0x3c: /* casa */ - case 0x3e: /* casxa */ - is_write = 1; - break; - } - } - return handle_cpu_signal(pc, info, is_write, sigmask); -} - -#elif defined(__arm__) - -#if defined(__NetBSD__) -#include <ucontext.h> -#include <sys/siginfo.h> -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__NetBSD__) - ucontext_t *uc = puc; - siginfo_t *si = pinfo; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - uint32_t fsr; - int is_write; - -#if defined(__NetBSD__) - pc = uc->uc_mcontext.__gregs[_REG_R15]; -#elif defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) - pc = uc->uc_mcontext.gregs[R15]; -#else - pc = uc->uc_mcontext.arm_pc; -#endif - -#ifdef __NetBSD__ - fsr = si->si_trap; -#else - fsr = uc->uc_mcontext.error_code; -#endif - /* - * In the FSR, bit 11 is WnR, assuming a v6 or - * later processor. On v5 we will always report - * this as a read, which will fail later. 
- */ - is_write = extract32(fsr, 11, 1); - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__aarch64__) - -#if defined(__NetBSD__) - -#include <ucontext.h> -#include <sys/siginfo.h> - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - ucontext_t *uc = puc; - siginfo_t *si = pinfo; - unsigned long pc; - int is_write; - uint32_t esr; - - pc = uc->uc_mcontext.__gregs[_REG_PC]; - esr = si->si_trap; - - /* - * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC - * is 0b10010x: then bit 6 is the WnR bit - */ - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask); -} - -#else - -#ifndef ESR_MAGIC -/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */ -#define ESR_MAGIC 0x45535201 -struct esr_context { - struct _aarch64_ctx head; - uint64_t esr; -}; -#endif - -static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc) -{ - return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved; -} - -static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr) -{ - return (struct _aarch64_ctx *)((char *)hdr + hdr->size); -} - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - bool is_write; - struct _aarch64_ctx *hdr; - struct esr_context const *esrctx = NULL; - - /* Find the esr_context, which has the WnR bit in it */ - for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) { - if (hdr->magic == ESR_MAGIC) { - esrctx = (struct esr_context const *)hdr; - break; - } - } - - if (esrctx) { - /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */ - uint64_t esr = esrctx->esr; - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - } else { - /* - * Fall back to parsing instructions; will only be needed - * for really ancient (pre-3.16) kernels. - */ - uint32_t insn = *(uint32_t *)pc; - - is_write = ((insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */ - || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */ - || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */ - || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */ - || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */ - || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */ - || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */ - /* Ignore bits 10, 11 & 21, controlling indexing. */ - || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */ - || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */ - /* Ignore bits 23 & 24, controlling indexing. */ - || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */ - } - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} -#endif - -#elif defined(__s390__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - unsigned long pc; - uint16_t *pinsn; - int is_write = 0; - - pc = uc->uc_mcontext.psw.addr; - - /* - * ??? On linux, the non-rt signal handler has 4 (!) arguments instead - * of the normal 2 arguments. The 4th argument contains the "Translation- - * Exception Identification for DAT Exceptions" from the hardware (aka - * "int_parm_long"), which does in fact contain the is_write value. - * The rt signal handler, as far as I can tell, does not give this value - * at all. Not that we could get to it from here even if it were. - * So fall back to parsing instructions. 
Treat read-modify-write ones as - * writes, which is not fully correct, but for tracking self-modifying code - * this is better than treating them as reads. Checking si_addr page flags - * might be a viable improvement, albeit a racy one. - */ - /* ??? This is not even close to complete. */ - pinsn = (uint16_t *)pc; - switch (pinsn[0] >> 8) { - case 0x50: /* ST */ - case 0x42: /* STC */ - case 0x40: /* STH */ - case 0xba: /* CS */ - case 0xbb: /* CDS */ - is_write = 1; - break; - case 0xc4: /* RIL format insns */ - switch (pinsn[0] & 0xf) { - case 0xf: /* STRL */ - case 0xb: /* STGRL */ - case 0x7: /* STHRL */ - is_write = 1; - } - break; - case 0xc8: /* SSF format insns */ - switch (pinsn[0] & 0xf) { - case 0x2: /* CSST */ - is_write = 1; - } - break; - case 0xe3: /* RXY format insns */ - switch (pinsn[2] & 0xff) { - case 0x50: /* STY */ - case 0x24: /* STG */ - case 0x72: /* STCY */ - case 0x70: /* STHY */ - case 0x8e: /* STPQ */ - case 0x3f: /* STRVH */ - case 0x3e: /* STRV */ - case 0x2f: /* STRVG */ - is_write = 1; - } - break; - case 0xeb: /* RSY format insns */ - switch (pinsn[2] & 0xff) { - case 0x14: /* CSY */ - case 0x30: /* CSG */ - case 0x31: /* CDSY */ - case 0x3e: /* CDSG */ - case 0xe4: /* LANG */ - case 0xe6: /* LAOG */ - case 0xe7: /* LAXG */ - case 0xe8: /* LAAG */ - case 0xea: /* LAALG */ - case 0xf4: /* LAN */ - case 0xf6: /* LAO */ - case 0xf7: /* LAX */ - case 0xfa: /* LAAL */ - case 0xf8: /* LAA */ - is_write = 1; - } - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__mips__) - -#if defined(__misp16) || defined(__mips_micromips) -#error "Unsupported encoding" -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect all store instructions at program counter. */ - switch((insn >> 26) & 077) { - case 050: /* SB */ - case 051: /* SH */ - case 052: /* SWL */ - case 053: /* SW */ - case 054: /* SDL */ - case 055: /* SDR */ - case 056: /* SWR */ - case 070: /* SC */ - case 071: /* SWC1 */ - case 074: /* SCD */ - case 075: /* SDC1 */ - case 077: /* SD */ -#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 - case 072: /* SWC2 */ - case 076: /* SDC2 */ -#endif - is_write = 1; - break; - case 023: /* COP1X */ - /* Required in all versions of MIPS64 since - MIPS64r1 and subsequent versions of MIPS32r2. */ - switch (insn & 077) { - case 010: /* SWXC1 */ - case 011: /* SDXC1 */ - case 015: /* SUXC1 */ - is_write = 1; - } - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__riscv) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - greg_t pc = uc->uc_mcontext.__gregs[REG_PC]; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect store by reading the instruction at the program - counter. 
Note: we currently only generate 32-bit - instructions so we thus only detect 32-bit stores */ - switch (((insn >> 0) & 0b11)) { - case 3: - switch (((insn >> 2) & 0b11111)) { - case 8: - switch (((insn >> 12) & 0b111)) { - case 0: /* sb */ - case 1: /* sh */ - case 2: /* sw */ - case 3: /* sd */ - case 4: /* sq */ - is_write = 1; - break; - default: - break; - } - break; - case 9: - switch (((insn >> 12) & 0b111)) { - case 2: /* fsw */ - case 3: /* fsd */ - case 4: /* fsq */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - } - - /* Check for compressed instructions */ - switch (((insn >> 13) & 0b111)) { - case 7: - switch (insn & 0b11) { - case 0: /*c.sd */ - case 2: /* c.sdsp */ - is_write = 1; - break; - default: - break; - } - break; - case 6: - switch (insn & 0b11) { - case 0: /* c.sw */ - case 3: /* c.swsp */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#else - -#error host CPU specific signal handler needed - -#endif - /* The softmmu versions of these helpers are in cputlb.c. */ /* @@ -901,12 +216,27 @@ static void validate_memop(MemOpIdx oi, MemOp expected) #endif } +void helper_unaligned_ld(CPUArchState *env, target_ulong addr) +{ + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC()); +} + +void helper_unaligned_st(CPUArchState *env, target_ulong addr) +{ + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC()); +} + static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t ra, MMUAccessType type) { + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); void *ret; - /* TODO: Enforce guest required alignment. */ + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + cpu_loop_exit_sigbus(env_cpu(env), addr, type, ra); + } ret = g2h(env_cpu(env), addr); set_helper_retaddr(ra); @@ -1160,11 +490,22 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, int size, int prot, uintptr_t retaddr) { + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + void *ret; + + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE; + cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr); + } + /* Enforce qemu required alignment. */ if (unlikely(addr & (size - 1))) { cpu_loop_exit_atomic(env_cpu(env), retaddr); } - void *ret = g2h(env_cpu(env), addr); + + ret = g2h(env_cpu(env), addr); set_helper_retaddr(retaddr); return ret; } @@ -570,11 +570,7 @@ elif check_define __s390__ ; then cpu="s390" fi elif check_define __riscv ; then - if check_define _LP64 ; then - cpu="riscv64" - else - cpu="riscv32" - fi + cpu="riscv" elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then @@ -587,7 +583,7 @@ ARCH= # Normalise host CPU name and set ARCH. # Note that this case should only have supported host CPUs, not guests. 
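
Both new alignment checks above share one pattern: the guest-required alignment travels inside the MemOpIdx, and an address with any low bit set under the derived mask raises SIGBUS. A minimal sketch of that predicate, using the existing get_memop()/get_alignment_bits() accessors (the helper name is hypothetical):

static bool addr_misaligned(target_ulong addr, MemOpIdx oi)
{
    MemOp mop = get_memop(oi);             /* unpack the MemOp */
    int a_bits = get_alignment_bits(mop);  /* log2 of required alignment */

    /* a_bits == 0 yields mask 0, so unaligned-tolerant accesses pass. */
    return (addr & ((1 << a_bits) - 1)) != 0;
}
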
case "$cpu" in - ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64) + ppc|ppc64|s390x|sparc64|x32|riscv) ;; ppc64le) ARCH="ppc64" diff --git a/dump/dump.c b/dump/dump.c index ab625909f3..662d0a62cd 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -29,6 +29,7 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "hw/misc/vmcoreinfo.h" +#include "migration/blocker.h" #ifdef TARGET_X86_64 #include "win_dump.h" @@ -47,6 +48,8 @@ #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ +static Error *dump_migration_blocker; + #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ ((DIV_ROUND_UP((hdr_size), 4) + \ DIV_ROUND_UP((name_size), 4) + \ @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s) qemu_mutex_unlock_iothread(); } } + migrate_del_blocker(dump_migration_blocker); return 0; } @@ -2005,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } + if (!dump_migration_blocker) { + error_setg(&dump_migration_blocker, + "Live migration disabled: dump-guest-memory in progress"); + } + + /* + * Allows even for -only-migratable, but forbid migration during the + * process of dump guest memory. + */ + if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { + /* Remember to release the fd before passing it over to dump state */ + close(fd); + return; + } + s = &dump_state_global; dump_state_prepare(s); diff --git a/hmp-commands.hx b/hmp-commands.hx index cf723c69ac..3a5aeba3fe 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1737,8 +1737,10 @@ ERST { .name = "calc_dirty_rate", - .args_type = "second:l,sample_pages_per_GB:l?", - .params = "second [sample_pages_per_GB]", - .help = "start a round of guest dirty rate measurement", + .args_type = "dirty_ring:-r,dirty_bitmap:-b,second:l,sample_pages_per_GB:l?", + .params = "[-r] [-b] second [sample_pages_per_GB]", + .help = "start a round of guest dirty rate measurement (using -r to" + "\n\t\t\t specify dirty ring as the method of calculation and" + "\n\t\t\t -b to specify dirty bitmap as method of calculation)", .cmd = hmp_calc_dirty_rate, }, diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index e3d3d5cf89..482be95415 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1613,8 +1613,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) void qmp_xen_set_global_dirty_log(bool enable, Error **errp) { if (enable) { - memory_global_dirty_log_start(); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); } else { - memory_global_dirty_log_stop(); + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); } } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index df91e454b2..d5a578142b 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -228,6 +228,38 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, return ret; } +static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, + MemoryRegionSection *s, + void *arg, + virtio_mem_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + int ret = 0; + + first_bit = s->offset_within_region / vmem->bitmap_size; + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit); + while (first_bit < vmem->bitmap_size) { + MemoryRegionSection tmp = *s; + + offset = first_bit * vmem->block_size; + last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + break; + } + ret = 
cb(&tmp, arg); + if (ret) { + break; + } + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + last_bit + 2); + } + return ret; +} + static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) { RamDiscardListener *rdl = arg; @@ -744,7 +776,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) host_memory_backend_set_mapped(vmem->memdev, true); vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); qemu_register_reset(virtio_mem_system_reset, vmem); - precopy_add_notifier(&vmem->precopy_notifier); /* * Set ourselves as RamDiscardManager before the plug handler maps the @@ -764,7 +795,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) * found via an address space anymore. Unset ourselves. */ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); - precopy_remove_notifier(&vmem->precopy_notifier); qemu_unregister_reset(virtio_mem_system_reset, vmem); vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); host_memory_backend_set_mapped(vmem->memdev, false); @@ -1057,43 +1087,11 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, vmem->block_size = value; } -static int virtio_mem_precopy_exclude_range_cb(const VirtIOMEM *vmem, void *arg, - uint64_t offset, uint64_t size) -{ - void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); - - qemu_guest_free_page_hint(host + offset, size); - return 0; -} - -static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) -{ - virtio_mem_for_each_unplugged_range(vmem, NULL, - virtio_mem_precopy_exclude_range_cb); -} - -static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) -{ - VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); - PrecopyNotifyData *pnd = data; - - switch (pnd->reason) { - case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: - virtio_mem_precopy_exclude_unplugged(vmem); - break; - default: - break; - } - - return 0; -} - static void virtio_mem_instance_init(Object *obj) { VirtIOMEM *vmem = VIRTIO_MEM(obj); notifier_list_init(&vmem->size_change_notifiers); - vmem->precopy_notifier.notify = virtio_mem_precopy_notify; QLIST_INIT(&vmem->rdl_list); object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, @@ -1170,6 +1168,31 @@ static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, virtio_mem_rdm_replay_populated_cb); } +static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, + void *arg) +{ + struct VirtIOMEMReplayData *data = arg; + + ((ReplayRamDiscard)data->fn)(s, data->opaque); + return 0; +} + +static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscard replay_fn, + void *opaque) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, + }; + + g_assert(s->mr == &vmem->memdev->mr); + virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); +} + static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, RamDiscardListener *rdl, MemoryRegionSection *s) @@ -1234,6 +1257,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; rdmc->is_populated = virtio_mem_rdm_is_populated; rdmc->replay_populated = virtio_mem_rdm_replay_populated; + rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; rdmc->register_listener = virtio_mem_rdm_register_listener; rdmc->unregister_listener = 
virtio_mem_rdm_unregister_listener; } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9d5987ba04..6bb2a0f7ec 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -664,16 +664,55 @@ static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, } /** - * cpu_signal_handler - * @signum: host signal number - * @pinfo: host siginfo_t - * @puc: host ucontext_t + * adjust_signal_pc: + * @pc: raw pc from the host signal ucontext_t. + * @is_write: host memory operation was write, or read-modify-write. * - * To be called from the SIGBUS and SIGSEGV signal handler to inform the - * virtual cpu of exceptions. Returns true if the signal was handled by - * the virtual CPU. + * Alter @pc as required for unwinding. Return the type of the + * guest memory access -- host reads may be for guest execution. */ -int cpu_signal_handler(int signum, void *pinfo, void *puc); +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write); + +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @host_addr: the host address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr); + +/** + * cpu_loop_exit_sigsegv: + * @cpu: the cpu context + * @addr: the guest address of the fault + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGSEGV, and jump to the main cpu loop. + */ +void QEMU_NORETURN cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + +/** + * cpu_loop_exit_sigbus: + * @cpu: the cpu context + * @addr: the guest address of the alignment fault + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGBUS, and jump to the main cpu loop. 
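
To make the @maperr flag concrete, a hedged caller-side sketch (hypothetical helper name) mirroring the probe_access_internal() logic earlier in this patch: an unmapped page reports SEGV_MAPERR, a mapped page lacking the needed permission reports SEGV_ACCERR:

static void check_guest_readable(CPUArchState *env, target_ulong addr,
                                 uintptr_t ra)
{
    int flags = page_get_flags(addr);

    if (!(flags & PAGE_VALID)) {
        /* No mapping at all: maperr = true -> SEGV_MAPERR. */
        cpu_loop_exit_sigsegv(env_cpu(env), addr, MMU_DATA_LOAD, true, ra);
    }
    if (!(flags & PAGE_READ)) {
        /* Mapped but unreadable: maperr = false -> SEGV_ACCERR. */
        cpu_loop_exit_sigsegv(env_cpu(env), addr, MMU_DATA_LOAD, false, ra);
    }
}
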
+ */ +void QEMU_NORETURN cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + uintptr_t ra); #else static inline void mmap_lock(void) {} diff --git a/include/exec/memory.h b/include/exec/memory.h index a185b6dcb8..20f1b27377 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -61,7 +61,17 @@ static inline void fuzz_dma_read_cb(size_t addr, } #endif -extern bool global_dirty_log; +/* Possible bits for global_dirty_log_{start|stop} */ + +/* Dirty tracking enabled because migration is running */ +#define GLOBAL_DIRTY_MIGRATION (1U << 0) + +/* Dirty tracking enabled because measuring dirty rate */ +#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) + +#define GLOBAL_DIRTY_MASK (0x3) + +extern unsigned int global_dirty_tracking; typedef struct MemoryRegionOps MemoryRegionOps; @@ -540,6 +550,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl, } typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); +typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); /* * RamDiscardManagerClass: @@ -629,6 +640,21 @@ struct RamDiscardManagerClass { ReplayRamPopulate replay_fn, void *opaque); /** + * @replay_discarded: + * + * Call the #ReplayRamDiscard callback for all discarded parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscard callback + * @opaque: pointer to forward to the callback + */ + void (*replay_discarded)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, void *opaque); + + /** * @register_listener: * * Register a #RamDiscardListener for the given #MemoryRegionSection and @@ -672,6 +698,11 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, ReplayRamPopulate replay_fn, void *opaque); +void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, + void *opaque); + void ram_discard_manager_register_listener(RamDiscardManager *rdm, RamDiscardListener *rdl, MemoryRegionSection *section); @@ -2388,13 +2419,17 @@ void memory_listener_unregister(MemoryListener *listener); /** * memory_global_dirty_log_start: begin dirty logging for all regions + * + * @flags: purpose of starting dirty log, migration or dirty rate */ -void memory_global_dirty_log_start(void); +void memory_global_dirty_log_start(unsigned int flags); /** * memory_global_dirty_log_stop: end dirty logging for all regions + * + * @flags: purpose of stopping dirty log, migration or dirty rate */ -void memory_global_dirty_log_stop(void); +void memory_global_dirty_log_stop(unsigned int flags); void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 551876bed0..64fb936c7c 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -26,6 +26,8 @@ #include "exec/ramlist.h" #include "exec/ramblock.h" +extern uint64_t total_dirty_pages; + /** * clear_bmap_size: calculate clear bitmap size * @@ -369,10 +371,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); - if (global_dirty_log) { + if (global_dirty_tracking) { qatomic_or( &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp); + if (unlikely( + global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(temp); + } } if 
(tcg_enabled()) { @@ -392,7 +398,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } else { uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; - if (!global_dirty_log) { + if (!global_dirty_tracking) { clients &= ~(1 << DIRTY_MEMORY_MIGRATION); } @@ -403,6 +409,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, for (i = 0; i < len; i++) { if (bitmap[i] != 0) { c = leul_to_cpu(bitmap[i]); + if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(c); + } do { j = ctzl(c); c &= ~(1ul << j); diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 1a10497af3..e948e81f1a 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -381,6 +381,7 @@ struct CPUState { struct kvm_run *kvm_run; struct kvm_dirty_gfn *kvm_dirty_gfns; uint32_t kvm_fetch_index; + uint64_t dirty_pages; /* Used for events with 'vcpu' and *without* the 'disabled' properties */ DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h index 6cbe17f2e6..e13898553a 100644 --- a/include/hw/core/tcg-cpu-ops.h +++ b/include/hw/core/tcg-cpu-ops.h @@ -35,18 +35,6 @@ struct TCGCPUOps { void (*cpu_exec_enter)(CPUState *cpu); /** @cpu_exec_exit: Callback for cpu_exec cleanup */ void (*cpu_exec_exit)(CPUState *cpu); - /** - * @tlb_fill: Handle a softmmu tlb miss or user-only address fault - * - * For system mode, if the access is valid, call tlb_set_page - * and return true; if the access is invalid, and probe is - * true, return false; otherwise raise an exception and do - * not return. For user-only mode, always raise an exception - * and do not return. - */ - bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); /** @debug_excp_handler: Callback for handling debug exceptions */ void (*debug_excp_handler)(CPUState *cpu); @@ -69,6 +57,16 @@ struct TCGCPUOps { /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); /** + * @tlb_fill: Handle a softmmu tlb miss + * + * If the access is valid, call tlb_set_page and return true; + * if the access is invalid and probe is true, return false; + * otherwise raise an exception and do not return. + */ + bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + /** * @do_transaction_failed: Callback for handling failed memory transactions * (ie bus faults or external aborts; not MMU faults) */ @@ -111,6 +109,55 @@ struct TCGCPUOps { */ bool (*io_recompile_replay_branch)(CPUState *cpu, const TranslationBlock *tb); +#else + /** + * record_sigsegv: + * @cpu: cpu context + * @addr: faulting guest address + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * We are about to raise SIGSEGV with si_code set for @maperr, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide anything to the signal + * handler with anything besides the user context registers, and + * the siginfo_t, then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. 
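
These counters feed the reworked calc_dirty_rate: with -r the per-vCPU dirty_pages counts filled by the KVM dirty-ring reaper are sampled (gated on kvm_dirty_ring_enabled()), while with -b the global total_dirty_pages counter above accumulates only while GLOBAL_DIRTY_DIRTY_RATE is set. A rough sketch of the bitmap-based read-out (function name hypothetical; sleep() stands in for the real sample timer):

static uint64_t measure_dirtied_pages(unsigned int sample_secs)
{
    uint64_t base, dirtied;

    /* Turn on dirty tracking for rate measurement only. */
    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
    base = total_dirty_pages;

    sleep(sample_secs);     /* let the guest run and dirty memory */

    dirtied = total_dirty_pages - base;
    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
    return dirtied;
}
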
+ * + * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided + * so that a "normal" cpu exception can be raised. In this case, + * the signal must be raised by the architecture cpu_loop. + */ + void (*record_sigsegv)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + /** + * record_sigbus: + * @cpu: cpu context + * @addr: misaligned guest address + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * We are about to raise SIGBUS with si_code BUS_ADRALN, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide the signal handler with + * anything besides the user context registers, and the siginfo_t, + * then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. + * + * If it is simpler to re-use the sysemu do_unaligned_access code, + * @ra is provided so that a "normal" cpu exception can be raised. + * In this case, the signal must be raised by the architecture cpu_loop. + */ + void (*record_sigbus)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, uintptr_t ra); #endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index 9a6e348fa2..a5dd6a493b 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -65,9 +65,6 @@ struct VirtIOMEM { /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; - /* don't migrate unplugged memory */ - NotifierWithReturn precopy_notifier; - /* listeners to notify on plug/unplug activity. */ QLIST_HEAD(, RamDiscardListener) rdl_list; }; diff --git a/include/migration/blocker.h b/include/migration/blocker.h index acd27018e9..9cebe2ba06 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -26,6 +26,22 @@ int migrate_add_blocker(Error *reason, Error **errp); /** + * @migrate_add_blocker_internal - prevent migration from proceeding without + * only-migrate implications + * + * @reason - an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @returns - 0 on success, -EBUSY on failure, with errp set. + * + * Some of the migration blockers can be temporary (e.g., for a few seconds), + * so it shouldn't need to conflict with "-only-migratable". For those cases, + * we can call this function rather than @migrate_add_blocker(). 
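
The dump-guest-memory hunk earlier in this patch is the first user of this API: it installs the blocker before dumping and removes it in dump_cleanup(). For any other short-lived operation the pattern looks roughly like this (the operation names are placeholders):

static Error *op_migration_blocker;

static int short_op_start(Error **errp)
{
    error_setg(&op_migration_blocker,
               "Live migration disabled: short operation in progress");

    /* Unlike migrate_add_blocker(), tolerated under -only-migratable. */
    if (migrate_add_blocker_internal(op_migration_blocker, errp)) {
        return -EBUSY;
    }
    return 0;
}

static void short_op_end(void)
{
    migrate_del_blocker(op_migration_blocker);
}
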
+ */ +int migrate_add_blocker_internal(Error *reason, Error **errp); + +/** * @migrate_del_blocker - remove a blocking error from migration * * @reason - the error blocking migration diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a1ab1ee12d..7b22aeb6ae 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -547,4 +547,5 @@ bool kvm_cpu_check_are_resettable(void); bool kvm_arch_cpu_check_are_resettable(void); +bool kvm_dirty_ring_enabled(void); #endif diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h index 8c86365611..bf40942de4 100644 --- a/include/tcg/tcg-ldst.h +++ b/include/tcg/tcg-ldst.h @@ -70,5 +70,10 @@ void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr); +#else + +void QEMU_NORETURN helper_unaligned_ld(CPUArchState *env, target_ulong addr); +void QEMU_NORETURN helper_unaligned_st(CPUArchState *env, target_ulong addr); + #endif /* CONFIG_SOFTMMU */ #endif /* TCG_LDST_H */ diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c index 034b737435..97e0728b67 100644 --- a/linux-user/aarch64/cpu_loop.c +++ b/linux-user/aarch64/cpu_loop.c @@ -79,7 +79,7 @@ void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr, ec, fsc, si_code; + int trapnr, ec, fsc, si_code, si_signo; abi_long ret; for (;;) { @@ -121,20 +121,26 @@ void cpu_loop(CPUARMState *env) fsc = extract32(env->exception.syndrome, 0, 6); switch (fsc) { case 0x04 ... 0x07: /* Translation fault, level {0-3} */ + si_signo = TARGET_SIGSEGV; si_code = TARGET_SEGV_MAPERR; break; case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */ case 0x0d ... 0x0f: /* Permission fault, level {1-3} */ + si_signo = TARGET_SIGSEGV; si_code = TARGET_SEGV_ACCERR; break; case 0x11: /* Synchronous Tag Check Fault */ + si_signo = TARGET_SIGSEGV; si_code = TARGET_SEGV_MTESERR; break; + case 0x21: /* Alignment fault */ + si_signo = TARGET_SIGBUS; + si_code = TARGET_BUS_ADRALN; + break; default: g_assert_not_reached(); } - - force_sig_fault(TARGET_SIGSEGV, si_code, env->exception.vaddress); + force_sig_fault(si_signo, si_code, env->exception.vaddress); break; case EXCP_DEBUG: case EXCP_BKPT: diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c index 1b00a81385..4029849d5c 100644 --- a/linux-user/alpha/cpu_loop.c +++ b/linux-user/alpha/cpu_loop.c @@ -54,21 +54,6 @@ void cpu_loop(CPUAlphaState *env) fprintf(stderr, "External interrupt. Exit\n"); exit(EXIT_FAILURE); break; - case EXCP_MMFAULT: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID - ? 
TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR); - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; - case EXCP_UNALIGN: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_BUS_ADRALN; - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_OPCDEC: do_sigill: info.si_signo = TARGET_SIGILL; diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c index ae09adcb95..01cb6eb534 100644 --- a/linux-user/arm/cpu_loop.c +++ b/linux-user/arm/cpu_loop.c @@ -25,6 +25,7 @@ #include "cpu_loop-common.h" #include "signal-common.h" #include "semihosting/common-semi.h" +#include "target/arm/syndrome.h" #define get_user_code_u32(x, gaddr, env) \ ({ abi_long __r = get_user_u32((x), (gaddr)); \ @@ -280,7 +281,7 @@ static bool emulate_arm_fpa11(CPUARMState *env, uint32_t opcode) void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr; + int trapnr, si_signo, si_code; unsigned int n, insn; abi_ulong ret; @@ -423,9 +424,30 @@ void cpu_loop(CPUARMState *env) break; case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: - /* XXX: check env->error_code */ - force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, - env->exception.vaddress); + /* For user-only we don't set TTBCR_EAE, so look at the FSR. */ + switch (env->exception.fsr & 0x1f) { + case 0x1: /* Alignment */ + si_signo = TARGET_SIGBUS; + si_code = TARGET_BUS_ADRALN; + break; + case 0x3: /* Access flag fault, level 1 */ + case 0x6: /* Access flag fault, level 2 */ + case 0x9: /* Domain fault, level 1 */ + case 0xb: /* Domain fault, level 2 */ + case 0xd: /* Permision fault, level 1 */ + case 0xf: /* Permision fault, level 2 */ + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_ACCERR; + break; + case 0x5: /* Translation fault, level 1 */ + case 0x7: /* Translation fault, level 2 */ + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_MAPERR; + break; + default: + g_assert_not_reached(); + } + force_sig_fault(si_signo, si_code, env->exception.vaddress); break; case EXCP_DEBUG: case EXCP_BKPT: diff --git a/linux-user/cris/cpu_loop.c b/linux-user/cris/cpu_loop.c index b9085619c4..0d5d268609 100644 --- a/linux-user/cris/cpu_loop.c +++ b/linux-user/cris/cpu_loop.c @@ -37,16 +37,6 @@ void cpu_loop(CPUCRISState *env) process_queued_cpu_work(cs); switch (trapnr) { - case 0xaa: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->pregs[PR_EDA]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c index bee2a9e4ea..6b24cbaba9 100644 --- a/linux-user/hexagon/cpu_loop.c +++ b/linux-user/hexagon/cpu_loop.c @@ -28,8 +28,7 @@ void cpu_loop(CPUHexagonState *env) { CPUState *cs = env_cpu(env); - int trapnr, signum, sigcode; - target_ulong sigaddr; + int trapnr; target_ulong syscallnum; target_ulong ret; @@ -39,10 +38,6 @@ void cpu_loop(CPUHexagonState *env) cpu_exec_end(cs); process_queued_cpu_work(cs); - signum = 0; - sigcode = 0; - sigaddr = 0; - switch (trapnr) { case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ @@ -65,12 +60,6 @@ void cpu_loop(CPUHexagonState *env) env->gpr[0] = ret; } break; - case HEX_EXCP_FETCH_NO_UPAGE: - case HEX_EXCP_PRIV_NO_UREAD: - case 
HEX_EXCP_PRIV_NO_UWRITE: - signum = TARGET_SIGSEGV; - sigcode = TARGET_SEGV_MAPERR; - break; case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; @@ -79,17 +68,6 @@ void cpu_loop(CPUHexagonState *env) trapnr); exit(EXIT_FAILURE); } - - if (signum) { - target_siginfo_t info = { - .si_signo = signum, - .si_errno = 0, - .si_code = sigcode, - ._sifields._sigfault._addr = sigaddr - }; - queue_signal(env, info.si_signo, QEMU_SI_KILL, &info); - } - process_pending_signals(env); } } diff --git a/linux-user/host/aarch64/host-signal.h b/linux-user/host/aarch64/host-signal.h new file mode 100644 index 0000000000..0c0b08383a --- /dev/null +++ b/linux-user/host/aarch64/host-signal.h @@ -0,0 +1,74 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef AARCH64_HOST_SIGNAL_H +#define AARCH64_HOST_SIGNAL_H + +/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */ +#ifndef ESR_MAGIC +#define ESR_MAGIC 0x45535201 +struct esr_context { + struct _aarch64_ctx head; + uint64_t esr; +}; +#endif + +static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc) +{ + return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved; +} + +static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr) +{ + return (struct _aarch64_ctx *)((char *)hdr + hdr->size); +} + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.pc; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + struct _aarch64_ctx *hdr; + uint32_t insn; + + /* Find the esr_context, which has the WnR bit in it */ + for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) { + if (hdr->magic == ESR_MAGIC) { + struct esr_context const *ec = (struct esr_context const *)hdr; + uint64_t esr = ec->esr; + + /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */ + return extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; + } + } + + /* + * Fall back to parsing instructions; will only be needed + * for really ancient (pre-3.16) kernels. + */ + insn = *(uint32_t *)host_signal_pc(uc); + + return (insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */ + || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */ + || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */ + || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */ + || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */ + || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */ + || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */ + /* Ignore bits 10, 11 & 21, controlling indexing. */ + || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */ + || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */ + /* Ignore bits 23 & 24, controlling indexing. */ + || (insn & 0x3a400000) == 0x28000000; /* C3.3.7,14-16 */ +} + +#endif diff --git a/linux-user/host/alpha/host-signal.h b/linux-user/host/alpha/host-signal.h new file mode 100644 index 0000000000..e080be412f --- /dev/null +++ b/linux-user/host/alpha/host-signal.h @@ -0,0 +1,42 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. 
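
Each of these headers answers exactly two questions for the generic signal code: where did the host fault (host_signal_pc) and was the access a write (host_signal_write). For a host whose mcontext exposes a fault-status word directly, a port can be as small as this hedged template (NEWARCH and the field names are placeholders, not a real architecture):

#ifndef NEWARCH_HOST_SIGNAL_H
#define NEWARCH_HOST_SIGNAL_H

static inline uintptr_t host_signal_pc(ucontext_t *uc)
{
    return uc->uc_mcontext.fault_pc;          /* placeholder field */
}

static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
{
    /* Placeholder: assume bit 0 of a fault-status word means "write". */
    return uc->uc_mcontext.fault_status & 1;
}

#endif
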
+ */ + +#ifndef ALPHA_HOST_SIGNAL_H +#define ALPHA_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.sc_pc; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + uint32_t *pc = (uint32_t *)host_signal_pc(uc); + uint32_t insn = *pc; + + /* XXX: need kernel patch to get write flag faster */ + switch (insn >> 26) { + case 0x0d: /* stw */ + case 0x0e: /* stb */ + case 0x0f: /* stq_u */ + case 0x24: /* stf */ + case 0x25: /* stg */ + case 0x26: /* sts */ + case 0x27: /* stt */ + case 0x2c: /* stl */ + case 0x2d: /* stq */ + case 0x2e: /* stl_c */ + case 0x2f: /* stq_c */ + return true; + } + return false; +} + +#endif diff --git a/linux-user/host/arm/host-signal.h b/linux-user/host/arm/host-signal.h new file mode 100644 index 0000000000..efb165c0c5 --- /dev/null +++ b/linux-user/host/arm/host-signal.h @@ -0,0 +1,30 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef ARM_HOST_SIGNAL_H +#define ARM_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.arm_pc; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + /* + * In the FSR, bit 11 is WnR, assuming a v6 or + * later processor. On v5 we will always report + * this as a read, which will fail later. + */ + uint32_t fsr = uc->uc_mcontext.error_code; + return extract32(fsr, 11, 1); +} + +#endif diff --git a/linux-user/host/i386/host-signal.h b/linux-user/host/i386/host-signal.h new file mode 100644 index 0000000000..4c8eef99ce --- /dev/null +++ b/linux-user/host/i386/host-signal.h @@ -0,0 +1,25 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef I386_HOST_SIGNAL_H +#define I386_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.gregs[REG_EIP]; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe + && (uc->uc_mcontext.gregs[REG_ERR] & 0x2); +} + +#endif diff --git a/linux-user/host/mips/host-signal.h b/linux-user/host/mips/host-signal.h new file mode 100644 index 0000000000..ef341f7c20 --- /dev/null +++ b/linux-user/host/mips/host-signal.h @@ -0,0 +1,62 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef MIPS_HOST_SIGNAL_H +#define MIPS_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.pc; +} + +#if defined(__mips16) || defined(__mips_micromips) +#error "Unsupported encoding" +#endif + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + uint32_t insn = *(uint32_t *)host_signal_pc(uc); + + /* Detect all store instructions at program counter.
*/ + switch ((insn >> 26) & 077) { + case 050: /* SB */ + case 051: /* SH */ + case 052: /* SWL */ + case 053: /* SW */ + case 054: /* SDL */ + case 055: /* SDR */ + case 056: /* SWR */ + case 070: /* SC */ + case 071: /* SWC1 */ + case 074: /* SCD */ + case 075: /* SDC1 */ + case 077: /* SD */ +#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 + case 072: /* SWC2 */ + case 076: /* SDC2 */ +#endif + return true; + case 023: /* COP1X */ + /* + * Required in all versions of MIPS64 since + * MIPS64r1 and subsequent versions of MIPS32r2. + */ + switch (insn & 077) { + case 010: /* SWXC1 */ + case 011: /* SDXC1 */ + case 015: /* SUXC1 */ + return true; + } + break; + } + return false; +} + +#endif diff --git a/linux-user/host/ppc/host-signal.h b/linux-user/host/ppc/host-signal.h new file mode 100644 index 0000000000..a491c413dc --- /dev/null +++ b/linux-user/host/ppc/host-signal.h @@ -0,0 +1,25 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef PPC_HOST_SIGNAL_H +#define PPC_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.regs->nip; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + return uc->uc_mcontext.regs->trap != 0x400 + && (uc->uc_mcontext.regs->dsisr & 0x02000000); +} + +#endif diff --git a/linux-user/host/ppc64/host-signal.h b/linux-user/host/ppc64/host-signal.h new file mode 100644 index 0000000000..a353c22a90 --- /dev/null +++ b/linux-user/host/ppc64/host-signal.h @@ -0,0 +1 @@ +#include "../ppc/host-signal.h" diff --git a/linux-user/host/riscv/host-signal.h b/linux-user/host/riscv/host-signal.h new file mode 100644 index 0000000000..3b168cb58b --- /dev/null +++ b/linux-user/host/riscv/host-signal.h @@ -0,0 +1,58 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef RISCV_HOST_SIGNAL_H +#define RISCV_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.__gregs[REG_PC]; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + /* + * Detect store by reading the instruction at the program counter. + * Do not read more than 16 bits, because we have not yet determined + * the size of the instruction. + */ + const uint16_t *pinsn = (const uint16_t *)host_signal_pc(uc); + uint16_t insn = pinsn[0]; + + /* 16-bit instructions */ + switch (insn & 0xe003) { + case 0xa000: /* c.fsd */ + case 0xc000: /* c.sw */ + case 0xe000: /* c.sd (rv64) / c.fsw (rv32) */ + case 0xa002: /* c.fsdsp */ + case 0xc002: /* c.swsp */ + case 0xe002: /* c.sdsp (rv64) / c.fswsp (rv32) */ + return true; + } + + /* 32-bit instructions, major opcodes */ + switch (insn & 0x7f) { + case 0x23: /* store */ + case 0x27: /* store-fp */ + return true; + case 0x2f: /* amo */ + /* + * The AMO function code is in bits 25-31, unread as yet. + * The AMO functions are LR (read), SC (write), and the + * rest are all read-modify-write. 
+ */ + insn = pinsn[1]; + return (insn >> 11) != 2; /* LR */ + } + + return false; +} + +#endif diff --git a/linux-user/host/riscv64/hostdep.h b/linux-user/host/riscv/hostdep.h index 865f0fb9ff..2ba07456ae 100644 --- a/linux-user/host/riscv64/hostdep.h +++ b/linux-user/host/riscv/hostdep.h @@ -5,8 +5,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef RISCV64_HOSTDEP_H -#define RISCV64_HOSTDEP_H +#ifndef RISCV_HOSTDEP_H +#define RISCV_HOSTDEP_H /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL diff --git a/linux-user/host/riscv64/safe-syscall.inc.S b/linux-user/host/riscv/safe-syscall.inc.S index 9ca3fbfd1e..9ca3fbfd1e 100644 --- a/linux-user/host/riscv64/safe-syscall.inc.S +++ b/linux-user/host/riscv/safe-syscall.inc.S diff --git a/linux-user/host/riscv32/hostdep.h b/linux-user/host/riscv32/hostdep.h deleted file mode 100644 index adf9edbf2d..0000000000 --- a/linux-user/host/riscv32/hostdep.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * hostdep.h : things which are dependent on the host architecture - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef RISCV32_HOSTDEP_H -#define RISCV32_HOSTDEP_H - -#endif diff --git a/linux-user/host/s390/host-signal.h b/linux-user/host/s390/host-signal.h new file mode 100644 index 0000000000..26990e4893 --- /dev/null +++ b/linux-user/host/s390/host-signal.h @@ -0,0 +1,93 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef S390_HOST_SIGNAL_H +#define S390_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.psw.addr; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); + + /* + * ??? On linux, the non-rt signal handler has 4 (!) arguments instead + * of the normal 2 arguments. The 4th argument contains the "Translation- + * Exception Identification for DAT Exceptions" from the hardware (aka + * "int_parm_long"), which does in fact contain the is_write value. + * The rt signal handler, as far as I can tell, does not give this value + * at all. Not that we could get to it from here even if it were. + * So fall back to parsing instructions. Treat read-modify-write ones as + * writes, which is not fully correct, but for tracking self-modifying code + * this is better than treating them as reads. Checking si_addr page flags + * might be a viable improvement, albeit a racy one. + */ + /* ??? This is not even close to complete. 
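The switch that follows indexes the instruction stream as 16-bit halfwords. A small sketch of that indexing, with hypothetical helper names (not part of the patch), assuming the 6-byte RXY/RSY encodings:

    #include <stdint.h>

    /*
     * s390x opcodes are 1 or 2 bytes; instructions are 2, 4 or 6 bytes
     * and halfword-aligned. For the 6-byte RXY/RSY formats, the second
     * opcode byte is the low byte of the third halfword.
     */
    static inline uint8_t s390_major_opcode(const uint16_t *pinsn)
    {
        return pinsn[0] >> 8;        /* first opcode byte */
    }

    static inline uint8_t s390_rxy_minor_opcode(const uint16_t *pinsn)
    {
        return pinsn[2] & 0xff;      /* second opcode byte of a 6-byte insn */
    }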
*/ + switch (pinsn[0] >> 8) { + case 0x50: /* ST */ + case 0x42: /* STC */ + case 0x40: /* STH */ + case 0xba: /* CS */ + case 0xbb: /* CDS */ + return true; + case 0xc4: /* RIL format insns */ + switch (pinsn[0] & 0xf) { + case 0xf: /* STRL */ + case 0xb: /* STGRL */ + case 0x7: /* STHRL */ + return true; + } + break; + case 0xc8: /* SSF format insns */ + switch (pinsn[0] & 0xf) { + case 0x2: /* CSST */ + return true; + } + break; + case 0xe3: /* RXY format insns */ + switch (pinsn[2] & 0xff) { + case 0x50: /* STY */ + case 0x24: /* STG */ + case 0x72: /* STCY */ + case 0x70: /* STHY */ + case 0x8e: /* STPQ */ + case 0x3f: /* STRVH */ + case 0x3e: /* STRV */ + case 0x2f: /* STRVG */ + return true; + } + break; + case 0xeb: /* RSY format insns */ + switch (pinsn[2] & 0xff) { + case 0x14: /* CSY */ + case 0x30: /* CSG */ + case 0x31: /* CDSY */ + case 0x3e: /* CDSG */ + case 0xe4: /* LANG */ + case 0xe6: /* LAOG */ + case 0xe7: /* LAXG */ + case 0xe8: /* LAAG */ + case 0xea: /* LAALG */ + case 0xf4: /* LAN */ + case 0xf6: /* LAO */ + case 0xf7: /* LAX */ + case 0xfa: /* LAAL */ + case 0xf8: /* LAA */ + return true; + } + break; + } + return false; +} + +#endif diff --git a/linux-user/host/s390x/host-signal.h b/linux-user/host/s390x/host-signal.h new file mode 100644 index 0000000000..0e83f9358d --- /dev/null +++ b/linux-user/host/s390x/host-signal.h @@ -0,0 +1 @@ +#include "../s390/host-signal.h" diff --git a/linux-user/host/sparc/host-signal.h b/linux-user/host/sparc/host-signal.h new file mode 100644 index 0000000000..5e71d33f8e --- /dev/null +++ b/linux-user/host/sparc/host-signal.h @@ -0,0 +1,54 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. 
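The sparc decoder below keys off the two major fields of the memory-format encoding; a sketch of the field extraction it relies on (helper names are illustrative only):

    #include <stdint.h>

    /*
     * SPARC format 3 (memory) instructions have op == 3 in bits 31:30
     * and the opcode proper (op3) in bits 24:19; the case labels below
     * are op3 values of store instructions.
     */
    static inline uint32_t sparc_op(uint32_t insn)
    {
        return insn >> 30;            /* 3 for loads and stores */
    }

    static inline uint32_t sparc_op3(uint32_t insn)
    {
        return (insn >> 19) & 0x3f;   /* selects the particular access */
    }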
+ */ + +#ifndef SPARC_HOST_SIGNAL_H +#define SPARC_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ +#ifdef __arch64__ + return uc->uc_mcontext.mc_gregs[MC_PC]; +#else + return uc->uc_mcontext.gregs[REG_PC]; +#endif +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + uint32_t insn = *(uint32_t *)host_signal_pc(uc); + + if ((insn >> 30) == 3) { + switch ((insn >> 19) & 0x3f) { + case 0x05: /* stb */ + case 0x15: /* stba */ + case 0x06: /* sth */ + case 0x16: /* stha */ + case 0x04: /* st */ + case 0x14: /* sta */ + case 0x07: /* std */ + case 0x17: /* stda */ + case 0x0e: /* stx */ + case 0x1e: /* stxa */ + case 0x24: /* stf */ + case 0x34: /* stfa */ + case 0x27: /* stdf */ + case 0x37: /* stdfa */ + case 0x26: /* stqf */ + case 0x36: /* stqfa */ + case 0x25: /* stfsr */ + case 0x3c: /* casa */ + case 0x3e: /* casxa */ + return true; + } + } + return false; +} + +#endif diff --git a/linux-user/host/sparc64/host-signal.h b/linux-user/host/sparc64/host-signal.h new file mode 100644 index 0000000000..1191fe2d40 --- /dev/null +++ b/linux-user/host/sparc64/host-signal.h @@ -0,0 +1 @@ +#include "../sparc/host-signal.h" diff --git a/linux-user/host/x32/host-signal.h b/linux-user/host/x32/host-signal.h new file mode 100644 index 0000000000..26800591d3 --- /dev/null +++ b/linux-user/host/x32/host-signal.h @@ -0,0 +1 @@ +#include "../x86_64/host-signal.h" diff --git a/linux-user/host/x86_64/host-signal.h b/linux-user/host/x86_64/host-signal.h new file mode 100644 index 0000000000..883d2fcf65 --- /dev/null +++ b/linux-user/host/x86_64/host-signal.h @@ -0,0 +1,24 @@ +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (C) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
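The i386 and x86_64 variants share one check: trap number 0xe is the page-fault vector, and bit 1 of the hardware error code distinguishes writes from reads. Restated as a standalone sketch:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * x86 page-fault error code: bit 0 = present, bit 1 = write,
     * bit 2 = user-mode. Trap number 0xe is #PF; only faulting
     * writes matter to host_signal_write().
     */
    static bool x86_pf_is_write(uint64_t trapno, uint64_t err)
    {
        return trapno == 0xe && (err & 0x2);
    }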
+ */ + +#ifndef X86_64_HOST_SIGNAL_H +#define X86_64_HOST_SIGNAL_H + +static inline uintptr_t host_signal_pc(ucontext_t *uc) +{ + return uc->uc_mcontext.gregs[REG_RIP]; +} + +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +{ + return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe + && (uc->uc_mcontext.gregs[REG_ERR] & 0x2); +} + +#endif diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c index 81607a9b27..375576c8f0 100644 --- a/linux-user/hppa/cpu_loop.c +++ b/linux-user/hppa/cpu_loop.c @@ -144,29 +144,6 @@ void cpu_loop(CPUHPPAState *env) env->iaoq_f = env->gr[31]; env->iaoq_b = env->gr[31] + 4; break; - case EXCP_ITLB_MISS: - case EXCP_DTLB_MISS: - case EXCP_NA_ITLB_MISS: - case EXCP_NA_DTLB_MISS: - case EXCP_IMP: - case EXCP_DMP: - case EXCP_DMB: - case EXCP_PAGE_REF: - case EXCP_DMAR: - case EXCP_DMPI: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->cr[CR_IOR]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; - case EXCP_UNALIGN: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = 0; - info._sifields._sigfault._addr = env->cr[CR_IOR]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_ILL: case EXCP_PRIV_OPR: case EXCP_PRIV_REG: diff --git a/linux-user/m68k/cpu_loop.c b/linux-user/m68k/cpu_loop.c index ebf32be78f..790bd558c3 100644 --- a/linux-user/m68k/cpu_loop.c +++ b/linux-user/m68k/cpu_loop.c @@ -90,16 +90,6 @@ void cpu_loop(CPUM68KState *env) case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; - case EXCP_ACCESS: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->mmu.ar; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; case EXCP_DEBUG: info.si_signo = TARGET_SIGTRAP; info.si_errno = 0; diff --git a/linux-user/microblaze/cpu_loop.c b/linux-user/microblaze/cpu_loop.c index 52222eb93f..a94467dd2d 100644 --- a/linux-user/microblaze/cpu_loop.c +++ b/linux-user/microblaze/cpu_loop.c @@ -37,16 +37,6 @@ void cpu_loop(CPUMBState *env) process_queued_cpu_work(cs); switch (trapnr) { - case 0xaa: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c index cb03fb066b..b735c99a24 100644 --- a/linux-user/mips/cpu_loop.c +++ b/linux-user/mips/cpu_loop.c @@ -158,17 +158,6 @@ done_syscall: } env->active_tc.gpr[2] = ret; break; - case EXCP_TLBL: - case EXCP_TLBS: - case EXCP_AdEL: - case EXCP_AdES: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->CP0_BadVAddr; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_CpU: case EXCP_RI: info.si_signo = TARGET_SIGILL; diff --git a/linux-user/openrisc/cpu_loop.c b/linux-user/openrisc/cpu_loop.c index f6360db47c..3cfdbbf037 100644 --- a/linux-user/openrisc/cpu_loop.c +++ b/linux-user/openrisc/cpu_loop.c @@ -54,15 +54,6 @@ void cpu_loop(CPUOpenRISCState *env) cpu_set_gpr(env, 11, ret); } break; - case EXCP_DPF: - case EXCP_IPF: - case 
EXCP_RANGE: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_ALIGN: info.si_signo = TARGET_SIGBUS; info.si_errno = 0; @@ -77,13 +68,6 @@ void cpu_loop(CPUOpenRISCState *env) info._sifields._sigfault._addr = env->pc; queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; - case EXCP_FPE: - info.si_signo = TARGET_SIGFPE; - info.si_errno = 0; - info.si_code = 0; - info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_INTERRUPT: /* We processed the pending cpu work above. */ break; @@ -96,6 +80,15 @@ void cpu_loop(CPUOpenRISCState *env) case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; + case EXCP_RANGE: + /* Requires SR.OVE set, which linux-user won't do. */ + cpu_abort(cs, "Unexpected RANGE exception"); + case EXCP_FPE: + /* + * Requires FPSCR.FPEE set. Writes to FPSCR from usermode are + * not yet enabled in the kernel ABI, so linux-user does not + * enable them either. + */ + cpu_abort(cs, "Unexpected FPE exception"); default: g_assert_not_reached(); } diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c index 840b23736b..483e669300 100644 --- a/linux-user/ppc/cpu_loop.c +++ b/linux-user/ppc/cpu_loop.c @@ -162,14 +162,6 @@ void cpu_loop(CPUPPCState *env) cpu_abort(cs, "External interrupt while in user mode. " "Aborting\n"); break; - case POWERPC_EXCP_ALIGN: /* Alignment exception */ - /* XXX: check this */ - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_BUS_ADRALN; - info._sifields._sigfault._addr = env->nip; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case POWERPC_EXCP_PROGRAM: /* Program exception */ case POWERPC_EXCP_HV_EMU: /* HV emulation */ /* XXX: check this */ diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c index e5bb6d908a..b301dac802 100644 --- a/linux-user/riscv/cpu_loop.c +++ b/linux-user/riscv/cpu_loop.c @@ -87,13 +87,6 @@ void cpu_loop(CPURISCVState *env) sigcode = TARGET_TRAP_BRKPT; sigaddr = env->pc; break; - case RISCV_EXCP_INST_PAGE_FAULT: - case RISCV_EXCP_LOAD_PAGE_FAULT: - case RISCV_EXCP_STORE_PAGE_FAULT: - signum = TARGET_SIGSEGV; - sigcode = TARGET_SEGV_MAPERR; - sigaddr = env->badaddr; - break; case RISCV_EXCP_SEMIHOST: env->gpr[xA0] = do_common_semihosting(cs); env->pc += 4; diff --git a/linux-user/s390x/cpu_loop.c b/linux-user/s390x/cpu_loop.c index 69b69981f6..d089c8417e 100644 --- a/linux-user/s390x/cpu_loop.c +++ b/linux-user/s390x/cpu_loop.c @@ -24,8 +24,6 @@ #include "cpu_loop-common.h" #include "signal-common.h" -/* s390x masks the fault address it reports in si_addr for SIGSEGV and SIGBUS */ -#define S390X_FAIL_ADDR_MASK -4096LL static int get_pgm_data_si_code(int dxc_code) { @@ -111,12 +109,13 @@ void cpu_loop(CPUS390XState *env) n = TARGET_ILL_ILLOPC; goto do_signal_pc; case PGM_PROTECTION: + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_ACCERR, + env->__excp_addr); + break; case PGM_ADDRESSING: - sig = TARGET_SIGSEGV; - /* XXX: check env->error_code */ - n = TARGET_SEGV_MAPERR; - addr = env->__excp_addr & S390X_FAIL_ADDR_MASK; - goto do_signal; + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, + env->__excp_addr); + break; case PGM_EXECUTE: case PGM_SPECIFICATION: case PGM_SPECIAL_OP: diff --git a/linux-user/sh4/cpu_loop.c b/linux-user/sh4/cpu_loop.c index 65b8972e3c..ac9b01840c 100644 --- a/linux-user/sh4/cpu_loop.c +++ b/linux-user/sh4/cpu_loop.c @@
-65,14 +65,6 @@ void cpu_loop(CPUSH4State *env) info.si_code = TARGET_TRAP_BRKPT; queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; - case 0xa0: - case 0xc0: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->tea; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_ATOMIC: cpu_exec_step_atomic(cs); arch_interrupt = false; diff --git a/linux-user/signal.c b/linux-user/signal.c index 14d8fdfde1..81c45bfce9 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" #include "exec/gdbstub.h" +#include "hw/core/tcg-cpu-ops.h" #include <sys/ucontext.h> #include <sys/resource.h> @@ -29,6 +30,7 @@ #include "loader.h" #include "trace.h" #include "signal-common.h" +#include "host-signal.h" static struct target_sigaction sigact_table[TARGET_NSIG]; @@ -686,9 +688,38 @@ void force_sigsegv(int oldsig) } force_sig(TARGET_SIGSEGV); } - #endif +void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, bool maperr, uintptr_t ra) +{ + const struct TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops; + + if (tcg_ops->record_sigsegv) { + tcg_ops->record_sigsegv(cpu, addr, access_type, maperr, ra); + } + + force_sig_fault(TARGET_SIGSEGV, + maperr ? TARGET_SEGV_MAPERR : TARGET_SEGV_ACCERR, + addr); + cpu->exception_index = EXCP_INTERRUPT; + cpu_loop_exit_restore(cpu, ra); +} + +void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, uintptr_t ra) +{ + const struct TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops; + + if (tcg_ops->record_sigbus) { + tcg_ops->record_sigbus(cpu, addr, access_type, ra); + } + + force_sig_fault(TARGET_SIGBUS, TARGET_BUS_ADRALN, addr); + cpu->exception_index = EXCP_INTERRUPT; + cpu_loop_exit_restore(cpu, ra); +} + /* abort execution with signal */ static void QEMU_NORETURN dump_core_and_abort(int target_sig) { @@ -769,41 +800,101 @@ static inline void rewind_if_in_safe_syscall(void *puc) } #endif -static void host_signal_handler(int host_signum, siginfo_t *info, - void *puc) +static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUArchState *env = thread_cpu->env_ptr; CPUState *cpu = env_cpu(env); TaskState *ts = cpu->opaque; - - int sig; target_siginfo_t tinfo; ucontext_t *uc = puc; struct emulated_sigtable *k; + int guest_sig; + uintptr_t pc = 0; + bool sync_sig = false; + + /* + * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special + * handling wrt signal blocking and unwinding. + */ + if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) { + MMUAccessType access_type; + uintptr_t host_addr; + abi_ptr guest_addr; + bool is_write; + + host_addr = (uintptr_t)info->si_addr; + + /* + * Convert forcefully to guest address space: addresses outside + * reserved_va are still valid to report via SEGV_MAPERR. + */ + guest_addr = h2g_nocheck(host_addr); - /* the CPU emulator uses some host signals to detect exceptions, - we forward to it some signals */ - if ((host_signum == SIGSEGV || host_signum == SIGBUS) - && info->si_code > 0) { - if (cpu_signal_handler(host_signum, info, puc)) - return; + pc = host_signal_pc(uc); + is_write = host_signal_write(info, uc); + access_type = adjust_signal_pc(&pc, is_write); + + if (host_sig == SIGSEGV) { + bool maperr = true; + + if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { + /* If this was a write to a TB protected page, restart. 
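The two new entry points above give targets a single way to turn a faulting guest access into a queued signal plus an unwind. A hedged usage sketch (the wrapper below is illustrative, not from the patch; it assumes the usual QEMU-internal headers and the signatures introduced here):

    /*
     * Hypothetical example: a user-only load/store helper reporting a
     * misaligned access. `ra` is the host return address, as for GETPC().
     */
    static void report_unaligned(CPUState *cpu, target_ulong addr,
                                 MMUAccessType access_type, uintptr_t ra)
    {
        /* Queues TARGET_SIGBUS/BUS_ADRALN and unwinds; does not return. */
        cpu_loop_exit_sigbus(cpu, addr, access_type, ra);
    }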
*/ + if (is_write && + handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask, + pc, guest_addr)) { + return; + } + + /* + * With reserved_va, the whole address space is PROT_NONE, + * which means that we may get ACCERR when we want MAPERR. + */ + if (page_get_flags(guest_addr) & PAGE_VALID) { + maperr = false; + } else { + info->si_code = SEGV_MAPERR; + } + } + + sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); + cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); + } else { + sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); + if (info->si_code == BUS_ADRALN) { + cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); + } + } + + sync_sig = true; } /* get target signal number */ - sig = host_to_target_signal(host_signum); - if (sig < 1 || sig > TARGET_NSIG) + guest_sig = host_to_target_signal(host_sig); + if (guest_sig < 1 || guest_sig > TARGET_NSIG) { return; - trace_user_host_signal(env, host_signum, sig); - - rewind_if_in_safe_syscall(puc); + } + trace_user_host_signal(env, host_sig, guest_sig); host_to_target_siginfo_noswap(&tinfo, info); - k = &ts->sigtab[sig - 1]; + k = &ts->sigtab[guest_sig - 1]; k->info = tinfo; - k->pending = sig; + k->pending = guest_sig; ts->signal_pending = 1; - /* Block host signals until target signal handler entered. We + /* + * For synchronous signals, unwind the cpu state to the faulting + * insn and then exit back to the main loop so that the signal + * is delivered immediately. + */ + if (sync_sig) { + cpu->exception_index = EXCP_INTERRUPT; + cpu_loop_exit_restore(cpu, pc); + } + + rewind_if_in_safe_syscall(puc); + + /* + * Block host signals until target signal handler entered. We * can't block SIGSEGV or SIGBUS while we're executing guest * code in case the guest code provokes one in the window between * now and it getting out to the main loop. 
Signals will be diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c index ad29b4eb6a..0ba65e431c 100644 --- a/linux-user/sparc/cpu_loop.c +++ b/linux-user/sparc/cpu_loop.c @@ -219,17 +219,6 @@ void cpu_loop (CPUSPARCState *env) case TT_WIN_UNF: /* window underflow */ restore_window(env); break; - case TT_TFAULT: - case TT_DFAULT: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->mmuregs[4]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; #else case TT_SPILL: /* window overflow */ save_window(env); @@ -237,20 +226,6 @@ void cpu_loop (CPUSPARCState *env) case TT_FILL: /* window underflow */ restore_window(env); break; - case TT_TFAULT: - case TT_DFAULT: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - if (trapnr == TT_DFAULT) - info._sifields._sigfault._addr = env->dmmu.mmuregs[4]; - else - info._sifields._sigfault._addr = cpu_tsptr(env)->tpc; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; #ifndef TARGET_ABI32 case 0x16e: flush_windows(env); diff --git a/linux-user/xtensa/cpu_loop.c b/linux-user/xtensa/cpu_loop.c index 622afbcd34..a83490ab35 100644 --- a/linux-user/xtensa/cpu_loop.c +++ b/linux-user/xtensa/cpu_loop.c @@ -226,15 +226,6 @@ void cpu_loop(CPUXtensaState *env) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; - case LOAD_PROHIBITED_CAUSE: - case STORE_PROHIBITED_CAUSE: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->sregs[EXCVADDR]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; - default: fprintf(stderr, "exccause = %d\n", env->sregs[EXCCAUSE]); g_assert_not_reached(); diff --git a/meson.build b/meson.build index 85f1e43dfe..bc918fbe58 100644 --- a/meson.build +++ b/meson.build @@ -55,7 +55,7 @@ have_block = have_system or have_tools python = import('python').find_installation() supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux'] -supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64', +supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', 'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64'] cpu = host_machine.cpu_family() @@ -351,8 +351,6 @@ if not get_option('tcg').disabled() tcg_arch = 'i386' elif config_host['ARCH'] == 'ppc64' tcg_arch = 'ppc' - elif config_host['ARCH'] in ['riscv32', 'riscv64'] - tcg_arch = 'riscv' endif add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch, language: ['c', 'cpp', 'objc']) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 320c56ba2c..d65e744af9 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -15,7 +15,9 @@ #include "qapi/error.h" #include "cpu.h" #include "exec/ramblock.h" +#include "exec/ram_addr.h" #include "qemu/rcu_queue.h" +#include "qemu/main-loop.h" #include "qapi/qapi-commands-migration.h" #include "ram.h" #include "trace.h" @@ -23,9 +25,26 @@ #include "monitor/hmp.h" #include "monitor/monitor.h" #include "qapi/qmp/qdict.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" +#include "exec/memory.h" + +/* + * total_dirty_pages is protected by the BQL and is used + * to count dirty pages during the period between two calls + * of memory_global_dirty_log_sync + */ +uint64_t total_dirty_pages; + +typedef struct DirtyPageRecord { +
uint64_t start_pages; + uint64_t end_pages; +} DirtyPageRecord; static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; static struct DirtyRateStat DirtyStat; +static DirtyRateMeasureMode dirtyrate_mode = + DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) { @@ -70,51 +89,94 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) static struct DirtyRateInfo *query_dirty_rate_info(void) { + int i; int64_t dirty_rate = DirtyStat.dirty_rate; struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); - - if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { - info->has_dirty_rate = true; - info->dirty_rate = dirty_rate; - } + DirtyRateVcpuList *head = NULL, **tail = &head; info->status = CalculatingState; info->start_time = DirtyStat.start_time; info->calc_time = DirtyStat.calc_time; info->sample_pages = DirtyStat.sample_pages; + info->mode = dirtyrate_mode; + + if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { + info->has_dirty_rate = true; + info->dirty_rate = dirty_rate; + + if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) { + /* + * Set sample_pages to 0 to indicate that page sampling + * is not enabled. + */ + info->sample_pages = 0; + info->has_vcpu_dirty_rate = true; + for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) { + DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu)); + rate->id = DirtyStat.dirty_ring.rates[i].id; + rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate; + QAPI_LIST_APPEND(tail, rate); + } + info->vcpu_dirty_rate = head; + } + + if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) { + info->sample_pages = 0; + } + } trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); return info; } -static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time, - uint64_t sample_pages) +static void init_dirtyrate_stat(int64_t start_time, + struct DirtyRateConfig config) { - DirtyStat.total_dirty_samples = 0; - DirtyStat.total_sample_count = 0; - DirtyStat.total_block_mem_MB = 0; DirtyStat.dirty_rate = -1; DirtyStat.start_time = start_time; - DirtyStat.calc_time = calc_time; - DirtyStat.sample_pages = sample_pages; + DirtyStat.calc_time = config.sample_period_seconds; + DirtyStat.sample_pages = config.sample_pages_per_gigabytes; + + switch (config.mode) { + case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING: + DirtyStat.page_sampling.total_dirty_samples = 0; + DirtyStat.page_sampling.total_sample_count = 0; + DirtyStat.page_sampling.total_block_mem_MB = 0; + break; + case DIRTY_RATE_MEASURE_MODE_DIRTY_RING: + DirtyStat.dirty_ring.nvcpu = -1; + DirtyStat.dirty_ring.rates = NULL; + break; + default: + break; + } +} + +static void cleanup_dirtyrate_stat(struct DirtyRateConfig config) +{ + /* the last calc-dirty-rate QMP command used dirty ring mode */ + if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) { + free(DirtyStat.dirty_ring.rates); + DirtyStat.dirty_ring.rates = NULL; + } } static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) { - DirtyStat.total_dirty_samples += info->sample_dirty_count; - DirtyStat.total_sample_count += info->sample_pages_count; + DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count; + DirtyStat.page_sampling.total_sample_count += info->sample_pages_count; /* size of total pages in MB */ - DirtyStat.total_block_mem_MB += (info->ramblock_pages * - TARGET_PAGE_SIZE) >> 20; + DirtyStat.page_sampling.total_block_mem_MB += (info->ramblock_pages * + TARGET_PAGE_SIZE) >> 20; } static
void update_dirtyrate(uint64_t msec) { uint64_t dirtyrate; - uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; - uint64_t total_sample_count = DirtyStat.total_sample_count; - uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; + uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples; + uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count; + uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB; dirtyrate = total_dirty_samples * total_block_mem_MB * 1000 / (total_sample_count * msec); @@ -327,21 +389,183 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, update_dirtyrate_stat(block_dinfo); } - if (DirtyStat.total_sample_count == 0) { + if (DirtyStat.page_sampling.total_sample_count == 0) { return false; } return true; } -static void calculate_dirtyrate(struct DirtyRateConfig config) +static inline void record_dirtypages(DirtyPageRecord *dirty_pages, + CPUState *cpu, bool start) +{ + if (start) { + dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages; + } else { + dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages; + } +} + +static void dirtyrate_global_dirty_log_start(void) +{ + qemu_mutex_lock_iothread(); + memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); + qemu_mutex_unlock_iothread(); +} + +static void dirtyrate_global_dirty_log_stop(void) +{ + qemu_mutex_lock_iothread(); + memory_global_dirty_log_sync(); + memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE); + qemu_mutex_unlock_iothread(); +} + +static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages) +{ + uint64_t memory_size_MB; + int64_t time_s; + uint64_t increased_dirty_pages = + dirty_pages.end_pages - dirty_pages.start_pages; + + memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; + time_s = DirtyStat.calc_time; + + return memory_size_MB / time_s; +} + +static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, + bool start) +{ + if (start) { + dirty_pages->start_pages = total_dirty_pages; + } else { + dirty_pages->end_pages = total_dirty_pages; + } +} + +static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages) +{ + DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages); +} + +static inline void dirtyrate_manual_reset_protect(void) +{ + RAMBlock *block = NULL; + + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_MIGRATABLE(block) { + memory_region_clear_dirty_bitmap(block->mr, 0, + block->used_length); + } + } +} + +static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) +{ + int64_t msec = 0; + int64_t start_time; + DirtyPageRecord dirty_pages; + + qemu_mutex_lock_iothread(); + memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); + + /* + * The first round of log sync may return all-1 bits with + * KVM_DIRTY_LOG_INITIALLY_SET enabled; skip it unconditionally + * and start dirty tracking from the second round of log sync. + */ + memory_global_dirty_log_sync(); + + /* + * Reset page protection manually and unconditionally. + * This makes sure the KVM dirty log is cleared if the + * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled. + */ + dirtyrate_manual_reset_protect(); + qemu_mutex_unlock_iothread(); + + record_dirtypages_bitmap(&dirty_pages, true); + + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + DirtyStat.start_time = start_time / 1000; + + msec = config.sample_period_seconds * 1000; + msec = set_sample_page_period(msec, start_time); + DirtyStat.calc_time = msec / 1000; + + /* + * dirtyrate_global_dirty_log_stop does two things: + * 1.
fetch dirty bitmap from kvm + * 2. stop dirty tracking + */ + dirtyrate_global_dirty_log_stop(); + + record_dirtypages_bitmap(&dirty_pages, false); + + do_calculate_dirtyrate_bitmap(dirty_pages); +} + +static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) +{ + CPUState *cpu; + int64_t msec = 0; + int64_t start_time; + uint64_t dirtyrate = 0; + uint64_t dirtyrate_sum = 0; + DirtyPageRecord *dirty_pages; + int nvcpu = 0; + int i = 0; + + CPU_FOREACH(cpu) { + nvcpu++; + } + + dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu); + + DirtyStat.dirty_ring.nvcpu = nvcpu; + DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu); + + dirtyrate_global_dirty_log_start(); + + CPU_FOREACH(cpu) { + record_dirtypages(dirty_pages, cpu, true); + } + + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + DirtyStat.start_time = start_time / 1000; + + msec = config.sample_period_seconds * 1000; + msec = set_sample_page_period(msec, start_time); + DirtyStat.calc_time = msec / 1000; + + dirtyrate_global_dirty_log_stop(); + + CPU_FOREACH(cpu) { + record_dirtypages(dirty_pages, cpu, false); + } + + for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) { + dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]); + trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); + + DirtyStat.dirty_ring.rates[i].id = i; + DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate; + dirtyrate_sum += dirtyrate; + } + + DirtyStat.dirty_rate = dirtyrate_sum; + free(dirty_pages); +} + +static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) { struct RamblockDirtyInfo *block_dinfo = NULL; int block_count = 0; int64_t msec = 0; int64_t initial_time; - rcu_register_thread(); rcu_read_lock(); initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { @@ -364,16 +588,26 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) out: rcu_read_unlock(); free_ramblock_dirty_info(block_dinfo, block_count); - rcu_unregister_thread(); +} + +static void calculate_dirtyrate(struct DirtyRateConfig config) +{ + if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) { + calculate_dirtyrate_dirty_bitmap(config); + } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) { + calculate_dirtyrate_dirty_ring(config); + } else { + calculate_dirtyrate_sample_vm(config); + } + + trace_dirtyrate_calculate(DirtyStat.dirty_rate); } void *get_dirtyrate_thread(void *arg) { struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; int ret; - int64_t start_time; - int64_t calc_time; - uint64_t sample_pages; + rcu_register_thread(); ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, DIRTY_RATE_STATUS_MEASURING); @@ -382,11 +616,6 @@ void *get_dirtyrate_thread(void *arg) return NULL; } - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; - calc_time = config.sample_period_seconds; - sample_pages = config.sample_pages_per_gigabytes; - init_dirtyrate_stat(start_time, calc_time, sample_pages); - calculate_dirtyrate(config); ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, @@ -394,15 +623,22 @@ void *get_dirtyrate_thread(void *arg) if (ret == -1) { error_report("change dirtyrate state failed."); } + + rcu_unregister_thread(); return NULL; } -void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages, - int64_t sample_pages, Error **errp) +void qmp_calc_dirty_rate(int64_t calc_time, + bool has_sample_pages, + int64_t sample_pages, + bool has_mode, + DirtyRateMeasureMode mode, + Error 
**errp) { static struct DirtyRateConfig config; QemuThread thread; int ret; + int64_t start_time; /* * If the dirty rate is already being measured, don't attempt to start. @@ -419,6 +655,15 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages, return; } + if (!has_mode) { + mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; + } + + if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) { + error_setg(errp, "either sample-pages or dirty-ring can be specified."); + return; + } + if (has_sample_pages) { if (!is_sample_pages_valid(sample_pages)) { error_setg(errp, "sample-pages is out of range[%d, %d].", @@ -431,6 +676,19 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages, } /* + * Dirty ring mode only works when the KVM dirty ring is enabled; + * conversely, dirty bitmap mode only works when it is not. + */ + if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) && + !kvm_dirty_ring_enabled()) || + ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) && + kvm_dirty_ring_enabled())) { + error_setg(errp, "mode %s is not enabled, use other method instead.", + DirtyRateMeasureMode_str(mode)); + return; + } + + /* * Init calculation state as unstarted. */ ret = dirtyrate_set_state(&CalculatingState, CalculatingState, @@ -442,6 +700,19 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages, config.sample_period_seconds = calc_time; config.sample_pages_per_gigabytes = sample_pages; + config.mode = mode; + + cleanup_dirtyrate_stat(config); + + /* + * Update the dirty rate mode so that we can figure out which mode + * was used in the last calculation. + */ + dirtyrate_mode = mode; + + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + init_dirtyrate_stat(start_time, config); + qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, (void *)&config, QEMU_THREAD_DETACHED); } @@ -463,12 +734,24 @@ void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict) info->sample_pages); monitor_printf(mon, "Period: %"PRIi64" (sec)\n", info->calc_time); + monitor_printf(mon, "Mode: %s\n", + DirtyRateMeasureMode_str(info->mode)); monitor_printf(mon, "Dirty rate: "); if (info->has_dirty_rate) { monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate); + if (info->has_vcpu_dirty_rate) { + DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate; + for (rate = head; rate != NULL; rate = rate->next) { + monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64 + " (MB/s)\n", rate->value->id, + rate->value->dirty_rate); + } + } } else { monitor_printf(mon, "(not ready)\n"); } + + qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate); g_free(info); } @@ -477,6 +760,9 @@ void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict) int64_t sec = qdict_get_try_int(qdict, "second", 0); int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1); bool has_sample_pages = (sample_pages != -1); + bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false); + bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false); + DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; Error *err = NULL; if (!sec) { @@ -484,7 +770,20 @@ void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict) return; } - qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, &err); + if (dirty_ring && dirty_bitmap) { + monitor_printf(mon, "Either dirty ring or dirty bitmap " + "can be specified!\n"); + return; + } + + if (dirty_bitmap) { + mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP; + } else if (dirty_ring) { + mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING; + } + +
qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true, + mode, &err); if (err) { hmp_handle_error(mon, err); return; diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index e1fd29089e..69d4c5b865 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -43,6 +43,7 @@ struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ + DirtyRateMeasureMode mode; /* mode of dirtyrate measurement */ }; /* @@ -58,17 +59,29 @@ struct RamblockDirtyInfo { uint32_t *hash_result; /* array of hash result for sampled pages */ }; +typedef struct SampleVMStat { + uint64_t total_dirty_samples; /* total dirty sampled page */ + uint64_t total_sample_count; /* total sampled pages */ + uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ +} SampleVMStat; + +typedef struct VcpuStat { + int nvcpu; /* number of vcpu */ + DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +} VcpuStat; + /* * Store calculation statistics for each measure. */ struct DirtyRateStat { - uint64_t total_dirty_samples; /* total dirty sampled page */ - uint64_t total_sample_count; /* total sampled pages */ - uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ int64_t dirty_rate; /* dirty rate in MB/s */ int64_t start_time; /* calculation start time in units of second */ int64_t calc_time; /* time duration of two sampling in units of second */ uint64_t sample_pages; /* sample pages per GB */ + union { + SampleVMStat page_sampling; + VcpuStat dirty_ring; + }; }; void *get_dirtyrate_thread(void *arg); diff --git a/migration/migration.c b/migration/migration.c index 9172686b89..53b9a8af96 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -391,7 +391,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, ram_addr_t start, uint64_t haddr) { - void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb))); + void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb)); bool received = false; WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { @@ -2049,6 +2049,20 @@ void migrate_init(MigrationState *s) s->threshold_size = 0; } +int migrate_add_blocker_internal(Error *reason, Error **errp) +{ + /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ + if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { + error_propagate_prepend(errp, error_copy(reason), + "disallowing migration blocker " + "(migration/snapshot in progress) for: "); + return -EBUSY; + } + + migration_blockers = g_slist_prepend(migration_blockers, reason); + return 0; +} + int migrate_add_blocker(Error *reason, Error **errp) { if (only_migratable) { @@ -2058,15 +2072,7 @@ int migrate_add_blocker(Error *reason, Error **errp) return -EACCES; } - if (migration_is_idle()) { - migration_blockers = g_slist_prepend(migration_blockers, reason); - return 0; - } - - error_propagate_prepend(errp, error_copy(reason), - "disallowing migration blocker " - "(migration in progress) for: "); - return -EBUSY; + return migrate_add_blocker_internal(reason, errp); } void migrate_del_blocker(Error *reason) @@ -2631,8 +2637,8 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, * Since we currently insist on matching page sizes, just sanity check * we're being asked for whole host pages. 
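These migration hunks repeatedly swap open-coded mask arithmetic for ROUND_DOWN() and QEMU_IS_ALIGNED(); for power-of-two sizes the two forms are equivalent, which is what makes the conversion behavior-preserving. A self-contained check of the identities involved:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t addr = 0x12345678;    /* arbitrary example address */
        uint64_t sz = 0x1000;          /* any power-of-two size */

        /* ROUND_DOWN(addr, sz) == addr & -sz == addr & ~(sz - 1) */
        assert((addr & -sz) == (addr & ~(sz - 1)));

        /* QEMU_IS_ALIGNED(addr, sz) == !(addr & (sz - 1)) */
        assert(!(addr & (sz - 1)) == (addr % sz == 0));
        return 0;
    }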
*/ - if (start & (our_host_ps - 1) || - (len & (our_host_ps - 1))) { + if (!QEMU_IS_ALIGNED(start, our_host_ps) || + !QEMU_IS_ALIGNED(len, our_host_ps)) { error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT " len: %zd", __func__, start, len); mark_source_rp_bad(ms); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 2e9697bdd2..e721f69d0f 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -402,7 +402,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) strerror(errno)); goto out; } - g_assert(((size_t)testarea & (pagesize - 1)) == 0); + g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); reg_struct.range.start = (uintptr_t)testarea; reg_struct.range.len = pagesize; @@ -660,7 +660,7 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd, struct uffdio_range range; int ret; trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb)); - range.start = client_addr & ~(pagesize - 1); + range.start = ROUND_DOWN(client_addr, pagesize); range.len = pagesize; ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range); if (ret) { @@ -671,6 +671,29 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd, return ret; } +static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb, + ram_addr_t start, uint64_t haddr) +{ + void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb)); + + /* + * Discarded pages (via RamDiscardManager) are never migrated. On unlikely + * access, place a zeropage, which will also set the relevant bits in the + * recv_bitmap accordingly, so we won't try placing a zeropage twice. + * + * Checking a single bit is sufficient to handle pagesize > TPS as either + * all relevant bits are set or not. + */ + assert(QEMU_IS_ALIGNED(start, qemu_ram_pagesize(rb))); + if (ramblock_page_is_discarded(rb, start)) { + bool received = ramblock_recv_bitmap_test_byte_offset(rb, start); + + return received ? 
0 : postcopy_place_page_zero(mis, aligned, rb); + } + + return migrate_send_rp_req_pages(mis, rb, start, haddr); +} + /* * Callback from shared fault handlers to ask for a page, * the page must be specified by a RAMBlock and an offset in that rb @@ -679,8 +702,7 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd, int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, uint64_t client_addr, uint64_t rb_offset) { - size_t pagesize = qemu_ram_pagesize(rb); - uint64_t aligned_rbo = rb_offset & ~(pagesize - 1); + uint64_t aligned_rbo = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb)); MigrationIncomingState *mis = migration_incoming_get_current(); trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb), @@ -690,7 +712,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, qemu_ram_get_idstr(rb), rb_offset); return postcopy_wake_shared(pcfd, client_addr, rb); } - migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr); + postcopy_request_page(mis, rb, aligned_rbo, client_addr); return 0; } @@ -970,7 +992,7 @@ static void *postcopy_ram_fault_thread(void *opaque) break; } - rb_offset &= ~(qemu_ram_pagesize(rb) - 1); + rb_offset = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb)); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset, @@ -984,8 +1006,8 @@ retry: * Send the request to the source - we want to request one * of our host page sizes (which is >= TPS) */ - ret = migrate_send_rp_req_pages(mis, rb, rb_offset, - msg.arg.pagefault.address); + ret = postcopy_request_page(mis, rb, rb_offset, + msg.arg.pagefault.address); if (ret) { /* May be network failure, try to wait for recovery */ if (ret == -EIO && postcopy_pause_fault_thread(mis)) { /* We got reconnected somehow, try to continue */ goto retry; } else { /* This is an unavoidable fault */ - error_report("%s: migrate_send_rp_req_pages() get %d", + error_report("%s: postcopy_request_page() get %d", __func__, ret); break; } diff --git a/migration/ram.c b/migration/ram.c index bb908822d5..680a5158aa 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -811,7 +811,7 @@ static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb, assert(shift >= 6); size = 1ULL << (TARGET_PAGE_BITS + shift); - start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); + start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size); trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); memory_region_clear_dirty_bitmap(rb->mr, start, size); } @@ -858,6 +858,81 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, return ret; } +static void dirty_bitmap_clear_section(MemoryRegionSection *section, + void *opaque) +{ + const hwaddr offset = section->offset_within_region; + const hwaddr size = int128_get64(section->size); + const unsigned long start = offset >> TARGET_PAGE_BITS; + const unsigned long npages = size >> TARGET_PAGE_BITS; + RAMBlock *rb = section->mr->ram_block; + uint64_t *cleared_bits = opaque; + + /* + * We don't grab ram_state->bitmap_mutex because we expect to run + * only when starting migration or during postcopy recovery where + * we don't have concurrent access.
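dirty_bitmap_clear_section() counts the bits it is about to drop before clearing them, so the caller can keep migration_dirty_pages consistent. A standalone sketch of that count-then-clear pattern over a plain word-array bitmap:

    #include <stdint.h>

    /* Count and clear npages bits starting at 'start'; return the count. */
    static uint64_t clear_and_count(uint64_t *bmap, unsigned long start,
                                    unsigned long npages)
    {
        uint64_t cleared = 0;

        for (unsigned long i = start; i < start + npages; i++) {
            uint64_t mask = 1ULL << (i % 64);

            if (bmap[i / 64] & mask) {
                cleared++;              /* this page was dirty */
                bmap[i / 64] &= ~mask;  /* drop it from the bitmap */
            }
        }
        return cleared;  /* caller subtracts from its dirty-page total */
    }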
+ */ + if (!migration_in_postcopy() && !migrate_background_snapshot()) { + migration_clear_memory_region_dirty_bitmap_range(rb, start, npages); + } + *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages); + bitmap_clear(rb->bmap, start, npages); +} + +/* + * Exclude all dirty pages from migration that fall into a discarded range as + * managed by a RamDiscardManager responsible for the mapped memory region of + * the RAMBlock. Clear the corresponding bits in the dirty bitmaps. + * + * Discarded pages ("logically unplugged") have undefined content and must + * not get migrated, because even reading these pages for migration might + * result in undesired behavior. + * + * Returns the number of cleared bits in the RAMBlock dirty bitmap. + * + * Note: The result is only stable while migrating (precopy/postcopy). + */ +static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb) +{ + uint64_t cleared_bits = 0; + + if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = 0, + .size = int128_make64(qemu_ram_get_used_length(rb)), + }; + + ram_discard_manager_replay_discarded(rdm, &section, + dirty_bitmap_clear_section, + &cleared_bits); + } + return cleared_bits; +} + +/* + * Check if a host-page aligned page falls into a discarded range as managed by + * a RamDiscardManager responsible for the mapped memory region of the RAMBlock. + * + * Note: The result is only stable while migrating (precopy/postcopy). + */ +bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start) +{ + if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = start, + .size = int128_make64(qemu_ram_pagesize(rb)), + }; + + return !ram_discard_manager_is_populated(rdm, &section); + } + return false; +} + /* Called with RCU critical section */ static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb) { @@ -1564,25 +1639,68 @@ out: return ret; } +static inline void populate_read_range(RAMBlock *block, ram_addr_t offset, + ram_addr_t size) +{ + /* + * We read one byte of each page; this will preallocate page tables if + * required and populate the shared zeropage on MAP_PRIVATE anonymous memory + * where no page was populated yet. This might require adaptation when + * supporting other mappings, like shmem. + */ + for (; offset < size; offset += block->page_size) { + char tmp = *((char *)block->host + offset); + + /* Don't optimize the read out */ + asm volatile("" : "+r" (tmp)); + } +} + +static inline int populate_read_section(MemoryRegionSection *section, + void *opaque) +{ + const hwaddr size = int128_get64(section->size); + hwaddr offset = section->offset_within_region; + RAMBlock *block = section->mr->ram_block; + + populate_read_range(block, offset, size); + return 0; +} + /* - * ram_block_populate_pages: populate memory in the RAM block by reading - * an integer from the beginning of each page. + * ram_block_populate_read: preallocate page tables and populate pages in the + * RAM block by reading a byte of each page. * * Since it's solely used for userfault_fd WP feature, here we just * hardcode page size to qemu_real_host_page_size.
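The empty asm with a "+r" constraint in populate_read_range() is what keeps the per-page load from being optimized away: the compiler must treat tmp as both read and written by the asm, so the preceding load stays live. The same trick in isolation:

    #include <stddef.h>

    /* Touch one byte per page so the kernel populates page tables. */
    static void touch_pages(const char *base, size_t size, size_t page_size)
    {
        for (size_t off = 0; off < size; off += page_size) {
            char tmp = base[off];           /* the read that must survive */

            asm volatile("" : "+r"(tmp));   /* opaque use: load is not dead */
        }
    }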
* * @block: RAM block to populate */ -static void ram_block_populate_pages(RAMBlock *block) +static void ram_block_populate_read(RAMBlock *rb) { - char *ptr = (char *) block->host; - - for (ram_addr_t offset = 0; offset < block->used_length; - offset += qemu_real_host_page_size) { - char tmp = *(ptr + offset); - - /* Don't optimize the read out */ - asm volatile("" : "+r" (tmp)); + /* + * Skip populating all pages that fall into a discarded range as managed by + * a RamDiscardManager responsible for the mapped memory region of the + * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock + * must not get populated automatically. We don't have to track + * modifications via userfaultfd WP reliably, because these pages will + * not be part of the migration stream either way -- see + * ramblock_dirty_bitmap_exclude_discarded_pages(). + * + * Note: The result is only stable while migrating (precopy/postcopy). + */ + if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr); + MemoryRegionSection section = { + .mr = rb->mr, + .offset_within_region = 0, + .size = rb->mr->size, + }; + + ram_discard_manager_replay_populated(rdm, &section, + populate_read_section, NULL); + } else { + populate_read_range(rb, 0, rb->used_length); } } @@ -1609,7 +1727,7 @@ void ram_write_tracking_prepare(void) * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip * pages with pte_none() entries in page table. */ - ram_block_populate_pages(block); + ram_block_populate_read(block); } } @@ -2216,7 +2334,14 @@ static void ram_save_cleanup(void *opaque) /* caller have hold iothread lock or is in a bh, so there is * no writing race against the migration bitmap */ - memory_global_dirty_log_stop(); + if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) { + /* + * Do not stop the dirty log without having started it, since + * memory_global_dirty_log_stop asserts that + * memory_global_dirty_log_start/stop are used in pairs. + */ + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } } RAMBLOCK_FOREACH_NOT_IGNORED(block) { @@ -2668,6 +2793,19 @@ static void ram_list_init_bitmaps(void) } } +static void migration_bitmap_clear_discarded_pages(RAMState *rs) +{ + unsigned long pages; + RAMBlock *rb; + + RCU_READ_LOCK_GUARD(); + + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { + pages = ramblock_dirty_bitmap_clear_discarded_pages(rb); + rs->migration_dirty_pages -= pages; + } +} + static void ram_init_bitmaps(RAMState *rs) { /* For memory_global_dirty_log_start below. */ @@ -2678,12 +2816,18 @@ static void ram_init_bitmaps(RAMState *rs) ram_list_init_bitmaps(); /* We don't use dirty log with background snapshots */ if (!migrate_background_snapshot()) { - memory_global_dirty_log_start(); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); migration_bitmap_sync_precopy(rs); } } qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); + + /* + * After an eventual first bitmap sync, fixup the initial bitmap + * containing all 1s to exclude any discarded pages from migration.
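The guard added to ram_save_cleanup() reflects the new contract visible in these hunks: global dirty logging is tracked per-user via flags (GLOBAL_DIRTY_MIGRATION here, GLOBAL_DIRTY_DIRTY_RATE in dirtyrate.c), and every start must be paired with a stop of the same flag. A toy model of that assumed discipline, with illustrative flag values:

    #include <assert.h>

    /* Hypothetical mini-model; the real flags live in QEMU headers. */
    #define GLOBAL_DIRTY_MIGRATION  (1u << 0)
    #define GLOBAL_DIRTY_DIRTY_RATE (1u << 1)

    static unsigned global_dirty_tracking;

    static void dirty_log_start(unsigned flag)
    {
        assert(!(global_dirty_tracking & flag)); /* no unpaired double start */
        global_dirty_tracking |= flag;
    }

    static void dirty_log_stop(unsigned flag)
    {
        assert(global_dirty_tracking & flag);    /* stop only what started */
        global_dirty_tracking &= ~flag;
    }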
 static int ram_init_all(RAMState **rsp)
 {
@@ -3434,7 +3578,7 @@ void colo_incoming_start_dirty_log(void)
             /* Discard this dirty bitmap record */
             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
         }
-        memory_global_dirty_log_start();
+        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
     }
     ram_state->migration_dirty_pages = 0;
     qemu_mutex_unlock_ramlist();
@@ -3446,7 +3590,7 @@ void colo_release_ram_cache(void)
 {
     RAMBlock *block;
 
-    memory_global_dirty_log_stop();
+    memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
         g_free(block->bmap);
         block->bmap = NULL;
@@ -4112,6 +4256,10 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
      */
     bitmap_complement(block->bmap, block->bmap, nbits);
 
+    /* Clear dirty bits of discarded ranges that we don't want to migrate. */
+    ramblock_dirty_bitmap_clear_discarded_pages(block);
+
+    /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
     trace_ram_dirty_bitmap_reload_complete(block->idstr);
 
     /*
diff --git a/migration/ram.h b/migration/ram.h
index 4833e9fd5b..dda1988f3d 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -72,6 +72,7 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);
 int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name);
 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
+bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
 
 /* ram cache */
 int colo_init_ram_cache(void);
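The ramblock_page_is_discarded() helper exported above is meant for the incoming side, which must not request or place pages that were logically unplugged. A sketch of the call pattern a consumer might use, e.g. on the postcopy request path (the function below is hypothetical, not part of this patch):

    /*
     * Hypothetical consumer: a discarded page has undefined content and is
     * not part of the migration stream, so it must not be requested from
     * the source.
     */
    static bool postcopy_page_wanted(RAMBlock *rb, ram_addr_t start)
    {
        return !ramblock_page_is_discarded(rb, start);
    }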
diff --git a/migration/rdma.c b/migration/rdma.c
index 2a3c7889b9..f5d3bbe7e9 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -358,9 +358,11 @@ typedef struct RDMAContext {
     struct ibv_context *verbs;
     struct rdma_event_channel *channel;
     struct ibv_qp *qp; /* queue pair */
-    struct ibv_comp_channel *comp_channel; /* completion channel */
+    struct ibv_comp_channel *recv_comp_channel; /* recv completion channel */
+    struct ibv_comp_channel *send_comp_channel; /* send completion channel */
     struct ibv_pd *pd; /* protection domain */
-    struct ibv_cq *cq; /* completion queue */
+    struct ibv_cq *recv_cq; /* receive completion queue */
+    struct ibv_cq *send_cq; /* send completion queue */
 
     /*
      * If a previous write failed (perhaps because of a failed
@@ -1059,21 +1061,34 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
         return -1;
     }
 
-    /* create completion channel */
-    rdma->comp_channel = ibv_create_comp_channel(rdma->verbs);
-    if (!rdma->comp_channel) {
-        error_report("failed to allocate completion channel");
+    /* create receive completion channel */
+    rdma->recv_comp_channel = ibv_create_comp_channel(rdma->verbs);
+    if (!rdma->recv_comp_channel) {
+        error_report("failed to allocate receive completion channel");
         goto err_alloc_pd_cq;
     }
 
     /*
-     * Completion queue can be filled by both read and write work requests,
-     * so must reflect the sum of both possible queue sizes.
+     * Completion queue can be filled by read work requests.
     */
-    rdma->cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
-                             NULL, rdma->comp_channel, 0);
-    if (!rdma->cq) {
-        error_report("failed to allocate completion queue");
+    rdma->recv_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
+                                  NULL, rdma->recv_comp_channel, 0);
+    if (!rdma->recv_cq) {
+        error_report("failed to allocate receive completion queue");
+        goto err_alloc_pd_cq;
+    }
+
+    /* create send completion channel */
+    rdma->send_comp_channel = ibv_create_comp_channel(rdma->verbs);
+    if (!rdma->send_comp_channel) {
+        error_report("failed to allocate send completion channel");
+        goto err_alloc_pd_cq;
+    }
+
+    rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
+                                  NULL, rdma->send_comp_channel, 0);
+    if (!rdma->send_cq) {
+        error_report("failed to allocate send completion queue");
         goto err_alloc_pd_cq;
     }
 
@@ -1083,11 +1098,19 @@ err_alloc_pd_cq:
     if (rdma->pd) {
         ibv_dealloc_pd(rdma->pd);
     }
-    if (rdma->comp_channel) {
-        ibv_destroy_comp_channel(rdma->comp_channel);
+    if (rdma->recv_comp_channel) {
+        ibv_destroy_comp_channel(rdma->recv_comp_channel);
+    }
+    if (rdma->send_comp_channel) {
+        ibv_destroy_comp_channel(rdma->send_comp_channel);
+    }
+    if (rdma->recv_cq) {
+        ibv_destroy_cq(rdma->recv_cq);
+        rdma->recv_cq = NULL;
     }
     rdma->pd = NULL;
-    rdma->comp_channel = NULL;
+    rdma->recv_comp_channel = NULL;
+    rdma->send_comp_channel = NULL;
     return -1;
 }
 
@@ -1104,8 +1127,8 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
     attr.cap.max_recv_wr = 3;
     attr.cap.max_send_sge = 1;
     attr.cap.max_recv_sge = 1;
-    attr.send_cq = rdma->cq;
-    attr.recv_cq = rdma->cq;
+    attr.send_cq = rdma->send_cq;
+    attr.recv_cq = rdma->recv_cq;
     attr.qp_type = IBV_QPT_RC;
 
     ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
@@ -1496,14 +1519,14 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
  * (of any kind) has completed.
  * Return the work request ID that completed.
  */
-static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
-                               uint32_t *byte_len)
+static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq,
+                               uint64_t *wr_id_out, uint32_t *byte_len)
 {
     int ret;
     struct ibv_wc wc;
     uint64_t wr_id;
 
-    ret = ibv_poll_cq(rdma->cq, 1, &wc);
+    ret = ibv_poll_cq(cq, 1, &wc);
     if (!ret) {
         *wr_id_out = RDMA_WRID_NONE;
 
@@ -1575,7 +1598,8 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
 
 /* Wait for activity on the completion channel.
  * Returns 0 on success, non-zero on error.
*/ -static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) +static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, + struct ibv_comp_channel *comp_channel) { struct rdma_cm_event *cm_event; int ret = -1; @@ -1586,7 +1610,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) */ if (rdma->migration_started_on_destination && migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE) { - yield_until_fd_readable(rdma->comp_channel->fd); + yield_until_fd_readable(comp_channel->fd); } else { /* This is the source side, we're in a separate thread * or destination prior to migration_fd_process_incoming() @@ -1597,7 +1621,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) */ while (!rdma->error_state && !rdma->received_error) { GPollFD pfds[2]; - pfds[0].fd = rdma->comp_channel->fd; + pfds[0].fd = comp_channel->fd; pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; pfds[0].revents = 0; @@ -1655,6 +1679,17 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) return rdma->error_state; } +static struct ibv_comp_channel *to_channel(RDMAContext *rdma, int wrid) +{ + return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_comp_channel : + rdma->recv_comp_channel; +} + +static struct ibv_cq *to_cq(RDMAContext *rdma, int wrid) +{ + return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_cq : rdma->recv_cq; +} + /* * Block until the next work request has completed. * @@ -1675,13 +1710,15 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, struct ibv_cq *cq; void *cq_ctx; uint64_t wr_id = RDMA_WRID_NONE, wr_id_in; + struct ibv_comp_channel *ch = to_channel(rdma, wrid_requested); + struct ibv_cq *poll_cq = to_cq(rdma, wrid_requested); - if (ibv_req_notify_cq(rdma->cq, 0)) { + if (ibv_req_notify_cq(poll_cq, 0)) { return -1; } /* poll cq first */ while (wr_id != wrid_requested) { - ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len); + ret = qemu_rdma_poll(rdma, poll_cq, &wr_id_in, byte_len); if (ret < 0) { return ret; } @@ -1702,12 +1739,12 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, } while (1) { - ret = qemu_rdma_wait_comp_channel(rdma); + ret = qemu_rdma_wait_comp_channel(rdma, ch); if (ret) { goto err_block_for_wrid; } - ret = ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx); + ret = ibv_get_cq_event(ch, &cq, &cq_ctx); if (ret) { perror("ibv_get_cq_event"); goto err_block_for_wrid; @@ -1721,7 +1758,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, } while (wr_id != wrid_requested) { - ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len); + ret = qemu_rdma_poll(rdma, poll_cq, &wr_id_in, byte_len); if (ret < 0) { goto err_block_for_wrid; } @@ -2437,13 +2474,21 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) rdma_destroy_qp(rdma->cm_id); rdma->qp = NULL; } - if (rdma->cq) { - ibv_destroy_cq(rdma->cq); - rdma->cq = NULL; + if (rdma->recv_cq) { + ibv_destroy_cq(rdma->recv_cq); + rdma->recv_cq = NULL; + } + if (rdma->send_cq) { + ibv_destroy_cq(rdma->send_cq); + rdma->send_cq = NULL; + } + if (rdma->recv_comp_channel) { + ibv_destroy_comp_channel(rdma->recv_comp_channel); + rdma->recv_comp_channel = NULL; } - if (rdma->comp_channel) { - ibv_destroy_comp_channel(rdma->comp_channel); - rdma->comp_channel = NULL; + if (rdma->send_comp_channel) { + ibv_destroy_comp_channel(rdma->send_comp_channel); + rdma->send_comp_channel = NULL; } if (rdma->pd) { ibv_dealloc_pd(rdma->pd); @@ -3115,10 +3160,14 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc, { QIOChannelRDMA *rioc = 
QIO_CHANNEL_RDMA(ioc);
     if (io_read) {
-        aio_set_fd_handler(ctx, rioc->rdmain->comp_channel->fd,
+        aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd,
+                           false, io_read, io_write, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd,
                            false, io_read, io_write, NULL, opaque);
     } else {
-        aio_set_fd_handler(ctx, rioc->rdmaout->comp_channel->fd,
+        aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd,
+                           false, io_read, io_write, NULL, opaque);
+        aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd,
                            false, io_read, io_write, NULL, opaque);
     }
 }
@@ -3332,7 +3381,22 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
      */
     while (1) {
         uint64_t wr_id, wr_id_in;
-        int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL);
+        int ret = qemu_rdma_poll(rdma, rdma->recv_cq, &wr_id_in, NULL);
         if (ret < 0) {
             error_report("rdma migration: polling error! %d", ret);
             goto err;
         }
+
+        wr_id = wr_id_in & RDMA_WRID_TYPE_MASK;
+
+        if (wr_id == RDMA_WRID_NONE) {
+            break;
+        }
+    }
+
+    while (1) {
+        uint64_t wr_id, wr_id_in;
+        int ret = qemu_rdma_poll(rdma, rdma->send_cq, &wr_id_in, NULL);
         if (ret < 0) {
             error_report("rdma migration: polling error! %d", ret);
             goto err;
diff --git a/migration/trace-events b/migration/trace-events
index a8ae163707..b48d873b8a 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -333,6 +333,8 @@ get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock n
 calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
 skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
 find_page_matched(const char *idstr) "ramblock %s addr or size changed"
+dirtyrate_calculate(int64_t dirtyrate) "dirty rate: %" PRIi64 " MB/s"
+dirtyrate_do_calculate_vcpu(int idx, uint64_t rate) "vcpu[%d]: %"PRIu64 " MB/s"
 
 # block.c
 migration_block_init_shared(const char *blk_device_name) "Start migration for %s with shared base image"
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 91a73db9a3..67f32a8602 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index a5b7389191..376b01c10b 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index f7a501efc6..bbd746fde9 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/qapi/migration.json b/qapi/migration.json
index 9aa8bc5759..87146ceea2 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1732,6 +1732,21 @@
 'data': { 'device-id': 'str' } }
 
 ##
+# @DirtyRateVcpu:
+#
+# Dirty rate of a vCPU.
+#
+# @id: vcpu index.
+#
+# @dirty-rate: dirty rate.
+#
+# Since: 6.1
+#
+##
+{ 'struct': 'DirtyRateVcpu',
+  'data': { 'id': 'int', 'dirty-rate': 'int64' } }
+
+##
 # @DirtyRateStatus:
 #
 # An enumeration of dirtyrate status.
@@ -1749,6 +1764,23 @@
 'data': [ 'unstarted', 'measuring', 'measured'] }
 
 ##
+# @DirtyRateMeasureMode:
+#
+# An enumeration of modes for measuring dirtyrate.
+#
+# @page-sampling: calculate dirtyrate by sampling pages.
+#
+# @dirty-ring: calculate dirtyrate by dirty ring.
+#
+# @dirty-bitmap: calculate dirtyrate by dirty bitmap.
+#
+# Since: 6.1
+#
+##
+{ 'enum': 'DirtyRateMeasureMode',
+  'data': ['page-sampling', 'dirty-ring', 'dirty-bitmap'] }
+
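For orientation, a QMP session exercising the new mode argument might look like this (an illustrative exchange; the numbers are made up, and the full set of return fields is documented under @DirtyRateInfo below):

    -> { "execute": "calc-dirty-rate",
         "arguments": { "calc-time": 1, "mode": "dirty-ring" } }
    <- { "return": {} }
    -> { "execute": "query-dirty-rate" }
    <- { "return": { "status": "measured", "dirty-rate": 108,
                     "mode": "dirty-ring", "calc-time": 1,
                     "vcpu-dirty-rate": [ { "id": 0, "dirty-rate": 108 } ] } }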
+##
 # @DirtyRateInfo:
 #
 # Information about current dirty page rate of vm.
@@ -1766,6 +1798,12 @@
 # @sample-pages: page count per GB for sample dirty pages
 # the default value is 512 (since 6.1)
 #
+# @mode: the method used to calculate dirtyrate, either
+# 'page-sampling' or 'dirty-ring' (Since 6.1)
+#
+# @vcpu-dirty-rate: dirtyrate for each vCPU if dirty-ring
+# mode was specified (Since 6.1)
+#
 # Since: 5.2
 #
 ##
@@ -1774,7 +1812,9 @@
 'status': 'DirtyRateStatus',
 'start-time': 'int64',
 'calc-time': 'int64',
- 'sample-pages': 'uint64'} }
+ 'sample-pages': 'uint64',
+ 'mode': 'DirtyRateMeasureMode',
+ '*vcpu-dirty-rate': [ 'DirtyRateVcpu' ] } }
 
 ##
 # @calc-dirty-rate:
@@ -1786,6 +1826,9 @@
 # @sample-pages: page count per GB for sample dirty pages
 # the default value is 512 (since 6.1)
 #
+# @mode: the mechanism used to calculate dirtyrate, either
+# 'page-sampling' or 'dirty-ring' (Since 6.1)
+#
 # Since: 5.2
 #
 # Example:
@@ -1794,7 +1837,8 @@
 #
 ##
 { 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64',
-                                         '*sample-pages': 'int'} }
+                                         '*sample-pages': 'int',
+                                         '*mode': 'DirtyRateMeasureMode'} }
 
 ##
 # @query-dirty-rate:
diff --git a/roms/openbios b/roms/openbios
-Subproject commit d657b653186c0fd6e062cab133497415c2a5a5b
+Subproject commit b9062deaaea7269369eaa46260d75edcaf276af
diff --git a/softmmu/memory.c b/softmmu/memory.c
index e5826faa0c..7340e19ff5 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -39,7 +39,7 @@
 static unsigned memory_region_transaction_depth;
 static bool memory_region_update_pending;
 static bool ioeventfd_update_pending;
-bool global_dirty_log;
+unsigned int global_dirty_tracking;
 
 static QTAILQ_HEAD(, MemoryListener) memory_listeners
     = QTAILQ_HEAD_INITIALIZER(memory_listeners);
@@ -1821,7 +1821,7 @@ uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
     uint8_t mask = mr->dirty_log_mask;
     RAMBlock *rb = mr->ram_block;
 
-    if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) ||
+    if (global_dirty_tracking && ((rb && qemu_ram_is_migratable(rb)) ||
                              memory_region_is_iommu(mr))) {
         mask |= (1 << DIRTY_MEMORY_MIGRATION);
     }
@@ -2081,6 +2081,17 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
     return rdmc->replay_populated(rdm, section, replay_fn, opaque);
 }
 
+void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscard replay_fn,
+                                          void *opaque)
+{
+    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+    g_assert(rdmc->replay_discarded);
+    rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+}
+
 void ram_discard_manager_register_listener(RamDiscardManager *rdm,
                                            RamDiscardListener *rdl,
                                            MemoryRegionSection *section)
@@ -2760,14 +2771,18 @@ void memory_global_after_dirty_log_sync(void)
 
 static VMChangeStateEntry *vmstate_change;
 
-void memory_global_dirty_log_start(void)
+void memory_global_dirty_log_start(unsigned int flags)
 {
     if (vmstate_change) {
         qemu_del_vm_change_state_handler(vmstate_change);
         vmstate_change = NULL;
     }
 
-    global_dirty_log = true;
+    assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
+    assert(!(global_dirty_tracking & flags));
+    global_dirty_tracking |= flags;
+
+    trace_global_dirty_changed(global_dirty_tracking);
 
     MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
 
@@ -2777,9 +2792,13 @@ void memory_global_dirty_log_start(void)
     memory_region_transaction_commit();
 }
 
-static void memory_global_dirty_log_do_stop(void)
+static void
memory_global_dirty_log_do_stop(unsigned int flags) { - global_dirty_log = false; + assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); + assert((global_dirty_tracking & flags) == flags); + global_dirty_tracking &= ~flags; + + trace_global_dirty_changed(global_dirty_tracking); /* Refresh DIRTY_MEMORY_MIGRATION bit. */ memory_region_transaction_begin(); @@ -2792,8 +2811,9 @@ static void memory_global_dirty_log_do_stop(void) static void memory_vm_change_state_handler(void *opaque, bool running, RunState state) { + unsigned int flags = (unsigned int)(uintptr_t)opaque; if (running) { - memory_global_dirty_log_do_stop(); + memory_global_dirty_log_do_stop(flags); if (vmstate_change) { qemu_del_vm_change_state_handler(vmstate_change); @@ -2802,18 +2822,19 @@ static void memory_vm_change_state_handler(void *opaque, bool running, } } -void memory_global_dirty_log_stop(void) +void memory_global_dirty_log_stop(unsigned int flags) { if (!runstate_is_running()) { if (vmstate_change) { return; } vmstate_change = qemu_add_vm_change_state_handler( - memory_vm_change_state_handler, NULL); + memory_vm_change_state_handler, + (void *)(uintptr_t)flags); return; } - memory_global_dirty_log_do_stop(); + memory_global_dirty_log_do_stop(flags); } static void listener_add_address_space(MemoryListener *listener, @@ -2825,7 +2846,7 @@ static void listener_add_address_space(MemoryListener *listener, if (listener->begin) { listener->begin(listener); } - if (global_dirty_log) { + if (global_dirty_tracking) { if (listener->log_global_start) { listener->log_global_start(listener); } diff --git a/softmmu/trace-events b/softmmu/trace-events index bf1469990e..9c88887b3c 100644 --- a/softmmu/trace-events +++ b/softmmu/trace-events @@ -19,6 +19,7 @@ memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr ' flatview_new(void *view, void *root) "%p (root %p)" flatview_destroy(void *view, void *root) "%p (root %p)" flatview_destroy_rcu(void *view, void *root) "%p (root %p)" +global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32 # softmmu.c vm_stop_flush_all(int ret) "ret %d" diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 93e16a2ffb..a8990d401b 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -218,9 +218,12 @@ static const struct SysemuCPUOps alpha_sysemu_ops = { static const struct TCGCPUOps alpha_tcg_ops = { .initialize = alpha_translate_init, - .tlb_fill = alpha_cpu_tlb_fill, -#ifndef CONFIG_USER_ONLY +#ifdef CONFIG_USER_ONLY + .record_sigsegv = alpha_cpu_record_sigsegv, + .record_sigbus = alpha_cpu_record_sigbus, +#else + .tlb_fill = alpha_cpu_tlb_fill, .cpu_exec_interrupt = alpha_cpu_exec_interrupt, .do_interrupt = alpha_cpu_do_interrupt, .do_transaction_failed = alpha_cpu_do_transaction_failed, diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index 772828cc26..afd975c878 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -282,9 +282,6 @@ void alpha_cpu_dump_state(CPUState *cs, FILE *f, int flags); hwaddr alpha_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr, - MMUAccessType access_type, int mmu_idx, - uintptr_t retaddr) QEMU_NORETURN; #define cpu_list alpha_cpu_list @@ -439,9 +436,6 @@ void alpha_translate_init(void); #define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU void alpha_cpu_list(void); -bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - 
MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); void QEMU_NORETURN dynamic_excp(CPUAlphaState *, uintptr_t, int, int); void QEMU_NORETURN arith_excp(CPUAlphaState *, uintptr_t, int, uint64_t); @@ -449,7 +443,20 @@ uint64_t cpu_alpha_load_fpcr (CPUAlphaState *env); void cpu_alpha_store_fpcr (CPUAlphaState *env, uint64_t val); uint64_t cpu_alpha_load_gr(CPUAlphaState *env, unsigned reg); void cpu_alpha_store_gr(CPUAlphaState *env, unsigned reg, uint64_t val); -#ifndef CONFIG_USER_ONLY + +#ifdef CONFIG_USER_ONLY +void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address, + MMUAccessType access_type, + bool maperr, uintptr_t retaddr); +void alpha_cpu_record_sigbus(CPUState *cs, vaddr address, + MMUAccessType access_type, uintptr_t retaddr); +#else +bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); +void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) QEMU_NORETURN; void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, diff --git a/target/alpha/helper.c b/target/alpha/helper.c index 81550d9e2f..b7e7f73b15 100644 --- a/target/alpha/helper.c +++ b/target/alpha/helper.c @@ -120,15 +120,44 @@ void cpu_alpha_store_gr(CPUAlphaState *env, unsigned reg, uint64_t val) } #if defined(CONFIG_USER_ONLY) -bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) +void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address, + MMUAccessType access_type, + bool maperr, uintptr_t retaddr) { AlphaCPU *cpu = ALPHA_CPU(cs); + target_ulong mmcsr, cause; + + /* Assuming !maperr, infer the missing protection. */ + switch (access_type) { + case MMU_DATA_LOAD: + mmcsr = MM_K_FOR; + cause = 0; + break; + case MMU_DATA_STORE: + mmcsr = MM_K_FOW; + cause = 1; + break; + case MMU_INST_FETCH: + mmcsr = MM_K_FOE; + cause = -1; + break; + default: + g_assert_not_reached(); + } + if (maperr) { + if (address < BIT_ULL(TARGET_VIRT_ADDR_SPACE_BITS - 1)) { + /* Userspace address, therefore page not mapped. */ + mmcsr = MM_K_TNV; + } else { + /* Kernel or invalid address. */ + mmcsr = MM_K_ACV; + } + } - cs->exception_index = EXCP_MMFAULT; + /* Record the arguments that PALcode would give to the kernel. */ cpu->env.trap_arg0 = address; - cpu_loop_exit_restore(cs, retaddr); + cpu->env.trap_arg1 = mmcsr; + cpu->env.trap_arg2 = cause; } #else /* Returns the OSF/1 entMM failure indication, or -1 on success. 
*/ diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c index 75e72bc337..47283a0612 100644 --- a/target/alpha/mem_helper.c +++ b/target/alpha/mem_helper.c @@ -23,18 +23,12 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" -/* Softmmu support */ -#ifndef CONFIG_USER_ONLY -void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) +static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retaddr) { - AlphaCPU *cpu = ALPHA_CPU(cs); - CPUAlphaState *env = &cpu->env; uint64_t pc; uint32_t insn; - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(env_cpu(env), retaddr, true); pc = env->pc; insn = cpu_ldl_code(env, pc); @@ -42,6 +36,26 @@ void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr, env->trap_arg0 = addr; env->trap_arg1 = insn >> 26; /* opcode */ env->trap_arg2 = (insn >> 21) & 31; /* dest regno */ +} + +#ifdef CONFIG_USER_ONLY +void alpha_cpu_record_sigbus(CPUState *cs, vaddr addr, + MMUAccessType access_type, uintptr_t retaddr) +{ + AlphaCPU *cpu = ALPHA_CPU(cs); + CPUAlphaState *env = &cpu->env; + + do_unaligned_access(env, addr, retaddr); +} +#else +void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + AlphaCPU *cpu = ALPHA_CPU(cs); + CPUAlphaState *env = &cpu->env; + + do_unaligned_access(env, addr, retaddr); cs->exception_index = EXCP_UNALIGN; env->error_code = 0; cpu_loop_exit(cs); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 641a8c2d3d..a211804fd3 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2031,10 +2031,13 @@ static const struct SysemuCPUOps arm_sysemu_ops = { static const struct TCGCPUOps arm_tcg_ops = { .initialize = arm_translate_init, .synchronize_from_tb = arm_cpu_synchronize_from_tb, - .tlb_fill = arm_cpu_tlb_fill, .debug_excp_handler = arm_debug_excp_handler, -#if !defined(CONFIG_USER_ONLY) +#ifdef CONFIG_USER_ONLY + .record_sigsegv = arm_cpu_record_sigsegv, + .record_sigbus = arm_cpu_record_sigbus, +#else + .tlb_fill = arm_cpu_tlb_fill, .cpu_exec_interrupt = arm_cpu_exec_interrupt, .do_interrupt = arm_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c index 0d5adccf1a..13d0e9b195 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -898,10 +898,13 @@ static void pxa270c5_initfn(Object *obj) static const struct TCGCPUOps arm_v7m_tcg_ops = { .initialize = arm_translate_init, .synchronize_from_tb = arm_cpu_synchronize_from_tb, - .tlb_fill = arm_cpu_tlb_fill, .debug_excp_handler = arm_debug_excp_handler, -#if !defined(CONFIG_USER_ONLY) +#ifdef CONFIG_USER_ONLY + .record_sigsegv = arm_cpu_record_sigsegv, + .record_sigbus = arm_cpu_record_sigbus, +#else + .tlb_fill = arm_cpu_tlb_fill, .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, .do_interrupt = arm_v7m_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, diff --git a/target/arm/internals.h b/target/arm/internals.h index 3612107ab2..89f7610ebc 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -544,9 +544,17 @@ static inline bool arm_extabort_type(MemTxResult result) return result != MEMTX_DECODE_ERROR; } +#ifdef CONFIG_USER_ONLY +void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr, + MMUAccessType access_type, uintptr_t ra); +#else bool arm_cpu_tlb_fill(CPUState *cs, 
vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); +#endif static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx) { diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 724175210b..e09b7e46a2 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -84,10 +84,8 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, uintptr_t index; if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) { - /* SIGSEGV */ - arm_cpu_tlb_fill(env_cpu(env), ptr, ptr_size, ptr_access, - ptr_mmu_idx, false, ra); - g_assert_not_reached(); + cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access, + !(flags & PAGE_VALID), ra); } /* Require both MAP_ANON and PROT_MTE for the page. */ diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index dab5f1d1cd..07be55b7e1 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -6118,7 +6118,7 @@ DO_LDN_2(4, dd, MO_64) * linux-user/ in its get_user/put_user macros. * * TODO: Construct some helpers, written in assembly, that interact with - * handle_cpu_signal to produce memory ops which can properly report errors + * host_signal_handler to produce memory ops which can properly report errors * without racing. */ diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 3107f9823e..12a934e924 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -147,28 +147,12 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi); } -#endif /* !defined(CONFIG_USER_ONLY) */ - bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr) { ARMCPU *cpu = ARM_CPU(cs); ARMMMUFaultInfo fi = {}; - -#ifdef CONFIG_USER_ONLY - int flags = page_get_flags(useronly_clean_ptr(address)); - if (flags & PAGE_VALID) { - fi.type = ARMFault_Permission; - } else { - fi.type = ARMFault_Translation; - } - fi.level = 3; - - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); - arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi); -#else hwaddr phys_addr; target_ulong page_size; int prot, ret; @@ -210,5 +194,29 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, cpu_restore_state(cs, retaddr, true); arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi); } -#endif } +#else +void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra) +{ + ARMMMUFaultInfo fi = { + .type = maperr ? ARMFault_Translation : ARMFault_Permission, + .level = 3, + }; + ARMCPU *cpu = ARM_CPU(cs); + + /* + * We report both ESR and FAR to signal handlers. + * For now, it's easiest to deliver the fault normally. 
+ */ + cpu_restore_state(cs, ra, true); + arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi); +} + +void arm_cpu_record_sigbus(CPUState *cs, vaddr addr, + MMUAccessType access_type, uintptr_t ra) +{ + arm_cpu_do_unaligned_access(cs, addr, access_type, MMU_USER_IDX, ra); +} +#endif /* !defined(CONFIG_USER_ONLY) */ diff --git a/target/cris/cpu.c b/target/cris/cpu.c index c2e7483f5b..ed6c781342 100644 --- a/target/cris/cpu.c +++ b/target/cris/cpu.c @@ -205,9 +205,9 @@ static const struct SysemuCPUOps cris_sysemu_ops = { static const struct TCGCPUOps crisv10_tcg_ops = { .initialize = cris_initialize_crisv10_tcg, - .tlb_fill = cris_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = cris_cpu_tlb_fill, .cpu_exec_interrupt = cris_cpu_exec_interrupt, .do_interrupt = crisv10_cpu_do_interrupt, #endif /* !CONFIG_USER_ONLY */ @@ -215,9 +215,9 @@ static const struct TCGCPUOps crisv10_tcg_ops = { static const struct TCGCPUOps crisv32_tcg_ops = { .initialize = cris_initialize_tcg, - .tlb_fill = cris_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = cris_cpu_tlb_fill, .cpu_exec_interrupt = cris_cpu_exec_interrupt, .do_interrupt = cris_cpu_do_interrupt, #endif /* !CONFIG_USER_ONLY */ diff --git a/target/cris/cpu.h b/target/cris/cpu.h index 6603565f83..b445b194ea 100644 --- a/target/cris/cpu.h +++ b/target/cris/cpu.h @@ -189,6 +189,10 @@ extern const VMStateDescription vmstate_cris_cpu; void cris_cpu_do_interrupt(CPUState *cpu); void crisv10_cpu_do_interrupt(CPUState *cpu); bool cris_cpu_exec_interrupt(CPUState *cpu, int int_req); + +bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); #endif void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags); @@ -251,10 +255,6 @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch) return !!(env->pregs[PR_CCS] & U_FLAG); } -bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); - /* Support function regs. */ #define SFR_RW_GC_CFG 0][0 #define SFR_RW_MM_CFG env->pregs[PR_SRS]][0 diff --git a/target/cris/helper.c b/target/cris/helper.c index 36926faf32..a0d6ecdcd3 100644 --- a/target/cris/helper.c +++ b/target/cris/helper.c @@ -39,22 +39,6 @@ #define D_LOG(...) 
do { } while (0) #endif -#if defined(CONFIG_USER_ONLY) - -bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - CRISCPU *cpu = CRIS_CPU(cs); - - cs->exception_index = 0xaa; - cpu->env.pregs[PR_EDA] = address; - cpu_loop_exit_restore(cs, retaddr); -} - -#else /* !CONFIG_USER_ONLY */ - - static void cris_shift_ccs(CPUCRISState *env) { uint32_t ccs; @@ -304,5 +288,3 @@ bool cris_cpu_exec_interrupt(CPUState *cs, int interrupt_request) return ret; } - -#endif /* !CONFIG_USER_ONLY */ diff --git a/target/cris/meson.build b/target/cris/meson.build index 67c3793c85..c1e326d950 100644 --- a/target/cris/meson.build +++ b/target/cris/meson.build @@ -2,13 +2,16 @@ cris_ss = ss.source_set() cris_ss.add(files( 'cpu.c', 'gdbstub.c', - 'helper.c', 'op_helper.c', 'translate.c', )) cris_softmmu_ss = ss.source_set() -cris_softmmu_ss.add(files('mmu.c', 'machine.c')) +cris_softmmu_ss.add(files( + 'helper.c', + 'machine.c', + 'mmu.c', +)) target_arch += {'cris': cris_ss} target_softmmu_arch += {'cris': cris_softmmu_ss} diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 3338365c16..160a46a3d5 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -245,34 +245,11 @@ static void hexagon_cpu_init(Object *obj) qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property); } -static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ -#ifdef CONFIG_USER_ONLY - switch (access_type) { - case MMU_INST_FETCH: - cs->exception_index = HEX_EXCP_FETCH_NO_UPAGE; - break; - case MMU_DATA_LOAD: - cs->exception_index = HEX_EXCP_PRIV_NO_UREAD; - break; - case MMU_DATA_STORE: - cs->exception_index = HEX_EXCP_PRIV_NO_UWRITE; - break; - } - cpu_loop_exit_restore(cs, retaddr); -#else -#error System mode not implemented for Hexagon -#endif -} - #include "hw/core/tcg-cpu-ops.h" static const struct TCGCPUOps hexagon_tcg_ops = { .initialize = hexagon_translate_init, .synchronize_from_tb = hexagon_cpu_synchronize_from_tb, - .tlb_fill = hexagon_tlb_fill, }; static void hexagon_cpu_class_init(ObjectClass *c, void *data) diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 89cba9d7a2..23eb254228 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -145,9 +145,9 @@ static const struct SysemuCPUOps hppa_sysemu_ops = { static const struct TCGCPUOps hppa_tcg_ops = { .initialize = hppa_translate_init, .synchronize_from_tb = hppa_cpu_synchronize_from_tb, - .tlb_fill = hppa_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = hppa_cpu_tlb_fill, .cpu_exec_interrupt = hppa_cpu_exec_interrupt, .do_interrupt = hppa_cpu_do_interrupt, .do_unaligned_access = hppa_cpu_do_unaligned_access, diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index d3cb7a279f..294fd7297f 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -323,10 +323,10 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr); int hppa_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hppa_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void hppa_cpu_dump_state(CPUState *cs, FILE *f, int); +#ifndef CONFIG_USER_ONLY bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); -#ifndef CONFIG_USER_ONLY void hppa_cpu_do_interrupt(CPUState *cpu); bool hppa_cpu_exec_interrupt(CPUState *cpu, int int_req); int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int 
mmu_idx, diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index afc5b56c3e..bf07445cd1 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -24,20 +24,6 @@ #include "hw/core/cpu.h" #include "trace.h" -#ifdef CONFIG_USER_ONLY -bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - HPPACPU *cpu = HPPA_CPU(cs); - - /* ??? Test between data page fault and data memory protection trap, - which would affect si_code. */ - cs->exception_index = EXCP_DMP; - cpu->env.cr[CR_IOR] = address; - cpu_loop_exit_restore(cs, retaddr); -} -#else static hppa_tlb_entry *hppa_find_tlb(CPUHPPAState *env, vaddr addr) { int i; @@ -392,4 +378,3 @@ int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr) hppa_tlb_entry *ent = hppa_find_tlb(env, vaddr); return ent ? ent->ar_type : -1; } -#endif /* CONFIG_USER_ONLY */ diff --git a/target/hppa/meson.build b/target/hppa/meson.build index 8a7ff82efc..021e42a2d0 100644 --- a/target/hppa/meson.build +++ b/target/hppa/meson.build @@ -7,13 +7,15 @@ hppa_ss.add(files( 'gdbstub.c', 'helper.c', 'int_helper.c', - 'mem_helper.c', 'op_helper.c', 'translate.c', )) hppa_softmmu_ss = ss.source_set() -hppa_softmmu_ss.add(files('machine.c')) +hppa_softmmu_ss.add(files( + 'machine.c', + 'mem_helper.c', +)) target_arch += {'hppa': hppa_ss} target_softmmu_arch += {'hppa': hppa_softmmu_ss} diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 60ca09e95e..0a4401e917 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -43,9 +43,15 @@ bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); #endif /* helper.c */ +#ifdef CONFIG_USER_ONLY +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +#else bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); +#endif void breakpoint_handler(CPUState *cs); diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 3ecfae34cb..6fdfdf9598 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -72,10 +72,11 @@ static const struct TCGCPUOps x86_tcg_ops = { .synchronize_from_tb = x86_cpu_synchronize_from_tb, .cpu_exec_enter = x86_cpu_exec_enter, .cpu_exec_exit = x86_cpu_exec_exit, - .tlb_fill = x86_cpu_tlb_fill, #ifdef CONFIG_USER_ONLY .fake_user_interrupt = x86_cpu_do_interrupt, + .record_sigsegv = x86_cpu_record_sigsegv, #else + .tlb_fill = x86_cpu_tlb_fill, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, .debug_excp_handler = breakpoint_handler, diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c index a89b5228fd..cd507e2a1b 100644 --- a/target/i386/tcg/user/excp_helper.c +++ b/target/i386/tcg/user/excp_helper.c @@ -22,18 +22,29 @@ #include "exec/exec-all.h" #include "tcg/helper-tcg.h" -bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + /* + * The error_code that hw reports as part of the exception frame + * is copied to linux sigcontext.err. The exception_index is + * copied to linux sigcontext.trapno. 
Short of inventing a new + * place to store the trapno, we cannot let our caller raise the + * signal and set exception_index to EXCP_INTERRUPT. + */ env->cr[2] = addr; - env->error_code = (access_type == MMU_DATA_STORE) << PG_ERROR_W_BIT; - env->error_code |= PG_ERROR_U_MASK; + env->error_code = ((access_type == MMU_DATA_STORE) << PG_ERROR_W_BIT) + | (maperr ? 0 : PG_ERROR_P_MASK) + | PG_ERROR_U_MASK; cs->exception_index = EXCP0E_PAGE; + + /* Disable do_interrupt_user. */ env->exception_is_int = 0; env->exception_next_eip = -1; - cpu_loop_exit_restore(cs, retaddr); + + cpu_loop_exit_restore(cs, ra); } diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index 66d22d1189..c7aeb7da9c 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -515,9 +515,9 @@ static const struct SysemuCPUOps m68k_sysemu_ops = { static const struct TCGCPUOps m68k_tcg_ops = { .initialize = m68k_tcg_init, - .tlb_fill = m68k_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = m68k_cpu_tlb_fill, .cpu_exec_interrupt = m68k_cpu_exec_interrupt, .do_interrupt = m68k_cpu_do_interrupt, .do_transaction_failed = m68k_cpu_transaction_failed, diff --git a/target/m68k/helper.c b/target/m68k/helper.c index 137a3e1a3d..5728e48585 100644 --- a/target/m68k/helper.c +++ b/target/m68k/helper.c @@ -978,16 +978,12 @@ void m68k_set_irq_level(M68kCPU *cpu, int level, uint8_t vector) } } -#endif - bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType qemu_access_type, int mmu_idx, bool probe, uintptr_t retaddr) { M68kCPU *cpu = M68K_CPU(cs); CPUM68KState *env = &cpu->env; - -#ifndef CONFIG_USER_ONLY hwaddr physical; int prot; int access_type; @@ -1051,12 +1047,12 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size, if (!(access_type & ACCESS_STORE)) { env->mmu.ssw |= M68K_RW_040; } -#endif cs->exception_index = EXCP_ACCESS; env->mmu.ar = address; cpu_loop_exit_restore(cs, retaddr); } +#endif /* !CONFIG_USER_ONLY */ uint32_t HELPER(bitrev)(uint32_t x) { diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 15db277925..b9c888b87e 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -365,9 +365,9 @@ static const struct SysemuCPUOps mb_sysemu_ops = { static const struct TCGCPUOps mb_tcg_ops = { .initialize = mb_tcg_init, .synchronize_from_tb = mb_cpu_synchronize_from_tb, - .tlb_fill = mb_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = mb_cpu_tlb_fill, .cpu_exec_interrupt = mb_cpu_exec_interrupt, .do_interrupt = mb_cpu_do_interrupt, .do_transaction_failed = mb_cpu_transaction_failed, diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index b7a848bbae..e9cd0b88de 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -394,10 +394,6 @@ void mb_tcg_init(void); #define MMU_USER_IDX 2 /* See NB_MMU_MODES further up the file. 
*/ -bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); - typedef CPUMBState CPUArchState; typedef MicroBlazeCPU ArchCPU; @@ -415,6 +411,10 @@ static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc, } #if !defined(CONFIG_USER_ONLY) +bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index dd2aecd1d5..a607fe68e5 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -24,18 +24,7 @@ #include "qemu/host-utils.h" #include "exec/log.h" -#if defined(CONFIG_USER_ONLY) - -bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - cs->exception_index = 0xaa; - cpu_loop_exit_restore(cs, retaddr); -} - -#else /* !CONFIG_USER_ONLY */ - +#ifndef CONFIG_USER_ONLY static bool mb_cpu_access_is_secure(MicroBlazeCPU *cpu, MMUAccessType access_type) { diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index 437bbed6d6..2561b904b9 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -722,6 +722,7 @@ static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) } #endif +#ifndef CONFIG_USER_ONLY static void record_unaligned_ess(DisasContext *dc, int rd, MemOp size, bool store) { @@ -734,6 +735,7 @@ static void record_unaligned_ess(DisasContext *dc, int rd, tcg_set_insn_start_param(dc->insn_start, 1, iflags); } +#endif static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, int mem_index, bool rev) @@ -755,12 +757,19 @@ static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, } } + /* + * For system mode, enforce alignment if the cpu configuration + * requires it. For user-mode, the Linux kernel will have fixed up + * any unaligned access, so emulate that by *not* setting MO_ALIGN. + */ +#ifndef CONFIG_USER_ONLY if (size > MO_8 && (dc->tb_flags & MSR_EE) && dc->cfg->unaligned_exceptions) { record_unaligned_ess(dc, rd, size, false); mop |= MO_ALIGN; } +#endif tcg_gen_qemu_ld_i32(reg_for_write(dc, rd), addr, mem_index, mop); @@ -901,12 +910,19 @@ static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop, } } + /* + * For system mode, enforce alignment if the cpu configuration + * requires it. For user-mode, the Linux kernel will have fixed up + * any unaligned access, so emulate that by *not* setting MO_ALIGN. 
+ */ +#ifndef CONFIG_USER_ONLY if (size > MO_8 && (dc->tb_flags & MSR_EE) && dc->cfg->unaligned_exceptions) { record_unaligned_ess(dc, rd, size, true); mop |= MO_ALIGN; } +#endif tcg_gen_qemu_st_i32(reg_for_read(dc, rd), addr, mem_index, mop); diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 00e0c55d0e..4aae23934b 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -539,9 +539,9 @@ static const struct SysemuCPUOps mips_sysemu_ops = { static const struct TCGCPUOps mips_tcg_ops = { .initialize = mips_tcg_init, .synchronize_from_tb = mips_cpu_synchronize_from_tb, - .tlb_fill = mips_cpu_tlb_fill, #if !defined(CONFIG_USER_ONLY) + .tlb_fill = mips_cpu_tlb_fill, .cpu_exec_interrupt = mips_cpu_exec_interrupt, .do_interrupt = mips_cpu_do_interrupt, .do_transaction_failed = mips_cpu_do_transaction_failed, diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build index 8f6f7508b6..98003779ae 100644 --- a/target/mips/tcg/meson.build +++ b/target/mips/tcg/meson.build @@ -28,9 +28,6 @@ mips_ss.add(when: 'TARGET_MIPS64', if_true: files( 'mxu_translate.c', )) -if have_user - subdir('user') -endif if have_system subdir('sysemu') endif diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h index bad3deb611..466768aec4 100644 --- a/target/mips/tcg/tcg-internal.h +++ b/target/mips/tcg/tcg-internal.h @@ -18,9 +18,6 @@ void mips_tcg_init(void); void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); -bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) QEMU_NORETURN; @@ -60,6 +57,10 @@ void mips_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MemTxResult response, uintptr_t retaddr); void cpu_mips_tlb_flush(CPUMIPSState *env); +bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + #endif /* !CONFIG_USER_ONLY */ #endif diff --git a/target/mips/tcg/user/meson.build b/target/mips/tcg/user/meson.build deleted file mode 100644 index 79badcd321..0000000000 --- a/target/mips/tcg/user/meson.build +++ /dev/null @@ -1,3 +0,0 @@ -mips_user_ss.add(files( - 'tlb_helper.c', -)) diff --git a/target/mips/tcg/user/tlb_helper.c b/target/mips/tcg/user/tlb_helper.c deleted file mode 100644 index 210c6d529e..0000000000 --- a/target/mips/tcg/user/tlb_helper.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * MIPS TLB (Translation lookaside buffer) helpers. - * - * Copyright (c) 2004-2005 Jocelyn Mayer - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
- */ -#include "qemu/osdep.h" - -#include "cpu.h" -#include "exec/exec-all.h" -#include "internal.h" - -static void raise_mmu_exception(CPUMIPSState *env, target_ulong address, - MMUAccessType access_type) -{ - CPUState *cs = env_cpu(env); - - env->error_code = 0; - if (access_type == MMU_INST_FETCH) { - env->error_code |= EXCP_INST_NOTAVAIL; - } - - /* Reference to kernel address from user mode or supervisor mode */ - /* Reference to supervisor address from user mode */ - if (access_type == MMU_DATA_STORE) { - cs->exception_index = EXCP_AdES; - } else { - cs->exception_index = EXCP_AdEL; - } - - /* Raise exception */ - if (!(env->hflags & MIPS_HFLAG_DM)) { - env->CP0_BadVAddr = address; - } -} - -bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - MIPSCPU *cpu = MIPS_CPU(cs); - CPUMIPSState *env = &cpu->env; - - /* data access */ - raise_mmu_exception(env, address, access_type); - do_raise_exception_err(env, cs->exception_index, env->error_code, retaddr); -} diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c index 58ecd27d75..4cade61e93 100644 --- a/target/nios2/cpu.c +++ b/target/nios2/cpu.c @@ -216,9 +216,11 @@ static const struct SysemuCPUOps nios2_sysemu_ops = { static const struct TCGCPUOps nios2_tcg_ops = { .initialize = nios2_tcg_init, - .tlb_fill = nios2_cpu_tlb_fill, -#ifndef CONFIG_USER_ONLY +#ifdef CONFIG_USER_ONLY + .record_sigsegv = nios2_cpu_record_sigsegv, +#else + .tlb_fill = nios2_cpu_tlb_fill, .cpu_exec_interrupt = nios2_cpu_exec_interrupt, .do_interrupt = nios2_cpu_do_interrupt, .do_unaligned_access = nios2_cpu_do_unaligned_access, diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h index a80587338a..1a69ed7a49 100644 --- a/target/nios2/cpu.h +++ b/target/nios2/cpu.h @@ -218,9 +218,15 @@ static inline int cpu_mmu_index(CPUNios2State *env, bool ifetch) MMU_SUPERVISOR_IDX; } +#ifdef CONFIG_USER_ONLY +void nios2_cpu_record_sigsegv(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +#else bool nios2_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); +#endif static inline int cpu_interrupts_enabled(CPUNios2State *env) { diff --git a/target/nios2/helper.c b/target/nios2/helper.c index 53be8398e9..e5c98650e1 100644 --- a/target/nios2/helper.c +++ b/target/nios2/helper.c @@ -38,10 +38,11 @@ void nios2_cpu_do_interrupt(CPUState *cs) env->regs[R_EA] = env->regs[R_PC] + 4; } -bool nios2_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) +void nios2_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t retaddr) { + /* FIXME: Disentangle kuser page from linux-user sigsegv handling. 
*/ cs->exception_index = 0xaa; cpu_loop_exit_restore(cs, retaddr); } diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 27cb04152f..dfbafc5236 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -186,9 +186,9 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = { static const struct TCGCPUOps openrisc_tcg_ops = { .initialize = openrisc_translate_init, - .tlb_fill = openrisc_cpu_tlb_fill, #ifndef CONFIG_USER_ONLY + .tlb_fill = openrisc_cpu_tlb_fill, .cpu_exec_interrupt = openrisc_cpu_exec_interrupt, .do_interrupt = openrisc_cpu_do_interrupt, #endif /* !CONFIG_USER_ONLY */ diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index 187a4a114e..ee069b080c 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -317,14 +317,15 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void openrisc_translate_init(void); -bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); int print_insn_or1k(bfd_vma addr, disassemble_info *info); #define cpu_list cpu_openrisc_list #ifndef CONFIG_USER_ONLY +bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + extern const VMStateDescription vmstate_openrisc_cpu; void openrisc_cpu_do_interrupt(CPUState *cpu); diff --git a/target/openrisc/meson.build b/target/openrisc/meson.build index e445dec4a0..84322086ec 100644 --- a/target/openrisc/meson.build +++ b/target/openrisc/meson.build @@ -10,7 +10,6 @@ openrisc_ss.add(files( 'fpu_helper.c', 'gdbstub.c', 'interrupt_helper.c', - 'mmu.c', 'sys_helper.c', 'translate.c', )) @@ -19,6 +18,7 @@ openrisc_softmmu_ss = ss.source_set() openrisc_softmmu_ss.add(files( 'interrupt.c', 'machine.c', + 'mmu.c', )) target_arch += {'openrisc': openrisc_ss} diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c index 94df8c7bef..e561ef245b 100644 --- a/target/openrisc/mmu.c +++ b/target/openrisc/mmu.c @@ -23,11 +23,8 @@ #include "exec/exec-all.h" #include "exec/gdbstub.h" #include "qemu/host-utils.h" -#ifndef CONFIG_USER_ONLY #include "hw/loader.h" -#endif -#ifndef CONFIG_USER_ONLY static inline void get_phys_nommu(hwaddr *phys_addr, int *prot, target_ulong address) { @@ -94,7 +91,6 @@ static int get_phys_mmu(OpenRISCCPU *cpu, hwaddr *phys_addr, int *prot, return need & PAGE_EXEC ? 
EXCP_ITLBMISS : EXCP_DTLBMISS; } } -#endif static void raise_mmu_exception(OpenRISCCPU *cpu, target_ulong address, int exception) @@ -112,8 +108,6 @@ bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, { OpenRISCCPU *cpu = OPENRISC_CPU(cs); int excp = EXCP_DPF; - -#ifndef CONFIG_USER_ONLY int prot; hwaddr phys_addr; @@ -138,13 +132,11 @@ bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, if (probe) { return false; } -#endif raise_mmu_exception(cpu, addr, excp); cpu_loop_exit_restore(cs, retaddr); } -#ifndef CONFIG_USER_ONLY hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { OpenRISCCPU *cpu = OPENRISC_CPU(cs); @@ -177,4 +169,3 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) return phys_addr; } } -#endif diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 0472ec9154..e946da5f3a 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1302,9 +1302,6 @@ extern const VMStateDescription vmstate_ppc_cpu; /*****************************************************************************/ void ppc_translate_init(void); -bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); #if !defined(CONFIG_USER_ONLY) void ppc_store_sdr1(CPUPPCState *env, target_ulong value); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 65545ba9ca..1c7a7b4b38 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -9014,9 +9014,11 @@ static const struct SysemuCPUOps ppc_sysemu_ops = { static const struct TCGCPUOps ppc_tcg_ops = { .initialize = ppc_translate_init, - .tlb_fill = ppc_cpu_tlb_fill, -#ifndef CONFIG_USER_ONLY +#ifdef CONFIG_USER_ONLY + .record_sigsegv = ppc_cpu_record_sigsegv, +#else + .tlb_fill = ppc_cpu_tlb_fill, .cpu_exec_interrupt = ppc_cpu_exec_interrupt, .do_interrupt = ppc_cpu_do_interrupt, .cpu_exec_enter = ppc_cpu_exec_enter, diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index b7d1767920..17607adbe4 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -454,13 +454,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) break; } case POWERPC_EXCP_ALIGN: /* Alignment exception */ - /* Get rS/rD and rA from faulting opcode */ /* - * Note: the opcode fields will not be set properly for a - * direct store load/store, but nobody cares as nobody - * actually uses direct store segments. + * Get rS/rD and rA from faulting opcode. + * Note: We will only invoke ALIGN for atomic operations, + * so all instructions are X-form. */ - env->spr[SPR_DSISR] |= (env->error_code & 0x03FF0000) >> 16; + { + uint32_t insn = cpu_ldl_code(env, env->nip); + env->spr[SPR_DSISR] |= (insn & 0x03FF0000) >> 16; + } break; case POWERPC_EXCP_PROGRAM: /* Program exception */ switch (env->error_code & ~0xF) { @@ -1452,24 +1454,31 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) book3s_msgsnd_common(pir, PPC_INTERRUPT_DOORBELL); } -#endif -#endif /* CONFIG_TCG */ -#endif +#endif /* TARGET_PPC64 */ -#ifdef CONFIG_TCG void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { CPUPPCState *env = cs->env_ptr; - uint32_t insn; - /* Restore state and reload the insn we executed, for filling in DSISR. 
*/ - cpu_restore_state(cs, retaddr, true); - insn = cpu_ldl_code(env, env->nip); + switch (env->mmu_model) { + case POWERPC_MMU_SOFT_4xx: + case POWERPC_MMU_SOFT_4xx_Z: + env->spr[SPR_40x_DEAR] = vaddr; + break; + case POWERPC_MMU_BOOKE: + case POWERPC_MMU_BOOKE206: + env->spr[SPR_BOOKE_DEAR] = vaddr; + break; + default: + env->spr[SPR_DAR] = vaddr; + break; + } cs->exception_index = POWERPC_EXCP_ALIGN; - env->error_code = insn & 0x03FF0000; - cpu_loop_exit(cs); + env->error_code = 0; + cpu_loop_exit_restore(cs, retaddr); } -#endif +#endif /* CONFIG_TCG */ +#endif /* !CONFIG_USER_ONLY */ diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 55284369f5..6aa9484f34 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -211,11 +211,6 @@ void helper_compute_fprf_float16(CPUPPCState *env, float16 arg); void helper_compute_fprf_float32(CPUPPCState *env, float32 arg); void helper_compute_fprf_float128(CPUPPCState *env, float128 arg); -/* Raise a data fault alignment exception for the specified virtual address */ -void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr, - MMUAccessType access_type, int mmu_idx, - uintptr_t retaddr) QEMU_NORETURN; - /* translate.c */ int ppc_fixup_cpu(PowerPCCPU *cpu); @@ -283,5 +278,17 @@ static inline void pte_invalidate(target_ulong *pte0) #define PTE_PTEM_MASK 0x7FFFFFBF #define PTE_CHECK_MASK (TARGET_PAGE_MASK | 0x7B) +#ifdef CONFIG_USER_ONLY +void ppc_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +#else +bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); +void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) QEMU_NORETURN; +#endif #endif /* PPC_INTERNAL_H */ diff --git a/target/ppc/user_only_helper.c b/target/ppc/user_only_helper.c index aa3f867596..7ff76f7a06 100644 --- a/target/ppc/user_only_helper.c +++ b/target/ppc/user_only_helper.c @@ -21,16 +21,23 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "internal.h" - -bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) +void ppc_cpu_record_sigsegv(CPUState *cs, vaddr address, + MMUAccessType access_type, + bool maperr, uintptr_t retaddr) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; int exception, error_code; + /* + * Both DSISR and the "trap number" (exception vector offset, + * looked up from exception_index) are present in the linux-user + * signal frame. + * FIXME: we don't actually populate the trap number properly. + * It would be easiest to fill in an env->trap value now. 
diff --git a/target/ppc/user_only_helper.c b/target/ppc/user_only_helper.c
index aa3f867596..7ff76f7a06 100644
--- a/target/ppc/user_only_helper.c
+++ b/target/ppc/user_only_helper.c
@@ -21,16 +21,23 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "internal.h"
 
-
-bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-                      MMUAccessType access_type, int mmu_idx,
-                      bool probe, uintptr_t retaddr)
+void ppc_cpu_record_sigsegv(CPUState *cs, vaddr address,
+                            MMUAccessType access_type,
+                            bool maperr, uintptr_t retaddr)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
     CPUPPCState *env = &cpu->env;
     int exception, error_code;
 
+    /*
+     * Both DSISR and the "trap number" (exception vector offset,
+     * looked up from exception_index) are present in the linux-user
+     * signal frame.
+     * FIXME: we don't actually populate the trap number properly.
+     * It would be easiest to fill in an env->trap value now.
+     */
     if (access_type == MMU_INST_FETCH) {
         exception = POWERPC_EXCP_ISI;
         error_code = 0x40000000;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 7d53125dbc..f812998123 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -694,9 +694,9 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
 static const struct TCGCPUOps riscv_tcg_ops = {
     .initialize = riscv_translate_init,
     .synchronize_from_tb = riscv_cpu_synchronize_from_tb,
-    .tlb_fill = riscv_cpu_tlb_fill,
 
 #ifndef CONFIG_USER_ONLY
+    .tlb_fill = riscv_cpu_tlb_fill,
     .cpu_exec_interrupt = riscv_cpu_exec_interrupt,
     .do_interrupt = riscv_cpu_do_interrupt,
     .do_transaction_failed = riscv_cpu_do_transaction_failed,
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f30ff672f8..9eeed38c7e 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -814,7 +814,6 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
     riscv_cpu_two_stage_lookup(mmu_idx);
     riscv_raise_exception(env, cs->exception_index, retaddr);
 }
-#endif /* !CONFIG_USER_ONLY */
 
 bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                         MMUAccessType access_type, int mmu_idx,
@@ -822,7 +821,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
     CPURISCVState *env = &cpu->env;
-#ifndef CONFIG_USER_ONLY
     vaddr im_address;
     hwaddr pa = 0;
     int prot, prot2, prot_pmp;
@@ -954,25 +952,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     }
 
     return true;
-
-#else
-    switch (access_type) {
-    case MMU_INST_FETCH:
-        cs->exception_index = RISCV_EXCP_INST_PAGE_FAULT;
-        break;
-    case MMU_DATA_LOAD:
-        cs->exception_index = RISCV_EXCP_LOAD_PAGE_FAULT;
-        break;
-    case MMU_DATA_STORE:
-        cs->exception_index = RISCV_EXCP_STORE_PAGE_FAULT;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    env->badaddr = address;
-    cpu_loop_exit_restore(cs, retaddr);
-#endif
 }
+#endif /* !CONFIG_USER_ONLY */
 
 /*
  * Handle Traps
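With the user-only branches gone, the surviving sysemu tlb_fill implementations keep one contract worth spelling out: on success the hook installs a mapping and returns true; on a miss it either returns false to a probing caller or raises the guest exception and unwinds non-locally. A hedged stand-alone sketch of that contract, assuming a toy identity-mapped translation (all names here are invented for the demo):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical translation for the sketch: low memory is "unmapped". */
static bool translate(uint64_t va, uint64_t *pa)
{
    if (va < 0x1000) {
        return false;
    }
    *pa = va;                   /* identity map for the demo */
    return true;
}

/* Shape of a target tlb_fill under the probe contract. */
static bool demo_tlb_fill(uint64_t va, bool probe, uintptr_t ra)
{
    uint64_t pa;

    (void)ra;                   /* would feed cpu_loop_exit_restore() */
    if (translate(va, &pa)) {
        /* tlb_set_page(...) would install the mapping here */
        return true;
    }
    if (probe) {
        return false;           /* caller handles the fault itself */
    }
    fprintf(stderr, "guest fault at 0x%llx\n", (unsigned long long)va);
    exit(1);                    /* stands in for raising the exception */
}

int main(void)
{
    printf("probe miss -> %d\n", demo_tlb_fill(0x10, true, 0));
    printf("hit        -> %d\n", demo_tlb_fill(0x2000, true, 0));
    return 0;
}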
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 7b7b05f1d3..ccdbaf84d5 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -266,9 +266,12 @@ static void s390_cpu_reset_full(DeviceState *dev)
 
 static const struct TCGCPUOps s390_tcg_ops = {
     .initialize = s390x_translate_init,
-    .tlb_fill = s390_cpu_tlb_fill,
 
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+    .record_sigsegv = s390_cpu_record_sigsegv,
+    .record_sigbus = s390_cpu_record_sigbus,
+#else
+    .tlb_fill = s390_cpu_tlb_fill,
     .cpu_exec_interrupt = s390_cpu_exec_interrupt,
     .do_interrupt = s390_cpu_do_interrupt,
     .debug_excp_handler = s390x_cpu_debug_excp_handler,
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index 27d4a03ca1..1a178aed41 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -270,12 +270,21 @@ ObjectClass *s390_cpu_class_by_name(const char *name);
 void s390x_cpu_debug_excp_handler(CPUState *cs);
 void s390_cpu_do_interrupt(CPUState *cpu);
 bool s390_cpu_exec_interrupt(CPUState *cpu, int int_req);
+
+#ifdef CONFIG_USER_ONLY
+void s390_cpu_record_sigsegv(CPUState *cs, vaddr address,
+                             MMUAccessType access_type,
+                             bool maperr, uintptr_t retaddr);
+void s390_cpu_record_sigbus(CPUState *cs, vaddr address,
+                            MMUAccessType access_type, uintptr_t retaddr);
+#else
 bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                        MMUAccessType access_type, int mmu_idx,
                        bool probe, uintptr_t retaddr);
 void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                    MMUAccessType access_type, int mmu_idx,
                                    uintptr_t retaddr) QEMU_NORETURN;
+#endif
 
 /* fpu_helper.c */
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index 3d6662a53c..4e7648f301 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -82,6 +82,19 @@ void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
     tcg_s390_data_exception(env, dxc, GETPC());
 }
 
+/*
+ * Unaligned accesses are only diagnosed with MO_ALIGN. At the moment,
+ * this is only for the atomic operations, for which we want to raise a
+ * specification exception.
+ */
+static void QEMU_NORETURN do_unaligned_access(CPUState *cs, uintptr_t retaddr)
+{
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
+
+    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr);
+}
+
 #if defined(CONFIG_USER_ONLY)
 
 void s390_cpu_do_interrupt(CPUState *cs)
@@ -89,19 +102,29 @@ void s390_cpu_do_interrupt(CPUState *cs)
     cs->exception_index = -1;
 }
 
-bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-                       MMUAccessType access_type, int mmu_idx,
-                       bool probe, uintptr_t retaddr)
+void s390_cpu_record_sigsegv(CPUState *cs, vaddr address,
+                             MMUAccessType access_type,
+                             bool maperr, uintptr_t retaddr)
 {
     S390CPU *cpu = S390_CPU(cs);
 
-    trigger_pgm_exception(&cpu->env, PGM_ADDRESSING);
-    /* On real machines this value is dropped into LowMem. Since this
-       is userland, simply put this someplace that cpu_loop can find it. */
-    cpu->env.__excp_addr = address;
+    trigger_pgm_exception(&cpu->env, maperr ? PGM_ADDRESSING : PGM_PROTECTION);
+    /*
+     * On real machines this value is dropped into LowMem. Since this
+     * is userland, simply put this someplace that cpu_loop can find it.
+     * S390 only gives the page of the fault, not the exact address.
+     * C.f. the construction of TEC in mmu_translate().
+     */
+    cpu->env.__excp_addr = address & TARGET_PAGE_MASK;
     cpu_loop_exit_restore(cs, retaddr);
 }
 
+void s390_cpu_record_sigbus(CPUState *cs, vaddr address,
+                            MMUAccessType access_type, uintptr_t retaddr)
+{
+    do_unaligned_access(cs, retaddr);
+}
+
 #else /* !CONFIG_USER_ONLY */
 
 static inline uint64_t cpu_mmu_idx_to_asc(int mmu_idx)
@@ -589,17 +612,11 @@ void s390x_cpu_debug_excp_handler(CPUState *cs)
     }
 }
 
-/* Unaligned accesses are only diagnosed with MO_ALIGN. At the moment,
-   this is only for the atomic operations, for which we want to raise a
-   specification exception. */
 void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                    MMUAccessType access_type,
                                    int mmu_idx, uintptr_t retaddr)
 {
-    S390CPU *cpu = S390_CPU(cs);
-    CPUS390XState *env = &cpu->env;
-
-    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr);
+    do_unaligned_access(cs, retaddr);
 }
 
 static void QEMU_NORETURN monitor_event(CPUS390XState *env,
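The record_sigsegv hunk above distinguishes the two fault flavours: an unmapped page (maperr) becomes PGM_ADDRESSING, a protection fault becomes PGM_PROTECTION, and only the page of the fault is reported. A stand-alone restatement of that mapping; the constants and page mask below are illustrative stand-ins, the real values live in the s390x headers:

#include <stdint.h>
#include <stdio.h>

#define DEMO_PGM_PROTECTION 0x04            /* illustrative only */
#define DEMO_PGM_ADDRESSING 0x05            /* illustrative only */
#define DEMO_PAGE_MASK      (~(uint64_t)0xfff)

int main(void)
{
    uint64_t address = 0x12345678;
    int maperr = 1;                          /* page was not mapped at all */

    /* Unmapped -> addressing exception; mapped-but-protected -> protection. */
    int pgm = maperr ? DEMO_PGM_ADDRESSING : DEMO_PGM_PROTECTION;
    /* Only the faulting page, not the exact address, is architecturally visible. */
    uint64_t excp_addr = address & DEMO_PAGE_MASK;

    printf("pgm=0x%02x excp_addr=0x%llx\n", pgm,
           (unsigned long long)excp_addr);
    return 0;
}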
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 17e3f83641..362a30d99e 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -141,20 +141,12 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                              MMUAccessType access_type, int mmu_idx,
                              bool nonfault, void **phost, uintptr_t ra)
 {
-    int flags;
-
 #if defined(CONFIG_USER_ONLY)
-    flags = page_get_flags(addr);
-    if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) {
-        env->__excp_addr = addr;
-        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
-        if (nonfault) {
-            return flags;
-        }
-        tcg_s390_program_interrupt(env, flags, ra);
-    }
-    *phost = g2h(env_cpu(env), addr);
+    return probe_access_flags(env, addr, access_type, mmu_idx,
+                              nonfault, phost, ra);
 #else
+    int flags;
+
     /*
      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
      * to detect if there was an exception during tlb_fill().
@@ -173,8 +165,8 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                               (access_type == MMU_DATA_STORE
                                ? BP_MEM_WRITE : BP_MEM_READ), ra);
     }
-#endif
     return 0;
+#endif
 }
 
 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
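The hand-rolled page_get_flags logic above collapses onto probe_access_flags, whose nonfault contract is what makes the substitution possible: with nonfault set, the probe never raises; it returns a flags word and hands back a host pointer (or NULL), leaving the caller to decide whether an invalid result becomes a program interrupt. A hedged stand-alone sketch of that contract, with an invented flag bit and a toy address map:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the TLB-invalid result bit. */
#define DEMO_TLB_INVALID (1 << 0)

/*
 * Shape of a nonfault probe: report, don't raise. The caller turns
 * DEMO_TLB_INVALID into a guest exception only if it wants to.
 */
static int demo_probe(uint64_t addr, void **phost)
{
    bool mapped = addr >= 0x1000;   /* pretend low memory is unmapped */

    if (!mapped) {
        *phost = NULL;
        return DEMO_TLB_INVALID;
    }
    *phost = (void *)(uintptr_t)addr;
    return 0;
}

int main(void)
{
    void *host;
    printf("flags=%d\n", demo_probe(0x10, &host));
    printf("flags=%d\n", demo_probe(0x2000, &host));
    return 0;
}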
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 2047742d03..06b2691dc4 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -236,9 +236,9 @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
 static const struct TCGCPUOps superh_tcg_ops = {
     .initialize = sh4_translate_init,
     .synchronize_from_tb = superh_cpu_synchronize_from_tb,
-    .tlb_fill = superh_cpu_tlb_fill,
 
 #ifndef CONFIG_USER_ONLY
+    .tlb_fill = superh_cpu_tlb_fill,
     .cpu_exec_interrupt = superh_cpu_exec_interrupt,
     .do_interrupt = superh_cpu_do_interrupt,
     .do_unaligned_access = superh_cpu_do_unaligned_access,
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index dc81406646..4cfb109f56 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -213,12 +213,12 @@ void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
                                     uintptr_t retaddr) QEMU_NORETURN;
 
 void sh4_translate_init(void);
+void sh4_cpu_list(void);
+
+#if !defined(CONFIG_USER_ONLY)
 bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                          MMUAccessType access_type, int mmu_idx,
                          bool probe, uintptr_t retaddr);
-
-void sh4_cpu_list(void);
-#if !defined(CONFIG_USER_ONLY)
 void superh_cpu_do_interrupt(CPUState *cpu);
 bool superh_cpu_exec_interrupt(CPUState *cpu, int int_req);
 void cpu_sh4_invalidate_tlb(CPUSH4State *s);
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 53cb9c3b63..6a620e36fc 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -796,8 +796,6 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     return false;
 }
 
-#endif /* !CONFIG_USER_ONLY */
-
 bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                          MMUAccessType access_type, int mmu_idx,
                          bool probe, uintptr_t retaddr)
@@ -806,11 +804,6 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     CPUSH4State *env = &cpu->env;
     int ret;
 
-#ifdef CONFIG_USER_ONLY
-    ret = (access_type == MMU_DATA_STORE ? MMU_DTLB_VIOLATION_WRITE :
-           access_type == MMU_INST_FETCH ? MMU_ITLB_VIOLATION :
-           MMU_DTLB_VIOLATION_READ);
-#else
     target_ulong physical;
     int prot;
 
@@ -829,7 +822,6 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     if (ret != MMU_DTLB_MULTIPLE && ret != MMU_ITLB_MULTIPLE) {
         env->pteh = (env->pteh & PTEH_ASID_MASK) | (address & PTEH_VPN_MASK);
     }
-#endif
 
     env->tea = address;
     switch (ret) {
@@ -868,3 +860,4 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     }
     cpu_loop_exit_restore(cs, retaddr);
 }
+#endif /* !CONFIG_USER_ONLY */
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index c996dce7df..752669825f 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -29,6 +29,9 @@ void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                     MMUAccessType access_type,
                                     int mmu_idx, uintptr_t retaddr)
 {
+    CPUSH4State *env = cs->env_ptr;
+
+    env->tea = addr;
     switch (access_type) {
     case MMU_INST_FETCH:
     case MMU_DATA_LOAD:
@@ -37,6 +40,8 @@ void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
     case MMU_DATA_STORE:
         cs->exception_index = 0x100;
         break;
+    default:
+        g_assert_not_reached();
     }
     cpu_loop_exit_restore(cs, retaddr);
 }
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 21dd27796d..55268ed2a1 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -865,9 +865,9 @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
 static const struct TCGCPUOps sparc_tcg_ops = {
     .initialize = sparc_tcg_init,
     .synchronize_from_tb = sparc_cpu_synchronize_from_tb,
-    .tlb_fill = sparc_cpu_tlb_fill,
 
 #ifndef CONFIG_USER_ONLY
+    .tlb_fill = sparc_cpu_tlb_fill,
     .cpu_exec_interrupt = sparc_cpu_exec_interrupt,
     .do_interrupt = sparc_cpu_do_interrupt,
     .do_transaction_failed = sparc_cpu_do_transaction_failed,
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index bbf3601cb1..a3e1cf9b6e 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -27,7 +27,6 @@
 
 //#define DEBUG_MMU
 //#define DEBUG_MXCC
-//#define DEBUG_UNALIGNED
 //#define DEBUG_UNASSIGNED
 //#define DEBUG_ASI
 //#define DEBUG_CACHE_CONTROL
@@ -364,10 +363,6 @@ static void do_check_align(CPUSPARCState *env, target_ulong addr,
                            uint32_t align, uintptr_t ra)
 {
     if (addr & align) {
-#ifdef DEBUG_UNALIGNED
-        printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
-               "\n", addr, env->pc);
-#endif
         cpu_raise_exception_ra(env, TT_UNALIGNED, ra);
     }
 }
@@ -1958,20 +1953,3 @@ void sparc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
                                      is_asi, size, retaddr);
 }
 #endif
-
-#if !defined(CONFIG_USER_ONLY)
-void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
-                                                 MMUAccessType access_type,
-                                                 int mmu_idx,
-                                                 uintptr_t retaddr)
-{
-    SPARCCPU *cpu = SPARC_CPU(cs);
-    CPUSPARCState *env = &cpu->env;
-
-#ifdef DEBUG_UNALIGNED
-    printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
-           "\n", addr, env->pc);
-#endif
-    cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr);
-}
-#endif
diff --git a/target/sparc/meson.build b/target/sparc/meson.build
index a3638b9503..a801802ee2 100644
--- a/target/sparc/meson.build
+++ b/target/sparc/meson.build
@@ -6,7 +6,6 @@ sparc_ss.add(files(
   'gdbstub.c',
   'helper.c',
   'ldst_helper.c',
-  'mmu_helper.c',
   'translate.c',
   'win_helper.c',
 ))
@@ -16,6 +15,7 @@ sparc_ss.add(when: 'TARGET_SPARC64', if_true: files('int64_helper.c', 'vis_helpe
 sparc_softmmu_ss = ss.source_set()
 sparc_softmmu_ss.add(files(
   'machine.c',
+  'mmu_helper.c',
   'monitor.c',
 ))
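The do_check_align helper in the ldst_helper.c hunk above relies on align being a mask of the low address bits (size - 1 for a naturally aligned power-of-two access), so a single AND decides whether to trap. A quick stand-alone check of that invariant (function and values invented for the demo):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_unaligned(uint64_t addr, uint32_t align_mask)
{
    /* Any set low bit within the mask means the access is misaligned. */
    return (addr & align_mask) != 0;
}

int main(void)
{
    assert(!is_unaligned(0x1000, 0x7));  /* 8-byte aligned: OK */
    assert(is_unaligned(0x1004, 0x7));   /* 4 mod 8: would raise TT_UNALIGNED */
    assert(!is_unaligned(0x1004, 0x3));  /* but fine for a 4-byte access */
    return 0;
}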
diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index a44473a1c7..f2668389b0 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -25,30 +25,6 @@
 
 /* Sparc MMU emulation */
 
-#if defined(CONFIG_USER_ONLY)
-
-bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-                        MMUAccessType access_type, int mmu_idx,
-                        bool probe, uintptr_t retaddr)
-{
-    SPARCCPU *cpu = SPARC_CPU(cs);
-    CPUSPARCState *env = &cpu->env;
-
-    if (access_type == MMU_INST_FETCH) {
-        cs->exception_index = TT_TFAULT;
-    } else {
-        cs->exception_index = TT_DFAULT;
-#ifdef TARGET_SPARC64
-        env->dmmu.mmuregs[4] = address;
-#else
-        env->mmuregs[4] = address;
-#endif
-    }
-    cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else
-
 #ifndef TARGET_SPARC64
 /*
  * Sparc V8 Reference MMU (SRMMU)
@@ -526,16 +502,60 @@ static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
     return 0;
 }
 
+static uint64_t build_sfsr(CPUSPARCState *env, int mmu_idx, int rw)
+{
+    uint64_t sfsr = SFSR_VALID_BIT;
+
+    switch (mmu_idx) {
+    case MMU_PHYS_IDX:
+        sfsr |= SFSR_CT_NOTRANS;
+        break;
+    case MMU_USER_IDX:
+    case MMU_KERNEL_IDX:
+        sfsr |= SFSR_CT_PRIMARY;
+        break;
+    case MMU_USER_SECONDARY_IDX:
+    case MMU_KERNEL_SECONDARY_IDX:
+        sfsr |= SFSR_CT_SECONDARY;
+        break;
+    case MMU_NUCLEUS_IDX:
+        sfsr |= SFSR_CT_NUCLEUS;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (rw == 1) {
+        sfsr |= SFSR_WRITE_BIT;
+    } else if (rw == 4) {
+        sfsr |= SFSR_NF_BIT;
+    }
+
+    if (env->pstate & PS_PRIV) {
+        sfsr |= SFSR_PR_BIT;
+    }
+
+    if (env->dmmu.sfsr & SFSR_VALID_BIT) { /* Fault status register */
+        sfsr |= SFSR_OW_BIT; /* overflow (not read before another fault) */
+    }
+
+    /* FIXME: ASI field in SFSR must be set */
+
+    return sfsr;
+}
+
 static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
                                      int *prot, MemTxAttrs *attrs,
                                      target_ulong address, int rw, int mmu_idx)
 {
     CPUState *cs = env_cpu(env);
     unsigned int i;
+    uint64_t sfsr;
     uint64_t context;
-    uint64_t sfsr = 0;
     bool is_user = false;
 
+    sfsr = build_sfsr(env, mmu_idx, rw);
+
     switch (mmu_idx) {
     case MMU_PHYS_IDX:
         g_assert_not_reached();
@@ -544,29 +564,18 @@ static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
         /* fallthru */
     case MMU_KERNEL_IDX:
         context = env->dmmu.mmu_primary_context & 0x1fff;
-        sfsr |= SFSR_CT_PRIMARY;
         break;
     case MMU_USER_SECONDARY_IDX:
         is_user = true;
         /* fallthru */
     case MMU_KERNEL_SECONDARY_IDX:
         context = env->dmmu.mmu_secondary_context & 0x1fff;
-        sfsr |= SFSR_CT_SECONDARY;
         break;
-    case MMU_NUCLEUS_IDX:
-        sfsr |= SFSR_CT_NUCLEUS;
-        /* FALLTHRU */
     default:
         context = 0;
         break;
     }
 
-    if (rw == 1) {
-        sfsr |= SFSR_WRITE_BIT;
-    } else if (rw == 4) {
-        sfsr |= SFSR_NF_BIT;
-    }
-
     for (i = 0; i < 64; i++) {
         /* ctx match, vaddr match, valid? */
         if (ultrasparc_tag_match(&env->dtlb[i], address, context, physical)) {
@@ -616,22 +625,9 @@ static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
                 return 0;
             }
 
-            if (env->dmmu.sfsr & SFSR_VALID_BIT) { /* Fault status register */
-                sfsr |= SFSR_OW_BIT; /* overflow (not read before
-                                        another fault) */
-            }
-
-            if (env->pstate & PS_PRIV) {
-                sfsr |= SFSR_PR_BIT;
-            }
-
-            /* FIXME: ASI field in SFSR must be set */
-            env->dmmu.sfsr = sfsr | SFSR_VALID_BIT;
-
+            env->dmmu.sfsr = sfsr;
             env->dmmu.sfar = address; /* Fault address register */
-
             env->dmmu.tag_access = (address & ~0x1fffULL) | context;
-
             return 1;
         }
     }
@@ -926,4 +922,23 @@ hwaddr sparc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     }
     return phys_addr;
 }
+
+#ifndef CONFIG_USER_ONLY
+void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                                 MMUAccessType access_type,
+                                                 int mmu_idx,
+                                                 uintptr_t retaddr)
+{
+    SPARCCPU *cpu = SPARC_CPU(cs);
+    CPUSPARCState *env = &cpu->env;
+
+#ifdef TARGET_SPARC64
+    env->dmmu.sfsr = build_sfsr(env, mmu_idx, access_type);
+    env->dmmu.sfar = addr;
+#else
+    env->mmuregs[4] = addr;
+#endif
+
+    cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr);
+}
+#endif /* !CONFIG_USER_ONLY */
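The point of build_sfsr above is that everything known before the TLB walk (context type, access kind, privilege, overflow of a previously unread fault) can be folded into one status word up front, so both the TLB-miss path and the relocated unaligned-access handler reuse it. A hedged stand-alone restatement of the idea; the bit positions below are made up for the demo, the real SFSR_* layout is in the sparc headers:

#include <stdint.h>
#include <stdio.h>

#define DEMO_SFSR_VALID (1u << 0)   /* illustrative positions only */
#define DEMO_SFSR_OW    (1u << 1)
#define DEMO_SFSR_WRITE (1u << 2)
#define DEMO_SFSR_PR    (1u << 3)
#define DEMO_CT_PRIMARY (0u << 4)

static uint32_t demo_build_sfsr(int is_priv, int is_write, uint32_t old_sfsr)
{
    uint32_t sfsr = DEMO_SFSR_VALID | DEMO_CT_PRIMARY;

    if (is_write) {
        sfsr |= DEMO_SFSR_WRITE;
    }
    if (is_priv) {
        sfsr |= DEMO_SFSR_PR;
    }
    if (old_sfsr & DEMO_SFSR_VALID) {
        sfsr |= DEMO_SFSR_OW;       /* previous fault was never read */
    }
    return sfsr;
}

int main(void)
{
    uint32_t first = demo_build_sfsr(1, 1, 0);
    uint32_t second = demo_build_sfsr(1, 0, first);   /* sets the OW bit */

    printf("first=0x%x second=0x%x\n", first, second);
    return 0;
}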
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index c1cbd03595..224f723236 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -192,10 +192,10 @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
 
 static const struct TCGCPUOps xtensa_tcg_ops = {
     .initialize = xtensa_translate_init,
-    .tlb_fill = xtensa_cpu_tlb_fill,
     .debug_excp_handler = xtensa_breakpoint_handler,
 
 #ifndef CONFIG_USER_ONLY
+    .tlb_fill = xtensa_cpu_tlb_fill,
     .cpu_exec_interrupt = xtensa_cpu_exec_interrupt,
     .do_interrupt = xtensa_cpu_do_interrupt,
     .do_transaction_failed = xtensa_cpu_do_transaction_failed,
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index f9a510ca46..02143f2f77 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -563,10 +563,10 @@ struct XtensaCPU {
 };
 
+#ifndef CONFIG_USER_ONLY
 bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                          MMUAccessType access_type, int mmu_idx,
                          bool probe, uintptr_t retaddr);
-#ifndef CONFIG_USER_ONLY
 void xtensa_cpu_do_interrupt(CPUState *cpu);
 bool xtensa_cpu_exec_interrupt(CPUState *cpu, int interrupt_request);
 void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index f18ab383fd..29d216ec1b 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -242,27 +242,7 @@ void xtensa_cpu_list(void)
     }
 }
 
-#ifdef CONFIG_USER_ONLY
-
-bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
-                         MMUAccessType access_type, int mmu_idx,
-                         bool probe, uintptr_t retaddr)
-{
-    XtensaCPU *cpu = XTENSA_CPU(cs);
-    CPUXtensaState *env = &cpu->env;
-
-    qemu_log_mask(CPU_LOG_INT,
-                  "%s: rw = %d, address = 0x%08" VADDR_PRIx ", size = %d\n",
-                  __func__, access_type, address, size);
-    env->sregs[EXCVADDR] = address;
-    env->sregs[EXCCAUSE] = (access_type == MMU_DATA_STORE ?
-                            STORE_PROHIBITED_CAUSE : LOAD_PROHIBITED_CAUSE);
-    cs->exception_index = EXC_USER;
-    cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else /* !CONFIG_USER_ONLY */
-
+#ifndef CONFIG_USER_ONLY
 void xtensa_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                     MMUAccessType access_type,
                                     int mmu_idx, uintptr_t retaddr)