-rw-r--r--  accel/kvm/kvm-all.c | 7
-rw-r--r--  accel/stubs/kvm-stub.c | 5
-rw-r--r--  accel/tcg/cpu-exec.c | 3
-rw-r--r--  accel/tcg/user-exec.c | 859
-rwxr-xr-x  configure | 8
-rw-r--r--  dump/dump.c | 19
-rw-r--r--  hmp-commands.hx | 8
-rw-r--r--  hw/i386/xen/xen-hvm.c | 4
-rw-r--r--  hw/virtio/virtio-mem.c | 92
-rw-r--r--  include/exec/exec-all.h | 55
-rw-r--r--  include/exec/memory.h | 41
-rw-r--r--  include/exec/ram_addr.h | 13
-rw-r--r--  include/hw/core/cpu.h | 1
-rw-r--r--  include/hw/core/tcg-cpu-ops.h | 71
-rw-r--r--  include/hw/virtio/virtio-mem.h | 3
-rw-r--r--  include/migration/blocker.h | 16
-rw-r--r--  include/sysemu/kvm.h | 1
-rw-r--r--  include/tcg/tcg-ldst.h | 5
-rw-r--r--  linux-user/aarch64/cpu_loop.c | 12
-rw-r--r--  linux-user/alpha/cpu_loop.c | 15
-rw-r--r--  linux-user/arm/cpu_loop.c | 30
-rw-r--r--  linux-user/cris/cpu_loop.c | 10
-rw-r--r--  linux-user/hexagon/cpu_loop.c | 24
-rw-r--r--  linux-user/host/aarch64/host-signal.h | 74
-rw-r--r--  linux-user/host/alpha/host-signal.h | 42
-rw-r--r--  linux-user/host/arm/host-signal.h | 30
-rw-r--r--  linux-user/host/i386/host-signal.h | 25
-rw-r--r--  linux-user/host/mips/host-signal.h | 62
-rw-r--r--  linux-user/host/ppc/host-signal.h | 25
-rw-r--r--  linux-user/host/ppc64/host-signal.h | 1
-rw-r--r--  linux-user/host/riscv/host-signal.h | 58
-rw-r--r--  linux-user/host/riscv/hostdep.h (renamed from linux-user/host/riscv64/hostdep.h) | 4
-rw-r--r--  linux-user/host/riscv/safe-syscall.inc.S (renamed from linux-user/host/riscv64/safe-syscall.inc.S) | 0
-rw-r--r--  linux-user/host/riscv32/hostdep.h | 11
-rw-r--r--  linux-user/host/s390/host-signal.h | 93
-rw-r--r--  linux-user/host/s390x/host-signal.h | 1
-rw-r--r--  linux-user/host/sparc/host-signal.h | 54
-rw-r--r--  linux-user/host/sparc64/host-signal.h | 1
-rw-r--r--  linux-user/host/x32/host-signal.h | 1
-rw-r--r--  linux-user/host/x86_64/host-signal.h | 24
-rw-r--r--  linux-user/hppa/cpu_loop.c | 23
-rw-r--r--  linux-user/m68k/cpu_loop.c | 10
-rw-r--r--  linux-user/microblaze/cpu_loop.c | 10
-rw-r--r--  linux-user/mips/cpu_loop.c | 11
-rw-r--r--  linux-user/openrisc/cpu_loop.c | 25
-rw-r--r--  linux-user/ppc/cpu_loop.c | 8
-rw-r--r--  linux-user/riscv/cpu_loop.c | 7
-rw-r--r--  linux-user/s390x/cpu_loop.c | 13
-rw-r--r--  linux-user/sh4/cpu_loop.c | 8
-rw-r--r--  linux-user/signal.c | 129
-rw-r--r--  linux-user/sparc/cpu_loop.c | 25
-rw-r--r--  linux-user/xtensa/cpu_loop.c | 9
-rw-r--r--  meson.build | 4
-rw-r--r--  migration/dirtyrate.c | 367
-rw-r--r--  migration/dirtyrate.h | 19
-rw-r--r--  migration/migration.c | 30
-rw-r--r--  migration/postcopy-ram.c | 40
-rw-r--r--  migration/ram.c | 182
-rw-r--r--  migration/ram.h | 1
-rw-r--r--  migration/rdma.c | 138
-rw-r--r--  migration/trace-events | 2
-rw-r--r--  pc-bios/openbios-ppc | bin 696912 -> 696912 bytes
-rw-r--r--  pc-bios/openbios-sparc32 | bin 382048 -> 382048 bytes
-rw-r--r--  pc-bios/openbios-sparc64 | bin 1593408 -> 1593408 bytes
-rw-r--r--  qapi/migration.json | 48
m---------  roms/openbios | 0
-rw-r--r--  softmmu/memory.c | 43
-rw-r--r--  softmmu/trace-events | 1
-rw-r--r--  target/alpha/cpu.c | 7
-rw-r--r--  target/alpha/cpu.h | 21
-rw-r--r--  target/alpha/helper.c | 39
-rw-r--r--  target/alpha/mem_helper.c | 30
-rw-r--r--  target/arm/cpu.c | 7
-rw-r--r--  target/arm/cpu_tcg.c | 7
-rw-r--r--  target/arm/internals.h | 8
-rw-r--r--  target/arm/mte_helper.c | 6
-rw-r--r--  target/arm/sve_helper.c | 2
-rw-r--r--  target/arm/tlb_helper.c | 42
-rw-r--r--  target/cris/cpu.c | 4
-rw-r--r--  target/cris/cpu.h | 8
-rw-r--r--  target/cris/helper.c | 18
-rw-r--r--  target/cris/meson.build | 7
-rw-r--r--  target/hexagon/cpu.c | 23
-rw-r--r--  target/hppa/cpu.c | 2
-rw-r--r--  target/hppa/cpu.h | 2
-rw-r--r--  target/hppa/mem_helper.c | 15
-rw-r--r--  target/hppa/meson.build | 6
-rw-r--r--  target/i386/tcg/helper-tcg.h | 6
-rw-r--r--  target/i386/tcg/tcg-cpu.c | 3
-rw-r--r--  target/i386/tcg/user/excp_helper.c | 23
-rw-r--r--  target/m68k/cpu.c | 2
-rw-r--r--  target/m68k/helper.c | 6
-rw-r--r--  target/microblaze/cpu.c | 2
-rw-r--r--  target/microblaze/cpu.h | 8
-rw-r--r--  target/microblaze/helper.c | 13
-rw-r--r--  target/microblaze/translate.c | 16
-rw-r--r--  target/mips/cpu.c | 2
-rw-r--r--  target/mips/tcg/meson.build | 3
-rw-r--r--  target/mips/tcg/tcg-internal.h | 7
-rw-r--r--  target/mips/tcg/user/meson.build | 3
-rw-r--r--  target/mips/tcg/user/tlb_helper.c | 59
-rw-r--r--  target/nios2/cpu.c | 6
-rw-r--r--  target/nios2/cpu.h | 6
-rw-r--r--  target/nios2/helper.c | 7
-rw-r--r--  target/openrisc/cpu.c | 2
-rw-r--r--  target/openrisc/cpu.h | 7
-rw-r--r--  target/openrisc/meson.build | 2
-rw-r--r--  target/openrisc/mmu.c | 9
-rw-r--r--  target/ppc/cpu.h | 3
-rw-r--r--  target/ppc/cpu_init.c | 6
-rw-r--r--  target/ppc/excp_helper.c | 41
-rw-r--r--  target/ppc/internal.h | 17
-rw-r--r--  target/ppc/user_only_helper.c | 15
-rw-r--r--  target/riscv/cpu.c | 2
-rw-r--r--  target/riscv/cpu_helper.c | 21
-rw-r--r--  target/s390x/cpu.c | 7
-rw-r--r--  target/s390x/s390x-internal.h | 9
-rw-r--r--  target/s390x/tcg/excp_helper.c | 45
-rw-r--r--  target/s390x/tcg/mem_helper.c | 18
-rw-r--r--  target/sh4/cpu.c | 2
-rw-r--r--  target/sh4/cpu.h | 6
-rw-r--r--  target/sh4/helper.c | 9
-rw-r--r--  target/sh4/op_helper.c | 5
-rw-r--r--  target/sparc/cpu.c | 2
-rw-r--r--  target/sparc/ldst_helper.c | 22
-rw-r--r--  target/sparc/meson.build | 2
-rw-r--r--  target/sparc/mmu_helper.c | 115
-rw-r--r--  target/xtensa/cpu.c | 2
-rw-r--r--  target/xtensa/cpu.h | 2
-rw-r--r--  target/xtensa/helper.c | 22
130 files changed, 2175 insertions, 1605 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index db8d83b137..eecd8031cf 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -469,6 +469,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
cpu->kvm_fd = ret;
cpu->kvm_state = s;
cpu->vcpu_dirty = true;
+ cpu->dirty_pages = 0;
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
@@ -743,6 +744,7 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu)
count++;
}
cpu->kvm_fetch_index = fetch;
+ cpu->dirty_pages += count;
return count;
}
@@ -2296,6 +2298,11 @@ bool kvm_vcpu_id_is_valid(int vcpu_id)
return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
}
+bool kvm_dirty_ring_enabled(void)
+{
+ return kvm_state->kvm_dirty_ring_size ? true : false;
+}
+
static int kvm_init(MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
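
The two hunks above give each vCPU a dirty_pages counter fed by dirty-ring reaping, plus a query helper. A minimal sketch of how a consumer might sum the counters when the dirty-ring method is active (the function below is illustrative, not part of this patch; the real consumer is migration/dirtyrate.c):

    /* Illustrative only: sum per-vCPU dirty pages reaped from the rings. */
    static uint64_t total_vcpu_dirty_pages(void)
    {
        CPUState *cpu;
        uint64_t pages = 0;

        if (!kvm_dirty_ring_enabled()) {
            return 0;   /* dirty ring not in use; caller must use bitmaps */
        }
        CPU_FOREACH(cpu) {
            pages += cpu->dirty_pages;  /* bumped in kvm_dirty_ring_reap_one() */
        }
        return pages;
    }
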
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 5b1d00a222..5319573e00 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -147,4 +147,9 @@ bool kvm_arm_supports_user_irq(void)
{
return false;
}
+
+bool kvm_dirty_ring_enabled(void)
+{
+ return false;
+}
#endif
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index c9764c1325..bba4672632 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -462,6 +462,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
* memory.
*/
#ifndef CONFIG_SOFTMMU
+ clear_helper_retaddr();
tcg_debug_assert(!have_mmap_lock());
#endif
if (qemu_mutex_iothread_locked()) {
@@ -471,7 +472,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
qemu_plugin_disable_mem_helpers(cpu);
}
-
/*
* As we start the exclusive region before codegen we must still
* be in the region if we longjump out of either the codegen or
@@ -916,6 +916,7 @@ int cpu_exec(CPUState *cpu)
#endif
#ifndef CONFIG_SOFTMMU
+ clear_helper_retaddr();
tcg_debug_assert(!have_mmap_lock());
#endif
if (qemu_mutex_iothread_locked()) {
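
The clear_helper_retaddr() calls added above matter because user-only memory helpers publish their return address around the host access; if a longjmp out of generated code left helper_retaddr set, a later fault would unwind through a stale pc. A simplified sketch of the pairing, modeled on cpu_mmu_lookup() in accel/tcg/user-exec.c:

    /* Simplified sketch of the helper_retaddr discipline (illustrative). */
    static uint8_t sketch_ldub(CPUArchState *env, abi_ptr addr, uintptr_t ra)
    {
        void *haddr = g2h(env_cpu(env), addr);
        uint8_t ret;

        set_helper_retaddr(ra);   /* a SIGSEGV here unwinds via 'ra' */
        ret = ldub_p(haddr);      /* the actual host access */
        clear_helper_retaddr();   /* must not survive past the helper */
        return ret;
    }
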
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index e6bb29b42d..1528a21fad 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -27,48 +27,18 @@
#include "exec/helper-proto.h"
#include "qemu/atomic128.h"
#include "trace/trace-root.h"
+#include "tcg/tcg-ldst.h"
#include "internal.h"
-#undef EAX
-#undef ECX
-#undef EDX
-#undef EBX
-#undef ESP
-#undef EBP
-#undef ESI
-#undef EDI
-#undef EIP
-#ifdef __linux__
-#include <sys/ucontext.h>
-#endif
-
__thread uintptr_t helper_retaddr;
//#define DEBUG_SIGNAL
-/* exit the current TB from a signal handler. The host registers are
- restored in a state compatible with the CPU emulator
+/*
+ * Adjust the pc to pass to cpu_restore_state; return the memop type.
*/
-static void QEMU_NORETURN cpu_exit_tb_from_sighandler(CPUState *cpu,
- sigset_t *old_set)
-{
- /* XXX: use siglongjmp ? */
- sigprocmask(SIG_SETMASK, old_set, NULL);
- cpu_loop_exit_noexc(cpu);
-}
-
-/* 'pc' is the host PC at which the exception was raised. 'address' is
- the effective address of the memory exception. 'is_write' is 1 if a
- write caused the exception and otherwise 0'. 'old_set' is the
- signal set which should be restored */
-static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
- int is_write, sigset_t *old_set)
+MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
{
- CPUState *cpu = current_cpu;
- CPUClass *cc;
- unsigned long address = (unsigned long)info->si_addr;
- MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
-
switch (helper_retaddr) {
default:
/*
@@ -77,7 +47,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
* pointer into the generated code that will unwind to the
* correct guest pc.
*/
- pc = helper_retaddr;
+ *pc = helper_retaddr;
break;
case 0:
@@ -97,7 +67,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
* Therefore, adjust to compensate for what will be done later
* by cpu_restore_state_from_tb.
*/
- pc += GETPC_ADJ;
+ *pc += GETPC_ADJ;
break;
case 1:
@@ -113,118 +83,97 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
*
* Like tb_gen_code, release the memory lock before cpu_loop_exit.
*/
- pc = 0;
- access_type = MMU_INST_FETCH;
mmap_unlock();
- break;
+ *pc = 0;
+ return MMU_INST_FETCH;
}
- /* For synchronous signals we expect to be coming from the vCPU
- * thread (so current_cpu should be valid) and either from running
- * code or during translation which can fault as we cross pages.
- *
- * If neither is true then something has gone wrong and we should
- * abort rather than try and restart the vCPU execution.
- */
- if (!cpu || !cpu->running) {
- printf("qemu:%s received signal outside vCPU context @ pc=0x%"
- PRIxPTR "\n", __func__, pc);
- abort();
- }
+ return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
+}
-#if defined(DEBUG_SIGNAL)
- printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
- pc, address, is_write, *(unsigned long *)old_set);
-#endif
- /* XXX: locking issue */
- /* Note that it is important that we don't call page_unprotect() unless
- * this is really a "write to nonwriteable page" fault, because
- * page_unprotect() assumes that if it is called for an access to
- * a page that's writeable this means we had two threads racing and
- * another thread got there first and already made the page writeable;
- * so we will retry the access. If we were to call page_unprotect()
- * for some other kind of fault that should really be passed to the
- * guest, we'd end up in an infinite loop of retrying the faulting
- * access.
- */
- if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR &&
- h2g_valid(address)) {
- switch (page_unprotect(h2g(address), pc)) {
- case 0:
- /* Fault not caused by a page marked unwritable to protect
- * cached translations, must be the guest binary's problem.
- */
- break;
- case 1:
- /* Fault caused by protection of cached translation; TBs
- * invalidated, so resume execution. Retain helper_retaddr
- * for a possible second fault.
- */
- return 1;
- case 2:
- /* Fault caused by protection of cached translation, and the
- * currently executing TB was modified and must be exited
- * immediately. Clear helper_retaddr for next execution.
- */
- clear_helper_retaddr();
- cpu_exit_tb_from_sighandler(cpu, old_set);
- /* NORETURN */
-
- default:
- g_assert_not_reached();
- }
+/**
+ * handle_sigsegv_accerr_write:
+ * @cpu: the cpu context
+ * @old_set: the sigset_t from the signal ucontext_t
+ * @host_pc: the host pc, adjusted for the signal
+ * @guest_addr: the guest address of the fault
+ *
+ * Return true if the write fault has been handled, and should be re-tried.
+ *
+ * Note that it is important that we don't call page_unprotect() unless
+ * this is really a "write to nonwriteable page" fault, because
+ * page_unprotect() assumes that if it is called for an access to
+ * a page that's writeable this means we had two threads racing and
+ * another thread got there first and already made the page writeable;
+ * so we will retry the access. If we were to call page_unprotect()
+ * for some other kind of fault that should really be passed to the
+ * guest, we'd end up in an infinite loop of retrying the faulting access.
+ */
+bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
+ uintptr_t host_pc, abi_ptr guest_addr)
+{
+ switch (page_unprotect(guest_addr, host_pc)) {
+ case 0:
+ /*
+ * Fault not caused by a page marked unwritable to protect
+ * cached translations, must be the guest binary's problem.
+ */
+ return false;
+ case 1:
+ /*
+ * Fault caused by protection of cached translation; TBs
+ * invalidated, so resume execution.
+ */
+ return true;
+ case 2:
+ /*
+ * Fault caused by protection of cached translation, and the
+ * currently executing TB was modified and must be exited immediately.
+ */
+ sigprocmask(SIG_SETMASK, old_set, NULL);
+ cpu_loop_exit_noexc(cpu);
+ /* NORETURN */
+ default:
+ g_assert_not_reached();
}
-
- /* Convert forcefully to guest address space, invalid addresses
- are still valid segv ones */
- address = h2g_nocheck(address);
-
- /*
- * There is no way the target can handle this other than raising
- * an exception. Undo signal and retaddr state prior to longjmp.
- */
- sigprocmask(SIG_SETMASK, old_set, NULL);
- clear_helper_retaddr();
-
- cc = CPU_GET_CLASS(cpu);
- cc->tcg_ops->tlb_fill(cpu, address, 0, access_type,
- MMU_USER_IDX, false, pc);
- g_assert_not_reached();
}
static int probe_access_internal(CPUArchState *env, target_ulong addr,
int fault_size, MMUAccessType access_type,
bool nonfault, uintptr_t ra)
{
- int flags;
+ int acc_flag;
+ bool maperr;
switch (access_type) {
case MMU_DATA_STORE:
- flags = PAGE_WRITE;
+ acc_flag = PAGE_WRITE_ORG;
break;
case MMU_DATA_LOAD:
- flags = PAGE_READ;
+ acc_flag = PAGE_READ;
break;
case MMU_INST_FETCH:
- flags = PAGE_EXEC;
+ acc_flag = PAGE_EXEC;
break;
default:
g_assert_not_reached();
}
- if (!guest_addr_valid_untagged(addr) ||
- page_check_range(addr, 1, flags) < 0) {
- if (nonfault) {
- return TLB_INVALID_MASK;
- } else {
- CPUState *cpu = env_cpu(env);
- CPUClass *cc = CPU_GET_CLASS(cpu);
- cc->tcg_ops->tlb_fill(cpu, addr, fault_size, access_type,
- MMU_USER_IDX, false, ra);
- g_assert_not_reached();
+ if (guest_addr_valid_untagged(addr)) {
+ int page_flags = page_get_flags(addr);
+ if (page_flags & acc_flag) {
+ return 0; /* success */
}
+ maperr = !(page_flags & PAGE_VALID);
+ } else {
+ maperr = true;
}
- return 0;
+
+ if (nonfault) {
+ return TLB_INVALID_MASK;
+ }
+
+ cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
}
int probe_access_flags(CPUArchState *env, target_ulong addr,
@@ -250,640 +199,6 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
return size ? g2h(env_cpu(env), addr) : NULL;
}
-#if defined(__i386__)
-
-#if defined(__NetBSD__)
-#include <ucontext.h>
-#include <machine/trap.h>
-
-#define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP])
-#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
-#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#elif defined(__FreeBSD__) || defined(__DragonFly__)
-#include <ucontext.h>
-#include <machine/trap.h>
-
-#define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip))
-#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
-#define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#elif defined(__OpenBSD__)
-#include <machine/trap.h>
-#define EIP_sig(context) ((context)->sc_eip)
-#define TRAP_sig(context) ((context)->sc_trapno)
-#define ERROR_sig(context) ((context)->sc_err)
-#define MASK_sig(context) ((context)->sc_mask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#else
-#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP])
-#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
-#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP 0xe
-#endif
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
-#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
- ucontext_t *uc = puc;
-#elif defined(__OpenBSD__)
- struct sigcontext *uc = puc;
-#else
- ucontext_t *uc = puc;
-#endif
- unsigned long pc;
- int trapno;
-
-#ifndef REG_EIP
-/* for glibc 2.1 */
-#define REG_EIP EIP
-#define REG_ERR ERR
-#define REG_TRAPNO TRAPNO
-#endif
- pc = EIP_sig(uc);
- trapno = TRAP_sig(uc);
- return handle_cpu_signal(pc, info,
- trapno == PAGE_FAULT_TRAP ?
- (ERROR_sig(uc) >> 1) & 1 : 0,
- &MASK_sig(uc));
-}
-
-#elif defined(__x86_64__)
-
-#ifdef __NetBSD__
-#include <machine/trap.h>
-#define PC_sig(context) _UC_MACHINE_PC(context)
-#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
-#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#elif defined(__OpenBSD__)
-#include <machine/trap.h>
-#define PC_sig(context) ((context)->sc_rip)
-#define TRAP_sig(context) ((context)->sc_trapno)
-#define ERROR_sig(context) ((context)->sc_err)
-#define MASK_sig(context) ((context)->sc_mask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#elif defined(__FreeBSD__) || defined(__DragonFly__)
-#include <ucontext.h>
-#include <machine/trap.h>
-
-#define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip))
-#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
-#define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP T_PAGEFLT
-#else
-#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP])
-#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
-#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
-#define MASK_sig(context) ((context)->uc_sigmask)
-#define PAGE_FAULT_TRAP 0xe
-#endif
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- unsigned long pc;
-#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
- ucontext_t *uc = puc;
-#elif defined(__OpenBSD__)
- struct sigcontext *uc = puc;
-#else
- ucontext_t *uc = puc;
-#endif
-
- pc = PC_sig(uc);
- return handle_cpu_signal(pc, info,
- TRAP_sig(uc) == PAGE_FAULT_TRAP ?
- (ERROR_sig(uc) >> 1) & 1 : 0,
- &MASK_sig(uc));
-}
-
-#elif defined(_ARCH_PPC)
-
-/***********************************************************************
- * signal context platform-specific definitions
- * From Wine
- */
-#ifdef linux
-/* All Registers access - only for local access */
-#define REG_sig(reg_name, context) \
- ((context)->uc_mcontext.regs->reg_name)
-/* Gpr Registers access */
-#define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context)
-/* Program counter */
-#define IAR_sig(context) REG_sig(nip, context)
-/* Machine State Register (Supervisor) */
-#define MSR_sig(context) REG_sig(msr, context)
-/* Count register */
-#define CTR_sig(context) REG_sig(ctr, context)
-/* User's integer exception register */
-#define XER_sig(context) REG_sig(xer, context)
-/* Link register */
-#define LR_sig(context) REG_sig(link, context)
-/* Condition register */
-#define CR_sig(context) REG_sig(ccr, context)
-
-/* Float Registers access */
-#define FLOAT_sig(reg_num, context) \
- (((double *)((char *)((context)->uc_mcontext.regs + 48 * 4)))[reg_num])
-#define FPSCR_sig(context) \
- (*(int *)((char *)((context)->uc_mcontext.regs + (48 + 32 * 2) * 4)))
-/* Exception Registers access */
-#define DAR_sig(context) REG_sig(dar, context)
-#define DSISR_sig(context) REG_sig(dsisr, context)
-#define TRAP_sig(context) REG_sig(trap, context)
-#endif /* linux */
-
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <ucontext.h>
-#define IAR_sig(context) ((context)->uc_mcontext.mc_srr0)
-#define MSR_sig(context) ((context)->uc_mcontext.mc_srr1)
-#define CTR_sig(context) ((context)->uc_mcontext.mc_ctr)
-#define XER_sig(context) ((context)->uc_mcontext.mc_xer)
-#define LR_sig(context) ((context)->uc_mcontext.mc_lr)
-#define CR_sig(context) ((context)->uc_mcontext.mc_cr)
-/* Exception Registers access */
-#define DAR_sig(context) ((context)->uc_mcontext.mc_dar)
-#define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr)
-#define TRAP_sig(context) ((context)->uc_mcontext.mc_exc)
-#endif /* __FreeBSD__|| __FreeBSD_kernel__ */
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
- ucontext_t *uc = puc;
-#else
- ucontext_t *uc = puc;
-#endif
- unsigned long pc;
- int is_write;
-
- pc = IAR_sig(uc);
- is_write = 0;
-#if 0
- /* ppc 4xx case */
- if (DSISR_sig(uc) & 0x00800000) {
- is_write = 1;
- }
-#else
- if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) {
- is_write = 1;
- }
-#endif
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-
-#elif defined(__alpha__)
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- ucontext_t *uc = puc;
- uint32_t *pc = uc->uc_mcontext.sc_pc;
- uint32_t insn = *pc;
- int is_write = 0;
-
- /* XXX: need kernel patch to get write flag faster */
- switch (insn >> 26) {
- case 0x0d: /* stw */
- case 0x0e: /* stb */
- case 0x0f: /* stq_u */
- case 0x24: /* stf */
- case 0x25: /* stg */
- case 0x26: /* sts */
- case 0x27: /* stt */
- case 0x2c: /* stl */
- case 0x2d: /* stq */
- case 0x2e: /* stl_c */
- case 0x2f: /* stq_c */
- is_write = 1;
- }
-
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-#elif defined(__sparc__)
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- int is_write;
- uint32_t insn;
-#if !defined(__arch64__) || defined(CONFIG_SOLARIS)
- uint32_t *regs = (uint32_t *)(info + 1);
- void *sigmask = (regs + 20);
- /* XXX: is there a standard glibc define ? */
- unsigned long pc = regs[1];
-#else
-#ifdef __linux__
- struct sigcontext *sc = puc;
- unsigned long pc = sc->sigc_regs.tpc;
- void *sigmask = (void *)sc->sigc_mask;
-#elif defined(__OpenBSD__)
- struct sigcontext *uc = puc;
- unsigned long pc = uc->sc_pc;
- void *sigmask = (void *)(long)uc->sc_mask;
-#elif defined(__NetBSD__)
- ucontext_t *uc = puc;
- unsigned long pc = _UC_MACHINE_PC(uc);
- void *sigmask = (void *)&uc->uc_sigmask;
-#endif
-#endif
-
- /* XXX: need kernel patch to get write flag faster */
- is_write = 0;
- insn = *(uint32_t *)pc;
- if ((insn >> 30) == 3) {
- switch ((insn >> 19) & 0x3f) {
- case 0x05: /* stb */
- case 0x15: /* stba */
- case 0x06: /* sth */
- case 0x16: /* stha */
- case 0x04: /* st */
- case 0x14: /* sta */
- case 0x07: /* std */
- case 0x17: /* stda */
- case 0x0e: /* stx */
- case 0x1e: /* stxa */
- case 0x24: /* stf */
- case 0x34: /* stfa */
- case 0x27: /* stdf */
- case 0x37: /* stdfa */
- case 0x26: /* stqf */
- case 0x36: /* stqfa */
- case 0x25: /* stfsr */
- case 0x3c: /* casa */
- case 0x3e: /* casxa */
- is_write = 1;
- break;
- }
- }
- return handle_cpu_signal(pc, info, is_write, sigmask);
-}
-
-#elif defined(__arm__)
-
-#if defined(__NetBSD__)
-#include <ucontext.h>
-#include <sys/siginfo.h>
-#endif
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
-#if defined(__NetBSD__)
- ucontext_t *uc = puc;
- siginfo_t *si = pinfo;
-#else
- ucontext_t *uc = puc;
-#endif
- unsigned long pc;
- uint32_t fsr;
- int is_write;
-
-#if defined(__NetBSD__)
- pc = uc->uc_mcontext.__gregs[_REG_R15];
-#elif defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3))
- pc = uc->uc_mcontext.gregs[R15];
-#else
- pc = uc->uc_mcontext.arm_pc;
-#endif
-
-#ifdef __NetBSD__
- fsr = si->si_trap;
-#else
- fsr = uc->uc_mcontext.error_code;
-#endif
- /*
- * In the FSR, bit 11 is WnR, assuming a v6 or
- * later processor. On v5 we will always report
- * this as a read, which will fail later.
- */
- is_write = extract32(fsr, 11, 1);
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-
-#elif defined(__aarch64__)
-
-#if defined(__NetBSD__)
-
-#include <ucontext.h>
-#include <sys/siginfo.h>
-
-int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
-{
- ucontext_t *uc = puc;
- siginfo_t *si = pinfo;
- unsigned long pc;
- int is_write;
- uint32_t esr;
-
- pc = uc->uc_mcontext.__gregs[_REG_PC];
- esr = si->si_trap;
-
- /*
- * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC
- * is 0b10010x: then bit 6 is the WnR bit
- */
- is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
- return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask);
-}
-
-#else
-
-#ifndef ESR_MAGIC
-/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
-#define ESR_MAGIC 0x45535201
-struct esr_context {
- struct _aarch64_ctx head;
- uint64_t esr;
-};
-#endif
-
-static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc)
-{
- return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved;
-}
-
-static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr)
-{
- return (struct _aarch64_ctx *)((char *)hdr + hdr->size);
-}
-
-int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
-{
- siginfo_t *info = pinfo;
- ucontext_t *uc = puc;
- uintptr_t pc = uc->uc_mcontext.pc;
- bool is_write;
- struct _aarch64_ctx *hdr;
- struct esr_context const *esrctx = NULL;
-
- /* Find the esr_context, which has the WnR bit in it */
- for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) {
- if (hdr->magic == ESR_MAGIC) {
- esrctx = (struct esr_context const *)hdr;
- break;
- }
- }
-
- if (esrctx) {
- /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */
- uint64_t esr = esrctx->esr;
- is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
- } else {
- /*
- * Fall back to parsing instructions; will only be needed
- * for really ancient (pre-3.16) kernels.
- */
- uint32_t insn = *(uint32_t *)pc;
-
- is_write = ((insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */
- || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */
- || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */
- || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */
- || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */
- || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */
- || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */
- /* Ignore bits 10, 11 & 21, controlling indexing. */
- || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */
- || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */
- /* Ignore bits 23 & 24, controlling indexing. */
- || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */
- }
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-#endif
-
-#elif defined(__s390__)
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- ucontext_t *uc = puc;
- unsigned long pc;
- uint16_t *pinsn;
- int is_write = 0;
-
- pc = uc->uc_mcontext.psw.addr;
-
- /*
- * ??? On linux, the non-rt signal handler has 4 (!) arguments instead
- * of the normal 2 arguments. The 4th argument contains the "Translation-
- * Exception Identification for DAT Exceptions" from the hardware (aka
- * "int_parm_long"), which does in fact contain the is_write value.
- * The rt signal handler, as far as I can tell, does not give this value
- * at all. Not that we could get to it from here even if it were.
- * So fall back to parsing instructions. Treat read-modify-write ones as
- * writes, which is not fully correct, but for tracking self-modifying code
- * this is better than treating them as reads. Checking si_addr page flags
- * might be a viable improvement, albeit a racy one.
- */
- /* ??? This is not even close to complete. */
- pinsn = (uint16_t *)pc;
- switch (pinsn[0] >> 8) {
- case 0x50: /* ST */
- case 0x42: /* STC */
- case 0x40: /* STH */
- case 0xba: /* CS */
- case 0xbb: /* CDS */
- is_write = 1;
- break;
- case 0xc4: /* RIL format insns */
- switch (pinsn[0] & 0xf) {
- case 0xf: /* STRL */
- case 0xb: /* STGRL */
- case 0x7: /* STHRL */
- is_write = 1;
- }
- break;
- case 0xc8: /* SSF format insns */
- switch (pinsn[0] & 0xf) {
- case 0x2: /* CSST */
- is_write = 1;
- }
- break;
- case 0xe3: /* RXY format insns */
- switch (pinsn[2] & 0xff) {
- case 0x50: /* STY */
- case 0x24: /* STG */
- case 0x72: /* STCY */
- case 0x70: /* STHY */
- case 0x8e: /* STPQ */
- case 0x3f: /* STRVH */
- case 0x3e: /* STRV */
- case 0x2f: /* STRVG */
- is_write = 1;
- }
- break;
- case 0xeb: /* RSY format insns */
- switch (pinsn[2] & 0xff) {
- case 0x14: /* CSY */
- case 0x30: /* CSG */
- case 0x31: /* CDSY */
- case 0x3e: /* CDSG */
- case 0xe4: /* LANG */
- case 0xe6: /* LAOG */
- case 0xe7: /* LAXG */
- case 0xe8: /* LAAG */
- case 0xea: /* LAALG */
- case 0xf4: /* LAN */
- case 0xf6: /* LAO */
- case 0xf7: /* LAX */
- case 0xfa: /* LAAL */
- case 0xf8: /* LAA */
- is_write = 1;
- }
- break;
- }
-
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-
-#elif defined(__mips__)
-
-#if defined(__misp16) || defined(__mips_micromips)
-#error "Unsupported encoding"
-#endif
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- ucontext_t *uc = puc;
- uintptr_t pc = uc->uc_mcontext.pc;
- uint32_t insn = *(uint32_t *)pc;
- int is_write = 0;
-
- /* Detect all store instructions at program counter. */
- switch((insn >> 26) & 077) {
- case 050: /* SB */
- case 051: /* SH */
- case 052: /* SWL */
- case 053: /* SW */
- case 054: /* SDL */
- case 055: /* SDR */
- case 056: /* SWR */
- case 070: /* SC */
- case 071: /* SWC1 */
- case 074: /* SCD */
- case 075: /* SDC1 */
- case 077: /* SD */
-#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
- case 072: /* SWC2 */
- case 076: /* SDC2 */
-#endif
- is_write = 1;
- break;
- case 023: /* COP1X */
- /* Required in all versions of MIPS64 since
- MIPS64r1 and subsequent versions of MIPS32r2. */
- switch (insn & 077) {
- case 010: /* SWXC1 */
- case 011: /* SDXC1 */
- case 015: /* SUXC1 */
- is_write = 1;
- }
- break;
- }
-
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-
-#elif defined(__riscv)
-
-int cpu_signal_handler(int host_signum, void *pinfo,
- void *puc)
-{
- siginfo_t *info = pinfo;
- ucontext_t *uc = puc;
- greg_t pc = uc->uc_mcontext.__gregs[REG_PC];
- uint32_t insn = *(uint32_t *)pc;
- int is_write = 0;
-
- /* Detect store by reading the instruction at the program
- counter. Note: we currently only generate 32-bit
- instructions so we thus only detect 32-bit stores */
- switch (((insn >> 0) & 0b11)) {
- case 3:
- switch (((insn >> 2) & 0b11111)) {
- case 8:
- switch (((insn >> 12) & 0b111)) {
- case 0: /* sb */
- case 1: /* sh */
- case 2: /* sw */
- case 3: /* sd */
- case 4: /* sq */
- is_write = 1;
- break;
- default:
- break;
- }
- break;
- case 9:
- switch (((insn >> 12) & 0b111)) {
- case 2: /* fsw */
- case 3: /* fsd */
- case 4: /* fsq */
- is_write = 1;
- break;
- default:
- break;
- }
- break;
- default:
- break;
- }
- }
-
- /* Check for compressed instructions */
- switch (((insn >> 13) & 0b111)) {
- case 7:
- switch (insn & 0b11) {
- case 0: /*c.sd */
- case 2: /* c.sdsp */
- is_write = 1;
- break;
- default:
- break;
- }
- break;
- case 6:
- switch (insn & 0b11) {
- case 0: /* c.sw */
- case 3: /* c.swsp */
- is_write = 1;
- break;
- default:
- break;
- }
- break;
- default:
- break;
- }
-
- return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
-}
-
-#else
-
-#error host CPU specific signal handler needed
-
-#endif
-
/* The softmmu versions of these helpers are in cputlb.c. */
/*
@@ -901,12 +216,27 @@ static void validate_memop(MemOpIdx oi, MemOp expected)
#endif
}
+void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
+{
+ cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
+}
+
+void helper_unaligned_st(CPUArchState *env, target_ulong addr)
+{
+ cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
+}
+
static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t ra, MMUAccessType type)
{
+ MemOp mop = get_memop(oi);
+ int a_bits = get_alignment_bits(mop);
void *ret;
- /* TODO: Enforce guest required alignment. */
+ /* Enforce guest required alignment. */
+ if (unlikely(addr & ((1 << a_bits) - 1))) {
+ cpu_loop_exit_sigbus(env_cpu(env), addr, type, ra);
+ }
ret = g2h(env_cpu(env), addr);
set_helper_retaddr(ra);
@@ -1160,11 +490,22 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, int size, int prot,
uintptr_t retaddr)
{
+ MemOp mop = get_memop(oi);
+ int a_bits = get_alignment_bits(mop);
+ void *ret;
+
+ /* Enforce guest required alignment. */
+ if (unlikely(addr & ((1 << a_bits) - 1))) {
+ MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE;
+ cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr);
+ }
+
/* Enforce qemu required alignment. */
if (unlikely(addr & (size - 1))) {
cpu_loop_exit_atomic(env_cpu(env), retaddr);
}
- void *ret = g2h(env_cpu(env), addr);
+
+ ret = g2h(env_cpu(env), addr);
set_helper_retaddr(retaddr);
return ret;
}
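
With cpu_signal_handler() removed from this file, the per-host details move to the new linux-user/host/*/host-signal.h headers (added below) and the common flow lives in linux-user/signal.c. A condensed sketch of that flow, assuming host_signal_pc() and host_signal_write() from the per-host headers:

    /* Condensed sketch of the common SIGSEGV path (see linux-user/signal.c). */
    static void sigsegv_sketch(siginfo_t *info, void *puc)
    {
        CPUState *cpu = thread_cpu;
        ucontext_t *uc = puc;
        uintptr_t pc = host_signal_pc(uc);            /* per-host header */
        bool is_write = host_signal_write(info, uc);  /* per-host header */
        MMUAccessType access_type = adjust_signal_pc(&pc, is_write);
        bool maperr = true;

        if (info->si_code == SEGV_ACCERR && h2g_valid(info->si_addr)) {
            /* If a cached-translation page was unprotected, just retry. */
            if (is_write &&
                handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask,
                                            pc, h2g(info->si_addr))) {
                return;
            }
            maperr = false;   /* mapped but inaccessible: permission fault */
        }
        sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
        cpu_loop_exit_sigsegv(cpu, h2g_nocheck(info->si_addr),
                              access_type, maperr, pc);
    }
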
diff --git a/configure b/configure
index 039467c04b..d57ad58342 100755
--- a/configure
+++ b/configure
@@ -570,11 +570,7 @@ elif check_define __s390__ ; then
cpu="s390"
fi
elif check_define __riscv ; then
- if check_define _LP64 ; then
- cpu="riscv64"
- else
- cpu="riscv32"
- fi
+ cpu="riscv"
elif check_define __arm__ ; then
cpu="arm"
elif check_define __aarch64__ ; then
@@ -587,7 +583,7 @@ ARCH=
# Normalise host CPU name and set ARCH.
# Note that this case should only have supported host CPUs, not guests.
case "$cpu" in
- ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64)
+ ppc|ppc64|s390x|sparc64|x32|riscv)
;;
ppc64le)
ARCH="ppc64"
diff --git a/dump/dump.c b/dump/dump.c
index ab625909f3..662d0a62cd 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -29,6 +29,7 @@
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "hw/misc/vmcoreinfo.h"
+#include "migration/blocker.h"
#ifdef TARGET_X86_64
#include "win_dump.h"
@@ -47,6 +48,8 @@
#define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
+static Error *dump_migration_blocker;
+
#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \
((DIV_ROUND_UP((hdr_size), 4) + \
DIV_ROUND_UP((name_size), 4) + \
@@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
qemu_mutex_unlock_iothread();
}
}
+ migrate_del_blocker(dump_migration_blocker);
return 0;
}
@@ -2005,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file,
return;
}
+ if (!dump_migration_blocker) {
+ error_setg(&dump_migration_blocker,
+ "Live migration disabled: dump-guest-memory in progress");
+ }
+
+ /*
+ * Allow this even with -only-migratable, but forbid migration while
+ * the dump of guest memory is in progress.
+ */
+ if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
+ /* Remember to release the fd before passing it over to dump state */
+ close(fd);
+ return;
+ }
+
s = &dump_state_global;
dump_state_prepare(s);
diff --git a/hmp-commands.hx b/hmp-commands.hx
index cf723c69ac..3a5aeba3fe 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1737,8 +1737,10 @@ ERST
{
.name = "calc_dirty_rate",
- .args_type = "second:l,sample_pages_per_GB:l?",
- .params = "second [sample_pages_per_GB]",
- .help = "start a round of guest dirty rate measurement",
+ .args_type = "dirty_ring:-r,dirty_bitmap:-b,second:l,sample_pages_per_GB:l?",
+ .params = "[-r] [-b] second [sample_pages_per_GB]",
+ .help = "start a round of guest dirty rate measurement (using -r to"
+ "\n\t\t\t specify dirty ring as the method of calculation and"
+ "\n\t\t\t -b to specify dirty bitmap as method of calculation)",
.cmd = hmp_calc_dirty_rate,
},
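
With the new flags, a measurement round can be requested from the HMP monitor, e.g. (illustrative session; the result is read back with "info dirty_rate"):

    (qemu) calc_dirty_rate -r 10
    (qemu) info dirty_rate
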
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e3d3d5cf89..482be95415 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -1613,8 +1613,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
{
if (enable) {
- memory_global_dirty_log_start();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
} else {
- memory_global_dirty_log_stop();
+ memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
}
}
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index df91e454b2..d5a578142b 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -228,6 +228,38 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
return ret;
}
+static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
+ MemoryRegionSection *s,
+ void *arg,
+ virtio_mem_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ int ret = 0;
+
+ first_bit = s->offset_within_region / vmem->block_size;
+ first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
+ while (first_bit < vmem->bitmap_size) {
+ MemoryRegionSection tmp = *s;
+
+ offset = first_bit * vmem->block_size;
+ last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * vmem->block_size;
+
+ if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+ break;
+ }
+ ret = cb(&tmp, arg);
+ if (ret) {
+ break;
+ }
+ first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+ last_bit + 2);
+ }
+ return ret;
+}
+
static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
RamDiscardListener *rdl = arg;
@@ -744,7 +776,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
host_memory_backend_set_mapped(vmem->memdev, true);
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
qemu_register_reset(virtio_mem_system_reset, vmem);
- precopy_add_notifier(&vmem->precopy_notifier);
/*
* Set ourselves as RamDiscardManager before the plug handler maps the
@@ -764,7 +795,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
* found via an address space anymore. Unset ourselves.
*/
memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
- precopy_remove_notifier(&vmem->precopy_notifier);
qemu_unregister_reset(virtio_mem_system_reset, vmem);
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
host_memory_backend_set_mapped(vmem->memdev, false);
@@ -1057,43 +1087,11 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
vmem->block_size = value;
}
-static int virtio_mem_precopy_exclude_range_cb(const VirtIOMEM *vmem, void *arg,
- uint64_t offset, uint64_t size)
-{
- void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block);
-
- qemu_guest_free_page_hint(host + offset, size);
- return 0;
-}
-
-static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
-{
- virtio_mem_for_each_unplugged_range(vmem, NULL,
- virtio_mem_precopy_exclude_range_cb);
-}
-
-static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data)
-{
- VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier);
- PrecopyNotifyData *pnd = data;
-
- switch (pnd->reason) {
- case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
- virtio_mem_precopy_exclude_unplugged(vmem);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
static void virtio_mem_instance_init(Object *obj)
{
VirtIOMEM *vmem = VIRTIO_MEM(obj);
notifier_list_init(&vmem->size_change_notifiers);
- vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
QLIST_INIT(&vmem->rdl_list);
object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
@@ -1170,6 +1168,31 @@ static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
virtio_mem_rdm_replay_populated_cb);
}
+static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
+ void *arg)
+{
+ struct VirtIOMEMReplayData *data = arg;
+
+ ((ReplayRamDiscard)data->fn)(s, data->opaque);
+ return 0;
+}
+
+static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamDiscard replay_fn,
+ void *opaque)
+{
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ struct VirtIOMEMReplayData data = {
+ .fn = replay_fn,
+ .opaque = opaque,
+ };
+
+ g_assert(s->mr == &vmem->memdev->mr);
+ virtio_mem_for_each_unplugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_discarded_cb);
+}
+
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl,
MemoryRegionSection *s)
@@ -1234,6 +1257,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
rdmc->is_populated = virtio_mem_rdm_is_populated;
rdmc->replay_populated = virtio_mem_rdm_replay_populated;
+ rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
rdmc->register_listener = virtio_mem_rdm_register_listener;
rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}
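
The new replay_discarded hook is what lets migration walk the unplugged parts of a virtio-mem device and drop their dirty bits, instead of relying on the free-page-hint notifier removed above (the consumer lives in the migration/ram.c changes of this series). A sketch of a matching ReplayRamDiscard callback; the function name is invented for illustration:

    /* Illustrative ReplayRamDiscard callback: clear dirty bits for the range. */
    static void sketch_clear_discarded(MemoryRegionSection *section, void *opaque)
    {
        RAMBlock *rb = section->mr->ram_block;
        unsigned long start = section->offset_within_region >> TARGET_PAGE_BITS;
        unsigned long npages = int128_get64(section->size) >> TARGET_PAGE_BITS;

        /* Discarded ranges must never be migrated as dirty pages. */
        bitmap_clear(rb->bmap, start, npages);
    }
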
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 9d5987ba04..6bb2a0f7ec 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -664,16 +664,55 @@ static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
}
/**
- * cpu_signal_handler
- * @signum: host signal number
- * @pinfo: host siginfo_t
- * @puc: host ucontext_t
+ * adjust_signal_pc:
+ * @pc: raw pc from the host signal ucontext_t.
+ * @is_write: host memory operation was write, or read-modify-write.
*
- * To be called from the SIGBUS and SIGSEGV signal handler to inform the
- * virtual cpu of exceptions. Returns true if the signal was handled by
- * the virtual CPU.
+ * Alter @pc as required for unwinding. Return the type of the
+ * guest memory access -- host reads may be for guest execution.
*/
-int cpu_signal_handler(int signum, void *pinfo, void *puc);
+MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write);
+
+/**
+ * handle_sigsegv_accerr_write:
+ * @cpu: the cpu context
+ * @old_set: the sigset_t from the signal ucontext_t
+ * @host_pc: the host pc, adjusted for the signal
+ * @guest_addr: the guest address of the fault
+ *
+ * Return true if the write fault has been handled, and should be re-tried.
+ */
+bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
+ uintptr_t host_pc, abi_ptr guest_addr);
+
+/**
+ * cpu_loop_exit_sigsegv:
+ * @cpu: the cpu context
+ * @addr: the guest address of the fault
+ * @access_type: access was read/write/execute
+ * @maperr: true for invalid page, false for permission fault
+ * @ra: host pc for unwinding
+ *
+ * Use the TCGCPUOps hook to record cpu state, do guest operating system
+ * specific things to raise SIGSEGV, and jump to the main cpu loop.
+ */
+void QEMU_NORETURN cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+
+/**
+ * cpu_loop_exit_sigbus:
+ * @cpu: the cpu context
+ * @addr: the guest address of the alignment fault
+ * @access_type: access was read/write/execute
+ * @ra: host pc for unwinding
+ *
+ * Use the TCGCPUOps hook to record cpu state, do guest operating system
+ * specific things to raise SIGBUS, and jump to the main cpu loop.
+ */
+void QEMU_NORETURN cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
+ MMUAccessType access_type,
+ uintptr_t ra);
#else
static inline void mmap_lock(void) {}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index a185b6dcb8..20f1b27377 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -61,7 +61,17 @@ static inline void fuzz_dma_read_cb(size_t addr,
}
#endif
-extern bool global_dirty_log;
+/* Possible bits for global_dirty_log_{start|stop} */
+
+/* Dirty tracking enabled because migration is running */
+#define GLOBAL_DIRTY_MIGRATION (1U << 0)
+
+/* Dirty tracking enabled because measuring dirty rate */
+#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1)
+
+#define GLOBAL_DIRTY_MASK (0x3)
+
+extern unsigned int global_dirty_tracking;
typedef struct MemoryRegionOps MemoryRegionOps;
@@ -540,6 +550,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl,
}
typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
+typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
/*
* RamDiscardManagerClass:
@@ -629,6 +640,21 @@ struct RamDiscardManagerClass {
ReplayRamPopulate replay_fn, void *opaque);
/**
+ * @replay_discarded:
+ *
+ * Call the #ReplayRamDiscard callback for all discarded parts within the
+ * #MemoryRegionSection via the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamDiscard callback
+ * @opaque: pointer to forward to the callback
+ */
+ void (*replay_discarded)(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscard replay_fn, void *opaque);
+
+ /**
* @register_listener:
*
* Register a #RamDiscardListener for the given #MemoryRegionSection and
@@ -672,6 +698,11 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
ReplayRamPopulate replay_fn,
void *opaque);
+void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscard replay_fn,
+ void *opaque);
+
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl,
MemoryRegionSection *section);
@@ -2388,13 +2419,17 @@ void memory_listener_unregister(MemoryListener *listener);
/**
* memory_global_dirty_log_start: begin dirty logging for all regions
+ *
+ * @flags: purpose of starting dirty log, migration or dirty rate
*/
-void memory_global_dirty_log_start(void);
+void memory_global_dirty_log_start(unsigned int flags);
/**
* memory_global_dirty_log_stop: end dirty logging for all regions
+ *
+ * @flags: purpose of stopping dirty log, migration or dirty rate
*/
-void memory_global_dirty_log_stop(void);
+void memory_global_dirty_log_stop(unsigned int flags);
void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled);
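
Because start/stop now take per-purpose flags, two users can hold dirty logging open at the same time and only the last one actually turns it off. A minimal sketch of a measurement window that no longer disturbs a concurrent migration (the function is illustrative; the real logic is in migration/dirtyrate.c):

    /* Illustrative measurement window using the new per-purpose flags. */
    static void measure_window_sketch(int64_t ms)
    {
        qemu_mutex_lock_iothread();
        memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
        qemu_mutex_unlock_iothread();

        g_usleep(ms * 1000);    /* let the guest dirty pages */

        qemu_mutex_lock_iothread();
        memory_global_dirty_log_sync();
        memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
        qemu_mutex_unlock_iothread();
    }
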
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 551876bed0..64fb936c7c 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -26,6 +26,8 @@
#include "exec/ramlist.h"
#include "exec/ramblock.h"
+extern uint64_t total_dirty_pages;
+
/**
* clear_bmap_size: calculate clear bitmap size
*
@@ -369,10 +371,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
- if (global_dirty_log) {
+ if (global_dirty_tracking) {
qatomic_or(
&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
temp);
+ if (unlikely(
+ global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+ total_dirty_pages += ctpopl(temp);
+ }
}
if (tcg_enabled()) {
@@ -392,7 +398,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
} else {
uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
- if (!global_dirty_log) {
+ if (!global_dirty_tracking) {
clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
}
@@ -403,6 +409,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
for (i = 0; i < len; i++) {
if (bitmap[i] != 0) {
c = leul_to_cpu(bitmap[i]);
+ if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+ total_dirty_pages += ctpopl(c);
+ }
do {
j = ctzl(c);
c &= ~(1ul << j);
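
Since total_dirty_pages is only accumulated while GLOBAL_DIRTY_DIRTY_RATE is set, the dirty-bitmap method can derive a rate from two snapshots of it. A sketch of the arithmetic (names are hypothetical; the real computation is in migration/dirtyrate.c):

    /* Hypothetical helper: dirty rate in MB/s from two snapshots. */
    static uint64_t dirty_rate_mbps(uint64_t pages_before, uint64_t pages_after,
                                    int64_t calc_time_ms)
    {
        uint64_t bytes = (pages_after - pages_before) * TARGET_PAGE_SIZE;

        return bytes * 1000 / calc_time_ms / (1024 * 1024);
    }
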
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 1a10497af3..e948e81f1a 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -381,6 +381,7 @@ struct CPUState {
struct kvm_run *kvm_run;
struct kvm_dirty_gfn *kvm_dirty_gfns;
uint32_t kvm_fetch_index;
+ uint64_t dirty_pages;
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 6cbe17f2e6..e13898553a 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -35,18 +35,6 @@ struct TCGCPUOps {
void (*cpu_exec_enter)(CPUState *cpu);
/** @cpu_exec_exit: Callback for cpu_exec cleanup */
void (*cpu_exec_exit)(CPUState *cpu);
- /**
- * @tlb_fill: Handle a softmmu tlb miss or user-only address fault
- *
- * For system mode, if the access is valid, call tlb_set_page
- * and return true; if the access is invalid, and probe is
- * true, return false; otherwise raise an exception and do
- * not return. For user-only mode, always raise an exception
- * and do not return.
- */
- bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
/** @debug_excp_handler: Callback for handling debug exceptions */
void (*debug_excp_handler)(CPUState *cpu);
@@ -69,6 +57,16 @@ struct TCGCPUOps {
/** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */
bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
/**
+ * @tlb_fill: Handle a softmmu tlb miss
+ *
+ * If the access is valid, call tlb_set_page and return true;
+ * if the access is invalid and probe is true, return false;
+ * otherwise raise an exception and do not return.
+ */
+ bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+ /**
* @do_transaction_failed: Callback for handling failed memory transactions
* (ie bus faults or external aborts; not MMU faults)
*/
@@ -111,6 +109,55 @@ struct TCGCPUOps {
*/
bool (*io_recompile_replay_branch)(CPUState *cpu,
const TranslationBlock *tb);
+#else
+ /**
+ * record_sigsegv:
+ * @cpu: cpu context
+ * @addr: faulting guest address
+ * @access_type: access was read/write/execute
+ * @maperr: true for invalid page, false for permission fault
+ * @ra: host pc for unwinding
+ *
+ * We are about to raise SIGSEGV with si_code set for @maperr,
+ * and si_addr set for @addr. Record anything further needed
+ * for the signal ucontext_t.
+ *
+ * If the emulated kernel does not provide the signal handler with
+ * anything besides the user context registers, and the siginfo_t,
+ * then this hook need do nothing and may be omitted.
+ * Otherwise, record the data and return; the caller will raise
+ * the signal, unwind the cpu state, and return to the main loop.
+ *
+ * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided
+ * so that a "normal" cpu exception can be raised. In this case,
+ * the signal must be raised by the architecture cpu_loop.
+ */
+ void (*record_sigsegv)(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+ /**
+ * record_sigbus:
+ * @cpu: cpu context
+ * @addr: misaligned guest address
+ * @access_type: access was read/write/execute
+ * @ra: host pc for unwinding
+ *
+ * We are about to raise SIGBUS with si_code BUS_ADRALN,
+ * and si_addr set for @addr. Record anything further needed
+ * for the signal ucontext_t.
+ *
+ * If the emulated kernel does not provide the signal handler with
+ * anything besides the user context registers, and the siginfo_t,
+ * then this hook need do nothing and may be omitted.
+ * Otherwise, record the data and return; the caller will raise
+ * the signal, unwind the cpu state, and return to the main loop.
+ *
+ * If it is simpler to re-use the sysemu do_unaligned_access code,
+ * @ra is provided so that a "normal" cpu exception can be raised.
+ * In this case, the signal must be raised by the architecture cpu_loop.
+ */
+ void (*record_sigbus)(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type, uintptr_t ra);
#endif /* CONFIG_SOFTMMU */
#endif /* NEED_CPU_H */
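
For many targets these hooks are small; a sketch of a record_sigsegv implementation taking the "re-use the sysemu tlb_fill code" route described above, raising a normal cpu exception and leaving the signal to the architecture cpu_loop (the target name, env field, and exception constant are hypothetical):

    /* Illustrative record_sigsegv hook for a hypothetical target "xyz". */
    static void xyz_cpu_record_sigsegv(CPUState *cs, vaddr addr,
                                       MMUAccessType access_type,
                                       bool maperr, uintptr_t ra)
    {
        XYZCPU *cpu = XYZ_CPU(cs);

        /* Stash what the guest signal frame will need. */
        cpu->env.fault_address = addr;

        /* Raise a "normal" exception; cpu_loop converts it to SIGSEGV. */
        cs->exception_index = EXCP_XYZ_FAULT;
        cpu_loop_exit_restore(cs, ra);
    }
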
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
index 9a6e348fa2..a5dd6a493b 100644
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@@ -65,9 +65,6 @@ struct VirtIOMEM {
/* notifiers to notify when "size" changes */
NotifierList size_change_notifiers;
- /* don't migrate unplugged memory */
- NotifierWithReturn precopy_notifier;
-
/* listeners to notify on plug/unplug activity. */
QLIST_HEAD(, RamDiscardListener) rdl_list;
};
diff --git a/include/migration/blocker.h b/include/migration/blocker.h
index acd27018e9..9cebe2ba06 100644
--- a/include/migration/blocker.h
+++ b/include/migration/blocker.h
@@ -26,6 +26,22 @@
int migrate_add_blocker(Error *reason, Error **errp);
/**
+ * @migrate_add_blocker_internal - prevent migration from proceeding,
+ * without "-only-migratable" implications
+ *
+ * @reason - an error to be returned whenever migration is attempted
+ *
+ * @errp - [out] The reason (if any) we cannot block migration right now.
+ *
+ * @returns - 0 on success, -EBUSY on failure, with errp set.
+ *
+ * Some of the migration blockers can be temporary (e.g., for a few seconds),
+ * so it shouldn't need to conflict with "-only-migratable". For those cases,
+ * we can call this function rather than @migrate_add_blocker().
+ */
+int migrate_add_blocker_internal(Error *reason, Error **errp);
+
+/**
* @migrate_del_blocker - remove a blocking error from migration
*
* @reason - the error blocking migration
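
A blocker added through the new entry point follows the same add/del pairing as migrate_add_blocker(), as the dump/dump.c hunks above show. A minimal sketch of the pattern for some other short-lived operation (op_run() is hypothetical):

    /* Minimal sketch: block migration only for the duration of op_run(). */
    static Error *op_blocker;

    static int blocked_op(Error **errp)
    {
        int ret;

        if (!op_blocker) {
            error_setg(&op_blocker,
                       "Live migration disabled: operation in progress");
        }
        if (migrate_add_blocker_internal(op_blocker, errp)) {
            return -EBUSY;
        }
        ret = op_run();
        migrate_del_blocker(op_blocker);
        return ret;
    }
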
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index a1ab1ee12d..7b22aeb6ae 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -547,4 +547,5 @@ bool kvm_cpu_check_are_resettable(void);
bool kvm_arch_cpu_check_are_resettable(void);
+bool kvm_dirty_ring_enabled(void);
#endif
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
index 8c86365611..bf40942de4 100644
--- a/include/tcg/tcg-ldst.h
+++ b/include/tcg/tcg-ldst.h
@@ -70,5 +70,10 @@ void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr);
+#else
+
+void QEMU_NORETURN helper_unaligned_ld(CPUArchState *env, target_ulong addr);
+void QEMU_NORETURN helper_unaligned_st(CPUArchState *env, target_ulong addr);
+
#endif /* CONFIG_SOFTMMU */
#endif /* TCG_LDST_H */
diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index 034b737435..97e0728b67 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -79,7 +79,7 @@
void cpu_loop(CPUARMState *env)
{
CPUState *cs = env_cpu(env);
- int trapnr, ec, fsc, si_code;
+ int trapnr, ec, fsc, si_code, si_signo;
abi_long ret;
for (;;) {
@@ -121,20 +121,26 @@ void cpu_loop(CPUARMState *env)
fsc = extract32(env->exception.syndrome, 0, 6);
switch (fsc) {
case 0x04 ... 0x07: /* Translation fault, level {0-3} */
+ si_signo = TARGET_SIGSEGV;
si_code = TARGET_SEGV_MAPERR;
break;
case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */
case 0x0d ... 0x0f: /* Permission fault, level {1-3} */
+ si_signo = TARGET_SIGSEGV;
si_code = TARGET_SEGV_ACCERR;
break;
case 0x11: /* Synchronous Tag Check Fault */
+ si_signo = TARGET_SIGSEGV;
si_code = TARGET_SEGV_MTESERR;
break;
+ case 0x21: /* Alignment fault */
+ si_signo = TARGET_SIGBUS;
+ si_code = TARGET_BUS_ADRALN;
+ break;
default:
g_assert_not_reached();
}
-
- force_sig_fault(TARGET_SIGSEGV, si_code, env->exception.vaddress);
+ force_sig_fault(si_signo, si_code, env->exception.vaddress);
break;
case EXCP_DEBUG:
case EXCP_BKPT:
diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c
index 1b00a81385..4029849d5c 100644
--- a/linux-user/alpha/cpu_loop.c
+++ b/linux-user/alpha/cpu_loop.c
@@ -54,21 +54,6 @@ void cpu_loop(CPUAlphaState *env)
fprintf(stderr, "External interrupt. Exit\n");
exit(EXIT_FAILURE);
break;
- case EXCP_MMFAULT:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID
- ? TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR);
- info._sifields._sigfault._addr = env->trap_arg0;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
- case EXCP_UNALIGN:
- info.si_signo = TARGET_SIGBUS;
- info.si_errno = 0;
- info.si_code = TARGET_BUS_ADRALN;
- info._sifields._sigfault._addr = env->trap_arg0;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_OPCDEC:
do_sigill:
info.si_signo = TARGET_SIGILL;
diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
index ae09adcb95..01cb6eb534 100644
--- a/linux-user/arm/cpu_loop.c
+++ b/linux-user/arm/cpu_loop.c
@@ -25,6 +25,7 @@
#include "cpu_loop-common.h"
#include "signal-common.h"
#include "semihosting/common-semi.h"
+#include "target/arm/syndrome.h"
#define get_user_code_u32(x, gaddr, env) \
({ abi_long __r = get_user_u32((x), (gaddr)); \
@@ -280,7 +281,7 @@ static bool emulate_arm_fpa11(CPUARMState *env, uint32_t opcode)
void cpu_loop(CPUARMState *env)
{
CPUState *cs = env_cpu(env);
- int trapnr;
+ int trapnr, si_signo, si_code;
unsigned int n, insn;
abi_ulong ret;
@@ -423,9 +424,30 @@ void cpu_loop(CPUARMState *env)
break;
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
- /* XXX: check env->error_code */
- force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR,
- env->exception.vaddress);
+ /* For user-only we don't set TTBCR_EAE, so look at the FSR. */
+ switch (env->exception.fsr & 0x1f) {
+ case 0x1: /* Alignment */
+ si_signo = TARGET_SIGBUS;
+ si_code = TARGET_BUS_ADRALN;
+ break;
+ case 0x3: /* Access flag fault, level 1 */
+ case 0x6: /* Access flag fault, level 2 */
+ case 0x9: /* Domain fault, level 1 */
+ case 0xb: /* Domain fault, level 2 */
+ case 0xd: /* Permission fault, level 1 */
+ case 0xf: /* Permission fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_ACCERR;
+ break;
+ case 0x5: /* Translation fault, level 1 */
+ case 0x7: /* Translation fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_MAPERR;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ force_sig_fault(si_signo, si_code, env->exception.vaddress);
break;
case EXCP_DEBUG:
case EXCP_BKPT:
diff --git a/linux-user/cris/cpu_loop.c b/linux-user/cris/cpu_loop.c
index b9085619c4..0d5d268609 100644
--- a/linux-user/cris/cpu_loop.c
+++ b/linux-user/cris/cpu_loop.c
@@ -37,16 +37,6 @@ void cpu_loop(CPUCRISState *env)
process_queued_cpu_work(cs);
switch (trapnr) {
- case 0xaa:
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->pregs[PR_EDA];
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- }
- break;
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c
index bee2a9e4ea..6b24cbaba9 100644
--- a/linux-user/hexagon/cpu_loop.c
+++ b/linux-user/hexagon/cpu_loop.c
@@ -28,8 +28,7 @@
void cpu_loop(CPUHexagonState *env)
{
CPUState *cs = env_cpu(env);
- int trapnr, signum, sigcode;
- target_ulong sigaddr;
+ int trapnr;
target_ulong syscallnum;
target_ulong ret;
@@ -39,10 +38,6 @@ void cpu_loop(CPUHexagonState *env)
cpu_exec_end(cs);
process_queued_cpu_work(cs);
- signum = 0;
- sigcode = 0;
- sigaddr = 0;
-
switch (trapnr) {
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
@@ -65,12 +60,6 @@ void cpu_loop(CPUHexagonState *env)
env->gpr[0] = ret;
}
break;
- case HEX_EXCP_FETCH_NO_UPAGE:
- case HEX_EXCP_PRIV_NO_UREAD:
- case HEX_EXCP_PRIV_NO_UWRITE:
- signum = TARGET_SIGSEGV;
- sigcode = TARGET_SEGV_MAPERR;
- break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
@@ -79,17 +68,6 @@ void cpu_loop(CPUHexagonState *env)
trapnr);
exit(EXIT_FAILURE);
}
-
- if (signum) {
- target_siginfo_t info = {
- .si_signo = signum,
- .si_errno = 0,
- .si_code = sigcode,
- ._sifields._sigfault._addr = sigaddr
- };
- queue_signal(env, info.si_signo, QEMU_SI_KILL, &info);
- }
-
process_pending_signals(env);
}
}
diff --git a/linux-user/host/aarch64/host-signal.h b/linux-user/host/aarch64/host-signal.h
new file mode 100644
index 0000000000..0c0b08383a
--- /dev/null
+++ b/linux-user/host/aarch64/host-signal.h
@@ -0,0 +1,74 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef AARCH64_HOST_SIGNAL_H
+#define AARCH64_HOST_SIGNAL_H
+
+/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
+#ifndef ESR_MAGIC
+#define ESR_MAGIC 0x45535201
+struct esr_context {
+ struct _aarch64_ctx head;
+ uint64_t esr;
+};
+#endif
+
+static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc)
+{
+ return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved;
+}
+
+static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr)
+{
+ return (struct _aarch64_ctx *)((char *)hdr + hdr->size);
+}
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.pc;
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ struct _aarch64_ctx *hdr;
+ uint32_t insn;
+
+ /* Find the esr_context, which has the WnR bit in it */
+ for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) {
+ if (hdr->magic == ESR_MAGIC) {
+ struct esr_context const *ec = (struct esr_context const *)hdr;
+ uint64_t esr = ec->esr;
+
+ /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */
+ return extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
+ }
+ }
+
+ /*
+ * Fall back to parsing instructions; will only be needed
+ * for really ancient (pre-3.16) kernels.
+ */
+ insn = *(uint32_t *)host_signal_pc(uc);
+
+ return (insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */
+ || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */
+ || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */
+ || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */
+ || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */
+ || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */
+ || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */
+ /* Ignore bits 10, 11 & 21, controlling indexing. */
+ || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */
+ || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */
+ /* Ignore bits 23 & 24, controlling indexing. */
+ || (insn & 0x3a400000) == 0x28000000; /* C3.3.7,14-16 */
+}
+
+#endif
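
The ESR test above packs the whole classification into one expression. As a minimal standalone sketch (not part of the patch, assuming only extract32() from QEMU's "qemu/bitops.h"), the same decode reads:

/*
 * Sketch: classify an ESR value the way host_signal_write() does.
 * An EC field (bits 26-31) matching 0b10010x marks a data abort;
 * bit 6 (WnR) is then set for writes.
 */
static inline bool esr_data_abort_is_write(uint64_t esr)
{
    return extract32(esr, 27, 5) == 0x12    /* EC[5:1] == 0b10010 */
        && extract32(esr, 6, 1) == 1;       /* WnR */
}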
diff --git a/linux-user/host/alpha/host-signal.h b/linux-user/host/alpha/host-signal.h
new file mode 100644
index 0000000000..e080be412f
--- /dev/null
+++ b/linux-user/host/alpha/host-signal.h
@@ -0,0 +1,42 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef ALPHA_HOST_SIGNAL_H
+#define ALPHA_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.sc_pc;
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ uint32_t *pc = (uint32_t *)host_signal_pc(uc);
+ uint32_t insn = *pc;
+
+ /* XXX: need kernel patch to get write flag faster */
+ switch (insn >> 26) {
+ case 0x0d: /* stw */
+ case 0x0e: /* stb */
+ case 0x0f: /* stq_u */
+ case 0x24: /* stf */
+ case 0x25: /* stg */
+ case 0x26: /* sts */
+ case 0x27: /* stt */
+ case 0x2c: /* stl */
+ case 0x2d: /* stq */
+ case 0x2e: /* stl_c */
+ case 0x2f: /* stq_c */
+ return true;
+ }
+ return false;
+}
+
+#endif
diff --git a/linux-user/host/arm/host-signal.h b/linux-user/host/arm/host-signal.h
new file mode 100644
index 0000000000..efb165c0c5
--- /dev/null
+++ b/linux-user/host/arm/host-signal.h
@@ -0,0 +1,30 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef ARM_HOST_SIGNAL_H
+#define ARM_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.arm_pc;
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ /*
+ * In the FSR, bit 11 is WnR, assuming a v6 or
+ * later processor. On v5 we will always report
+ * this as a read, which will fail later.
+ */
+ uint32_t fsr = uc->uc_mcontext.error_code;
+ return extract32(fsr, 11, 1);
+}
+
+#endif
diff --git a/linux-user/host/i386/host-signal.h b/linux-user/host/i386/host-signal.h
new file mode 100644
index 0000000000..4c8eef99ce
--- /dev/null
+++ b/linux-user/host/i386/host-signal.h
@@ -0,0 +1,25 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef I386_HOST_SIGNAL_H
+#define I386_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.gregs[REG_EIP];
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe
+ && (uc->uc_mcontext.gregs[REG_ERR] & 0x2);
+}
+
+#endif
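
The REG_TRAPNO/REG_ERR test relies on the architectural x86 page-fault error-code layout (trap 0xe is #PF). A hedged sketch, not part of the patch, of the bits it assumes:

/*
 * Sketch: low bits of the x86 #PF error code as defined in the
 * Intel SDM vol. 3, "Page-Fault Error Code".
 */
#define X86_PF_PROT  0x1  /* 0: non-present page, 1: protection violation */
#define X86_PF_WRITE 0x2  /* 0: read access, 1: write access */
#define X86_PF_USER  0x4  /* 0: supervisor mode, 1: user mode */

static inline bool x86_pf_is_write(unsigned long err)
{
    return (err & X86_PF_WRITE) != 0;
}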
diff --git a/linux-user/host/mips/host-signal.h b/linux-user/host/mips/host-signal.h
new file mode 100644
index 0000000000..ef341f7c20
--- /dev/null
+++ b/linux-user/host/mips/host-signal.h
@@ -0,0 +1,62 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIPS_HOST_SIGNAL_H
+#define MIPS_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.pc;
+}
+
+#if defined(__mips16) || defined(__mips_micromips)
+#error "Unsupported encoding"
+#endif
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ uint32_t insn = *(uint32_t *)host_signal_pc(uc);
+
+ /* Detect all store instructions at the program counter. */
+ switch ((insn >> 26) & 077) {
+ case 050: /* SB */
+ case 051: /* SH */
+ case 052: /* SWL */
+ case 053: /* SW */
+ case 054: /* SDL */
+ case 055: /* SDR */
+ case 056: /* SWR */
+ case 070: /* SC */
+ case 071: /* SWC1 */
+ case 074: /* SCD */
+ case 075: /* SDC1 */
+ case 077: /* SD */
+#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
+ case 072: /* SWC2 */
+ case 076: /* SDC2 */
+#endif
+ return true;
+ case 023: /* COP1X */
+ /*
+ * Required in all versions of MIPS64 since
+ * MIPS64r1 and subsequent versions of MIPS32r2.
+ */
+ switch (insn & 077) {
+ case 010: /* SWXC1 */
+ case 011: /* SDXC1 */
+ case 015: /* SUXC1 */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+#endif
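
The octal constants above map directly onto the MIPS encoding, where opcode fields fall on 3-bit boundaries. A small sketch, not part of the patch, of the field extraction the switch performs:

/*
 * Sketch: the MIPS major opcode is the top six bits of the
 * instruction word, which is why the cases read naturally in octal.
 */
static inline unsigned mips_major_opcode(uint32_t insn)
{
    return (insn >> 26) & 077;   /* bits 31:26 */
}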
diff --git a/linux-user/host/ppc/host-signal.h b/linux-user/host/ppc/host-signal.h
new file mode 100644
index 0000000000..a491c413dc
--- /dev/null
+++ b/linux-user/host/ppc/host-signal.h
@@ -0,0 +1,25 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_HOST_SIGNAL_H
+#define PPC_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.regs->nip;
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ return uc->uc_mcontext.regs->trap != 0x400
+ && (uc->uc_mcontext.regs->dsisr & 0x02000000);
+}
+
+#endif
diff --git a/linux-user/host/ppc64/host-signal.h b/linux-user/host/ppc64/host-signal.h
new file mode 100644
index 0000000000..a353c22a90
--- /dev/null
+++ b/linux-user/host/ppc64/host-signal.h
@@ -0,0 +1 @@
+#include "../ppc/host-signal.h"
diff --git a/linux-user/host/riscv/host-signal.h b/linux-user/host/riscv/host-signal.h
new file mode 100644
index 0000000000..3b168cb58b
--- /dev/null
+++ b/linux-user/host/riscv/host-signal.h
@@ -0,0 +1,58 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef RISCV_HOST_SIGNAL_H
+#define RISCV_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.__gregs[REG_PC];
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ /*
+ * Detect store by reading the instruction at the program counter.
+ * Do not read more than 16 bits, because we have not yet determined
+ * the size of the instruction.
+ */
+ const uint16_t *pinsn = (const uint16_t *)host_signal_pc(uc);
+ uint16_t insn = pinsn[0];
+
+ /* 16-bit instructions */
+ switch (insn & 0xe003) {
+ case 0xa000: /* c.fsd */
+ case 0xc000: /* c.sw */
+ case 0xe000: /* c.sd (rv64) / c.fsw (rv32) */
+ case 0xa002: /* c.fsdsp */
+ case 0xc002: /* c.swsp */
+ case 0xe002: /* c.sdsp (rv64) / c.fswsp (rv32) */
+ return true;
+ }
+
+ /* 32-bit instructions, major opcodes */
+ switch (insn & 0x7f) {
+ case 0x23: /* store */
+ case 0x27: /* store-fp */
+ return true;
+ case 0x2f: /* amo */
+ /*
+ * The AMO function code is in bits 25-31, unread as yet.
+ * The AMO functions are LR (read), SC (write), and the
+ * rest are all read-modify-write.
+ */
+ insn = pinsn[1];
+ return (insn >> 11) != 2; /* LR */
+ }
+
+ return false;
+}
+
+#endif
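
Reading only 16 bits first is safe because RISC-V encodes the instruction length in the low bits of the first parcel. A sketch of that rule (not part of the patch; it ignores the reserved longer-than-32-bit encodings):

/*
 * Sketch: a parcel whose two low bits are not 0b11 is a 16-bit
 * compressed instruction; 0b11 begins a 32-bit instruction.
 */
static inline size_t riscv_insn_len(uint16_t first_parcel)
{
    return (first_parcel & 0x3) == 0x3 ? 4 : 2;
}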
diff --git a/linux-user/host/riscv64/hostdep.h b/linux-user/host/riscv/hostdep.h
index 865f0fb9ff..2ba07456ae 100644
--- a/linux-user/host/riscv64/hostdep.h
+++ b/linux-user/host/riscv/hostdep.h
@@ -5,8 +5,8 @@
* See the COPYING file in the top-level directory.
*/
-#ifndef RISCV64_HOSTDEP_H
-#define RISCV64_HOSTDEP_H
+#ifndef RISCV_HOSTDEP_H
+#define RISCV_HOSTDEP_H
/* We have a safe-syscall.inc.S */
#define HAVE_SAFE_SYSCALL
diff --git a/linux-user/host/riscv64/safe-syscall.inc.S b/linux-user/host/riscv/safe-syscall.inc.S
index 9ca3fbfd1e..9ca3fbfd1e 100644
--- a/linux-user/host/riscv64/safe-syscall.inc.S
+++ b/linux-user/host/riscv/safe-syscall.inc.S
diff --git a/linux-user/host/riscv32/hostdep.h b/linux-user/host/riscv32/hostdep.h
deleted file mode 100644
index adf9edbf2d..0000000000
--- a/linux-user/host/riscv32/hostdep.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * hostdep.h : things which are dependent on the host architecture
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef RISCV32_HOSTDEP_H
-#define RISCV32_HOSTDEP_H
-
-#endif
diff --git a/linux-user/host/s390/host-signal.h b/linux-user/host/s390/host-signal.h
new file mode 100644
index 0000000000..26990e4893
--- /dev/null
+++ b/linux-user/host/s390/host-signal.h
@@ -0,0 +1,93 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef S390_HOST_SIGNAL_H
+#define S390_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.psw.addr;
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ uint16_t *pinsn = (uint16_t *)host_signal_pc(uc);
+
+ /*
+ * ??? On linux, the non-rt signal handler has 4 (!) arguments instead
+ * of the normal 2 arguments. The 4th argument contains the "Translation-
+ * Exception Identification for DAT Exceptions" from the hardware (aka
+ * "int_parm_long"), which does in fact contain the is_write value.
+ * The rt signal handler, as far as I can tell, does not give this value
+ * at all. Not that we could get to it from here even if it were.
+ * So fall back to parsing instructions. Treat read-modify-write ones as
+ * writes, which is not fully correct, but for tracking self-modifying code
+ * this is better than treating them as reads. Checking si_addr page flags
+ * might be a viable improvement, albeit a racy one.
+ */
+ /* ??? This is not even close to complete. */
+ switch (pinsn[0] >> 8) {
+ case 0x50: /* ST */
+ case 0x42: /* STC */
+ case 0x40: /* STH */
+ case 0xba: /* CS */
+ case 0xbb: /* CDS */
+ return true;
+ case 0xc4: /* RIL format insns */
+ switch (pinsn[0] & 0xf) {
+ case 0xf: /* STRL */
+ case 0xb: /* STGRL */
+ case 0x7: /* STHRL */
+ return true;
+ }
+ break;
+ case 0xc8: /* SSF format insns */
+ switch (pinsn[0] & 0xf) {
+ case 0x2: /* CSST */
+ return true;
+ }
+ break;
+ case 0xe3: /* RXY format insns */
+ switch (pinsn[2] & 0xff) {
+ case 0x50: /* STY */
+ case 0x24: /* STG */
+ case 0x72: /* STCY */
+ case 0x70: /* STHY */
+ case 0x8e: /* STPQ */
+ case 0x3f: /* STRVH */
+ case 0x3e: /* STRV */
+ case 0x2f: /* STRVG */
+ return true;
+ }
+ break;
+ case 0xeb: /* RSY format insns */
+ switch (pinsn[2] & 0xff) {
+ case 0x14: /* CSY */
+ case 0x30: /* CSG */
+ case 0x31: /* CDSY */
+ case 0x3e: /* CDSG */
+ case 0xe4: /* LANG */
+ case 0xe6: /* LAOG */
+ case 0xe7: /* LAXG */
+ case 0xe8: /* LAAG */
+ case 0xea: /* LAALG */
+ case 0xf4: /* LAN */
+ case 0xf6: /* LAO */
+ case 0xf7: /* LAX */
+ case 0xfa: /* LAAL */
+ case 0xf8: /* LAA */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+#endif
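
The parser above indexes the instruction in 16-bit halfwords because s390 derives the instruction length from the first two bits of the opcode. A sketch of that rule, not part of the patch:

/*
 * Sketch: bits 0-1 of the first halfword give the length
 * (00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes), so pinsn[2]
 * above is the third halfword of a 6-byte RXY/RSY instruction,
 * which carries the low opcode byte.
 */
static inline size_t s390_insn_len(uint16_t first_halfword)
{
    switch (first_halfword >> 14) {
    case 0:
        return 2;
    case 3:
        return 6;
    default:
        return 4;
    }
}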
diff --git a/linux-user/host/s390x/host-signal.h b/linux-user/host/s390x/host-signal.h
new file mode 100644
index 0000000000..0e83f9358d
--- /dev/null
+++ b/linux-user/host/s390x/host-signal.h
@@ -0,0 +1 @@
+#include "../s390/host-signal.h"
diff --git a/linux-user/host/sparc/host-signal.h b/linux-user/host/sparc/host-signal.h
new file mode 100644
index 0000000000..5e71d33f8e
--- /dev/null
+++ b/linux-user/host/sparc/host-signal.h
@@ -0,0 +1,54 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SPARC_HOST_SIGNAL_H
+#define SPARC_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+#ifdef __arch64__
+ return uc->uc_mcontext.mc_gregs[MC_PC];
+#else
+ return uc->uc_mcontext.gregs[REG_PC];
+#endif
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ uint32_t insn = *(uint32_t *)host_signal_pc(uc);
+
+ if ((insn >> 30) == 3) {
+ switch ((insn >> 19) & 0x3f) {
+ case 0x05: /* stb */
+ case 0x15: /* stba */
+ case 0x06: /* sth */
+ case 0x16: /* stha */
+ case 0x04: /* st */
+ case 0x14: /* sta */
+ case 0x07: /* std */
+ case 0x17: /* stda */
+ case 0x0e: /* stx */
+ case 0x1e: /* stxa */
+ case 0x24: /* stf */
+ case 0x34: /* stfa */
+ case 0x27: /* stdf */
+ case 0x37: /* stdfa */
+ case 0x26: /* stqf */
+ case 0x36: /* stqfa */
+ case 0x25: /* stfsr */
+ case 0x3c: /* casa */
+ case 0x3e: /* casxa */
+ return true;
+ }
+ }
+ return false;
+}
+
+#endif
diff --git a/linux-user/host/sparc64/host-signal.h b/linux-user/host/sparc64/host-signal.h
new file mode 100644
index 0000000000..1191fe2d40
--- /dev/null
+++ b/linux-user/host/sparc64/host-signal.h
@@ -0,0 +1 @@
+#include "../sparc/host-signal.h"
diff --git a/linux-user/host/x32/host-signal.h b/linux-user/host/x32/host-signal.h
new file mode 100644
index 0000000000..26800591d3
--- /dev/null
+++ b/linux-user/host/x32/host-signal.h
@@ -0,0 +1 @@
+#include "../x86_64/host-signal.h"
diff --git a/linux-user/host/x86_64/host-signal.h b/linux-user/host/x86_64/host-signal.h
new file mode 100644
index 0000000000..883d2fcf65
--- /dev/null
+++ b/linux-user/host/x86_64/host-signal.h
@@ -0,0 +1,24 @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (C) 2021 Linaro Limited
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef X86_64_HOST_SIGNAL_H
+#define X86_64_HOST_SIGNAL_H
+
+static inline uintptr_t host_signal_pc(ucontext_t *uc)
+{
+ return uc->uc_mcontext.gregs[REG_RIP];
+}
+
+static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
+{
+ return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe
+ && (uc->uc_mcontext.gregs[REG_ERR] & 0x2);
+}
+
+#endif
diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c
index 81607a9b27..375576c8f0 100644
--- a/linux-user/hppa/cpu_loop.c
+++ b/linux-user/hppa/cpu_loop.c
@@ -144,29 +144,6 @@ void cpu_loop(CPUHPPAState *env)
env->iaoq_f = env->gr[31];
env->iaoq_b = env->gr[31] + 4;
break;
- case EXCP_ITLB_MISS:
- case EXCP_DTLB_MISS:
- case EXCP_NA_ITLB_MISS:
- case EXCP_NA_DTLB_MISS:
- case EXCP_IMP:
- case EXCP_DMP:
- case EXCP_DMB:
- case EXCP_PAGE_REF:
- case EXCP_DMAR:
- case EXCP_DMPI:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- info.si_code = TARGET_SEGV_ACCERR;
- info._sifields._sigfault._addr = env->cr[CR_IOR];
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
- case EXCP_UNALIGN:
- info.si_signo = TARGET_SIGBUS;
- info.si_errno = 0;
- info.si_code = 0;
- info._sifields._sigfault._addr = env->cr[CR_IOR];
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_ILL:
case EXCP_PRIV_OPR:
case EXCP_PRIV_REG:
diff --git a/linux-user/m68k/cpu_loop.c b/linux-user/m68k/cpu_loop.c
index ebf32be78f..790bd558c3 100644
--- a/linux-user/m68k/cpu_loop.c
+++ b/linux-user/m68k/cpu_loop.c
@@ -90,16 +90,6 @@ void cpu_loop(CPUM68KState *env)
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
- case EXCP_ACCESS:
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->mmu.ar;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- }
- break;
case EXCP_DEBUG:
info.si_signo = TARGET_SIGTRAP;
info.si_errno = 0;
diff --git a/linux-user/microblaze/cpu_loop.c b/linux-user/microblaze/cpu_loop.c
index 52222eb93f..a94467dd2d 100644
--- a/linux-user/microblaze/cpu_loop.c
+++ b/linux-user/microblaze/cpu_loop.c
@@ -37,16 +37,6 @@ void cpu_loop(CPUMBState *env)
process_queued_cpu_work(cs);
switch (trapnr) {
- case 0xaa:
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = 0;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- }
- break;
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
index cb03fb066b..b735c99a24 100644
--- a/linux-user/mips/cpu_loop.c
+++ b/linux-user/mips/cpu_loop.c
@@ -158,17 +158,6 @@ done_syscall:
}
env->active_tc.gpr[2] = ret;
break;
- case EXCP_TLBL:
- case EXCP_TLBS:
- case EXCP_AdEL:
- case EXCP_AdES:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->CP0_BadVAddr;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_CpU:
case EXCP_RI:
info.si_signo = TARGET_SIGILL;
diff --git a/linux-user/openrisc/cpu_loop.c b/linux-user/openrisc/cpu_loop.c
index f6360db47c..3cfdbbf037 100644
--- a/linux-user/openrisc/cpu_loop.c
+++ b/linux-user/openrisc/cpu_loop.c
@@ -54,15 +54,6 @@ void cpu_loop(CPUOpenRISCState *env)
cpu_set_gpr(env, 11, ret);
}
break;
- case EXCP_DPF:
- case EXCP_IPF:
- case EXCP_RANGE:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->pc;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_ALIGN:
info.si_signo = TARGET_SIGBUS;
info.si_errno = 0;
@@ -77,13 +68,6 @@ void cpu_loop(CPUOpenRISCState *env)
info._sifields._sigfault._addr = env->pc;
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
- case EXCP_FPE:
- info.si_signo = TARGET_SIGFPE;
- info.si_errno = 0;
- info.si_code = 0;
- info._sifields._sigfault._addr = env->pc;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_INTERRUPT:
/* We processed the pending cpu work above. */
break;
@@ -96,6 +80,15 @@ void cpu_loop(CPUOpenRISCState *env)
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
break;
+ case EXCP_RANGE:
+ /* Requires SR.OVE set, which linux-user won't do. */
+ cpu_abort(cs, "Unexpected RANGE exception");
+ case EXCP_FPE:
+ /*
+ * Requires FPSCR.FPEE set. Writes to FPSCR from usermode not
+ * yet enabled in kernel ABI, so linux-user does not either.
+ */
+ cpu_abort(cs, "Unexpected FPE exception");
default:
g_assert_not_reached();
}
diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c
index 840b23736b..483e669300 100644
--- a/linux-user/ppc/cpu_loop.c
+++ b/linux-user/ppc/cpu_loop.c
@@ -162,14 +162,6 @@ void cpu_loop(CPUPPCState *env)
cpu_abort(cs, "External interrupt while in user mode. "
"Aborting\n");
break;
- case POWERPC_EXCP_ALIGN: /* Alignment exception */
- /* XXX: check this */
- info.si_signo = TARGET_SIGBUS;
- info.si_errno = 0;
- info.si_code = TARGET_BUS_ADRALN;
- info._sifields._sigfault._addr = env->nip;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case POWERPC_EXCP_PROGRAM: /* Program exception */
case POWERPC_EXCP_HV_EMU: /* HV emulation */
/* XXX: check this */
diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
index e5bb6d908a..b301dac802 100644
--- a/linux-user/riscv/cpu_loop.c
+++ b/linux-user/riscv/cpu_loop.c
@@ -87,13 +87,6 @@ void cpu_loop(CPURISCVState *env)
sigcode = TARGET_TRAP_BRKPT;
sigaddr = env->pc;
break;
- case RISCV_EXCP_INST_PAGE_FAULT:
- case RISCV_EXCP_LOAD_PAGE_FAULT:
- case RISCV_EXCP_STORE_PAGE_FAULT:
- signum = TARGET_SIGSEGV;
- sigcode = TARGET_SEGV_MAPERR;
- sigaddr = env->badaddr;
- break;
case RISCV_EXCP_SEMIHOST:
env->gpr[xA0] = do_common_semihosting(cs);
env->pc += 4;
diff --git a/linux-user/s390x/cpu_loop.c b/linux-user/s390x/cpu_loop.c
index 69b69981f6..d089c8417e 100644
--- a/linux-user/s390x/cpu_loop.c
+++ b/linux-user/s390x/cpu_loop.c
@@ -24,8 +24,6 @@
#include "cpu_loop-common.h"
#include "signal-common.h"
-/* s390x masks the fault address it reports in si_addr for SIGSEGV and SIGBUS */
-#define S390X_FAIL_ADDR_MASK -4096LL
static int get_pgm_data_si_code(int dxc_code)
{
@@ -111,12 +109,13 @@ void cpu_loop(CPUS390XState *env)
n = TARGET_ILL_ILLOPC;
goto do_signal_pc;
case PGM_PROTECTION:
+ force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_ACCERR,
+ env->__excp_addr);
+ break;
case PGM_ADDRESSING:
- sig = TARGET_SIGSEGV;
- /* XXX: check env->error_code */
- n = TARGET_SEGV_MAPERR;
- addr = env->__excp_addr & S390X_FAIL_ADDR_MASK;
- goto do_signal;
+ force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR,
+ env->__excp_addr);
+ break;
case PGM_EXECUTE:
case PGM_SPECIFICATION:
case PGM_SPECIAL_OP:
diff --git a/linux-user/sh4/cpu_loop.c b/linux-user/sh4/cpu_loop.c
index 65b8972e3c..ac9b01840c 100644
--- a/linux-user/sh4/cpu_loop.c
+++ b/linux-user/sh4/cpu_loop.c
@@ -65,14 +65,6 @@ void cpu_loop(CPUSH4State *env)
info.si_code = TARGET_TRAP_BRKPT;
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
- case 0xa0:
- case 0xc0:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->tea;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
case EXCP_ATOMIC:
cpu_exec_step_atomic(cs);
arch_interrupt = false;
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 14d8fdfde1..81c45bfce9 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "exec/gdbstub.h"
+#include "hw/core/tcg-cpu-ops.h"
#include <sys/ucontext.h>
#include <sys/resource.h>
@@ -29,6 +30,7 @@
#include "loader.h"
#include "trace.h"
#include "signal-common.h"
+#include "host-signal.h"
static struct target_sigaction sigact_table[TARGET_NSIG];
@@ -686,9 +688,38 @@ void force_sigsegv(int oldsig)
}
force_sig(TARGET_SIGSEGV);
}
-
#endif
+void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
+ MMUAccessType access_type, bool maperr, uintptr_t ra)
+{
+ const struct TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+
+ if (tcg_ops->record_sigsegv) {
+ tcg_ops->record_sigsegv(cpu, addr, access_type, maperr, ra);
+ }
+
+ force_sig_fault(TARGET_SIGSEGV,
+ maperr ? TARGET_SEGV_MAPERR : TARGET_SEGV_ACCERR,
+ addr);
+ cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit_restore(cpu, ra);
+}
+
+void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
+ MMUAccessType access_type, uintptr_t ra)
+{
+ const struct TCGCPUOps *tcg_ops = CPU_GET_CLASS(cpu)->tcg_ops;
+
+ if (tcg_ops->record_sigbus) {
+ tcg_ops->record_sigbus(cpu, addr, access_type, ra);
+ }
+
+ force_sig_fault(TARGET_SIGBUS, TARGET_BUS_ADRALN, addr);
+ cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit_restore(cpu, ra);
+}
+
/* abort execution with signal */
static void QEMU_NORETURN dump_core_and_abort(int target_sig)
{
@@ -769,41 +800,101 @@ static inline void rewind_if_in_safe_syscall(void *puc)
}
#endif
-static void host_signal_handler(int host_signum, siginfo_t *info,
- void *puc)
+static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
{
CPUArchState *env = thread_cpu->env_ptr;
CPUState *cpu = env_cpu(env);
TaskState *ts = cpu->opaque;
-
- int sig;
target_siginfo_t tinfo;
ucontext_t *uc = puc;
struct emulated_sigtable *k;
+ int guest_sig;
+ uintptr_t pc = 0;
+ bool sync_sig = false;
+
+ /*
+ * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special
+ * handling with respect to signal blocking and unwinding.
+ */
+ if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) {
+ MMUAccessType access_type;
+ uintptr_t host_addr;
+ abi_ptr guest_addr;
+ bool is_write;
+
+ host_addr = (uintptr_t)info->si_addr;
+
+ /*
+ * Convert forcefully to guest address space: addresses outside
+ * reserved_va are still valid to report via SEGV_MAPERR.
+ */
+ guest_addr = h2g_nocheck(host_addr);
- /* the CPU emulator uses some host signals to detect exceptions,
- we forward to it some signals */
- if ((host_signum == SIGSEGV || host_signum == SIGBUS)
- && info->si_code > 0) {
- if (cpu_signal_handler(host_signum, info, puc))
- return;
+ pc = host_signal_pc(uc);
+ is_write = host_signal_write(info, uc);
+ access_type = adjust_signal_pc(&pc, is_write);
+
+ if (host_sig == SIGSEGV) {
+ bool maperr = true;
+
+ if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) {
+ /* If this was a write to a TB protected page, restart. */
+ if (is_write &&
+ handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask,
+ pc, guest_addr)) {
+ return;
+ }
+
+ /*
+ * With reserved_va, the whole address space is PROT_NONE,
+ * which means that we may get ACCERR when we want MAPERR.
+ */
+ if (page_get_flags(guest_addr) & PAGE_VALID) {
+ maperr = false;
+ } else {
+ info->si_code = SEGV_MAPERR;
+ }
+ }
+
+ sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
+ cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc);
+ } else {
+ sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
+ if (info->si_code == BUS_ADRALN) {
+ cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc);
+ }
+ }
+
+ sync_sig = true;
}
/* get target signal number */
- sig = host_to_target_signal(host_signum);
- if (sig < 1 || sig > TARGET_NSIG)
+ guest_sig = host_to_target_signal(host_sig);
+ if (guest_sig < 1 || guest_sig > TARGET_NSIG) {
return;
- trace_user_host_signal(env, host_signum, sig);
-
- rewind_if_in_safe_syscall(puc);
+ }
+ trace_user_host_signal(env, host_sig, guest_sig);
host_to_target_siginfo_noswap(&tinfo, info);
- k = &ts->sigtab[sig - 1];
+ k = &ts->sigtab[guest_sig - 1];
k->info = tinfo;
- k->pending = sig;
+ k->pending = guest_sig;
ts->signal_pending = 1;
- /* Block host signals until target signal handler entered. We
+ /*
+ * For synchronous signals, unwind the cpu state to the faulting
+ * insn and then exit back to the main loop so that the signal
+ * is delivered immediately.
+ */
+ if (sync_sig) {
+ cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit_restore(cpu, pc);
+ }
+
+ rewind_if_in_safe_syscall(puc);
+
+ /*
+ * Block host signals until target signal handler entered. We
* can't block SIGSEGV or SIGBUS while we're executing guest
* code in case the guest code provokes one in the window between
* now and it getting out to the main loop. Signals will be
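
cpu_loop_exit_sigsegv() and cpu_loop_exit_sigbus() above invoke optional per-target hooks before raising the guest signal. A hypothetical sketch of such a hook — the FooCPU type and env fields are invented for illustration; only the callback shape matches the tcg_ops call made by this patch:

/*
 * Hypothetical sketch, not part of the patch: a target's
 * record_sigsegv hook latches fault details into the CPU state
 * before cpu_loop_exit_sigsegv() delivers the guest SIGSEGV.
 */
static void foo_cpu_record_sigsegv(CPUState *cs, vaddr addr,
                                   MMUAccessType access_type,
                                   bool maperr, uintptr_t ra)
{
    FooCPU *cpu = FOO_CPU(cs);        /* invented target type */

    cpu->env.fault_address = addr;    /* invented env field */
    cpu->env.fault_is_exec = (access_type == MMU_INST_FETCH);
}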
diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c
index ad29b4eb6a..0ba65e431c 100644
--- a/linux-user/sparc/cpu_loop.c
+++ b/linux-user/sparc/cpu_loop.c
@@ -219,17 +219,6 @@ void cpu_loop (CPUSPARCState *env)
case TT_WIN_UNF: /* window underflow */
restore_window(env);
break;
- case TT_TFAULT:
- case TT_DFAULT:
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = env->mmuregs[4];
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- }
- break;
#else
case TT_SPILL: /* window overflow */
save_window(env);
@@ -237,20 +226,6 @@ void cpu_loop (CPUSPARCState *env)
case TT_FILL: /* window underflow */
restore_window(env);
break;
- case TT_TFAULT:
- case TT_DFAULT:
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- if (trapnr == TT_DFAULT)
- info._sifields._sigfault._addr = env->dmmu.mmuregs[4];
- else
- info._sifields._sigfault._addr = cpu_tsptr(env)->tpc;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- }
- break;
#ifndef TARGET_ABI32
case 0x16e:
flush_windows(env);
diff --git a/linux-user/xtensa/cpu_loop.c b/linux-user/xtensa/cpu_loop.c
index 622afbcd34..a83490ab35 100644
--- a/linux-user/xtensa/cpu_loop.c
+++ b/linux-user/xtensa/cpu_loop.c
@@ -226,15 +226,6 @@ void cpu_loop(CPUXtensaState *env)
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
break;
- case LOAD_PROHIBITED_CAUSE:
- case STORE_PROHIBITED_CAUSE:
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- info.si_code = TARGET_SEGV_ACCERR;
- info._sifields._sigfault._addr = env->sregs[EXCVADDR];
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
- break;
-
default:
fprintf(stderr, "exccause = %d\n", env->sregs[EXCCAUSE]);
g_assert_not_reached();
diff --git a/meson.build b/meson.build
index 85f1e43dfe..bc918fbe58 100644
--- a/meson.build
+++ b/meson.build
@@ -55,7 +55,7 @@ have_block = have_system or have_tools
python = import('python').find_installation()
supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux']
-supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64',
+supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64',
'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64']
cpu = host_machine.cpu_family()
@@ -351,8 +351,6 @@ if not get_option('tcg').disabled()
tcg_arch = 'i386'
elif config_host['ARCH'] == 'ppc64'
tcg_arch = 'ppc'
- elif config_host['ARCH'] in ['riscv32', 'riscv64']
- tcg_arch = 'riscv'
endif
add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch,
language: ['c', 'cpp', 'objc'])
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 320c56ba2c..d65e744af9 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -15,7 +15,9 @@
#include "qapi/error.h"
#include "cpu.h"
#include "exec/ramblock.h"
+#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
+#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "ram.h"
#include "trace.h"
@@ -23,9 +25,26 @@
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "exec/memory.h"
+
+/*
+ * total_dirty_pages is protected by the BQL and is used
+ * to count dirty pages between two calls to
+ * memory_global_dirty_log_sync().
+ */
+uint64_t total_dirty_pages;
+
+typedef struct DirtyPageRecord {
+ uint64_t start_pages;
+ uint64_t end_pages;
+} DirtyPageRecord;
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
+static DirtyRateMeasureMode dirtyrate_mode =
+ DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
{
@@ -70,51 +89,94 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state)
static struct DirtyRateInfo *query_dirty_rate_info(void)
{
+ int i;
int64_t dirty_rate = DirtyStat.dirty_rate;
struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
-
- if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
- info->has_dirty_rate = true;
- info->dirty_rate = dirty_rate;
- }
+ DirtyRateVcpuList *head = NULL, **tail = &head;
info->status = CalculatingState;
info->start_time = DirtyStat.start_time;
info->calc_time = DirtyStat.calc_time;
info->sample_pages = DirtyStat.sample_pages;
+ info->mode = dirtyrate_mode;
+
+ if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
+ info->has_dirty_rate = true;
+ info->dirty_rate = dirty_rate;
+
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ /*
+ * set sample_pages to 0 to indicate that page sampling
+ * isn't enabled
+ */
+ info->sample_pages = 0;
+ info->has_vcpu_dirty_rate = true;
+ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+ DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu));
+ rate->id = DirtyStat.dirty_ring.rates[i].id;
+ rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
+ QAPI_LIST_APPEND(tail, rate);
+ }
+ info->vcpu_dirty_rate = head;
+ }
+
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
+ info->sample_pages = 0;
+ }
+ }
trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
return info;
}
-static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
- uint64_t sample_pages)
+static void init_dirtyrate_stat(int64_t start_time,
+ struct DirtyRateConfig config)
{
- DirtyStat.total_dirty_samples = 0;
- DirtyStat.total_sample_count = 0;
- DirtyStat.total_block_mem_MB = 0;
DirtyStat.dirty_rate = -1;
DirtyStat.start_time = start_time;
- DirtyStat.calc_time = calc_time;
- DirtyStat.sample_pages = sample_pages;
+ DirtyStat.calc_time = config.sample_period_seconds;
+ DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
+
+ switch (config.mode) {
+ case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
+ DirtyStat.page_sampling.total_dirty_samples = 0;
+ DirtyStat.page_sampling.total_sample_count = 0;
+ DirtyStat.page_sampling.total_block_mem_MB = 0;
+ break;
+ case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
+ DirtyStat.dirty_ring.nvcpu = -1;
+ DirtyStat.dirty_ring.rates = NULL;
+ break;
+ default:
+ break;
+ }
+}
+
+static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
+{
+ /* the last calc-dirty-rate QMP command used dirty ring mode */
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ free(DirtyStat.dirty_ring.rates);
+ DirtyStat.dirty_ring.rates = NULL;
+ }
}
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
- DirtyStat.total_dirty_samples += info->sample_dirty_count;
- DirtyStat.total_sample_count += info->sample_pages_count;
+ DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
+ DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
/* size of total pages in MB */
- DirtyStat.total_block_mem_MB += (info->ramblock_pages *
- TARGET_PAGE_SIZE) >> 20;
+ DirtyStat.page_sampling.total_block_mem_MB += (info->ramblock_pages *
+ TARGET_PAGE_SIZE) >> 20;
}
static void update_dirtyrate(uint64_t msec)
{
uint64_t dirtyrate;
- uint64_t total_dirty_samples = DirtyStat.total_dirty_samples;
- uint64_t total_sample_count = DirtyStat.total_sample_count;
- uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB;
+ uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
+ uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
+ uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
dirtyrate = total_dirty_samples * total_block_mem_MB *
1000 / (total_sample_count * msec);
@@ -327,21 +389,183 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
update_dirtyrate_stat(block_dinfo);
}
- if (DirtyStat.total_sample_count == 0) {
+ if (DirtyStat.page_sampling.total_sample_count == 0) {
return false;
}
return true;
}
-static void calculate_dirtyrate(struct DirtyRateConfig config)
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+ CPUState *cpu, bool start)
+{
+ if (start) {
+ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
+ } else {
+ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
+ }
+}
+
+static void dirtyrate_global_dirty_log_start(void)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+ qemu_mutex_unlock_iothread();
+}
+
+static void dirtyrate_global_dirty_log_stop(void)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_sync();
+ memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
+ qemu_mutex_unlock_iothread();
+}
+
+static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
+{
+ uint64_t memory_size_MB;
+ int64_t time_s;
+ uint64_t increased_dirty_pages =
+ dirty_pages.end_pages - dirty_pages.start_pages;
+
+ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+ time_s = DirtyStat.calc_time;
+
+ return memory_size_MB / time_s;
+}
+
+static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
+ bool start)
+{
+ if (start) {
+ dirty_pages->start_pages = total_dirty_pages;
+ } else {
+ dirty_pages->end_pages = total_dirty_pages;
+ }
+}
+
+static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
+{
+ DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages);
+}
+
+static inline void dirtyrate_manual_reset_protect(void)
+{
+ RAMBlock *block = NULL;
+
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ memory_region_clear_dirty_bitmap(block->mr, 0,
+ block->used_length);
+ }
+ }
+}
+
+static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
+{
+ int64_t msec = 0;
+ int64_t start_time;
+ DirtyPageRecord dirty_pages;
+
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+
+ /*
+ * The first round of log sync may return all-ones bitmaps when
+ * KVM_DIRTY_LOG_INITIALLY_SET is enabled; skip it unconditionally
+ * and start dirty tracking from the second round of log sync.
+ */
+ memory_global_dirty_log_sync();
+
+ /*
+ * Reset page protection manually and unconditionally. This makes
+ * sure the KVM dirty log is cleared if the
+ * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
+ */
+ dirtyrate_manual_reset_protect();
+ qemu_mutex_unlock_iothread();
+
+ record_dirtypages_bitmap(&dirty_pages, true);
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ DirtyStat.start_time = start_time / 1000;
+
+ msec = config.sample_period_seconds * 1000;
+ msec = set_sample_page_period(msec, start_time);
+ DirtyStat.calc_time = msec / 1000;
+
+ /*
+ * dirtyrate_global_dirty_log_stop() does two things:
+ * 1. fetch the dirty bitmap from KVM
+ * 2. stop dirty tracking
+ */
+ dirtyrate_global_dirty_log_stop();
+
+ record_dirtypages_bitmap(&dirty_pages, false);
+
+ do_calculate_dirtyrate_bitmap(dirty_pages);
+}
+
+static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+{
+ CPUState *cpu;
+ int64_t msec = 0;
+ int64_t start_time;
+ uint64_t dirtyrate = 0;
+ uint64_t dirtyrate_sum = 0;
+ DirtyPageRecord *dirty_pages;
+ int nvcpu = 0;
+ int i = 0;
+
+ CPU_FOREACH(cpu) {
+ nvcpu++;
+ }
+
+ dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
+
+ DirtyStat.dirty_ring.nvcpu = nvcpu;
+ DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
+
+ dirtyrate_global_dirty_log_start();
+
+ CPU_FOREACH(cpu) {
+ record_dirtypages(dirty_pages, cpu, true);
+ }
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ DirtyStat.start_time = start_time / 1000;
+
+ msec = config.sample_period_seconds * 1000;
+ msec = set_sample_page_period(msec, start_time);
+ DirtyStat.calc_time = msec / 1000;
+
+ dirtyrate_global_dirty_log_stop();
+
+ CPU_FOREACH(cpu) {
+ record_dirtypages(dirty_pages, cpu, false);
+ }
+
+ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+ dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
+ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+
+ DirtyStat.dirty_ring.rates[i].id = i;
+ DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
+ dirtyrate_sum += dirtyrate;
+ }
+
+ DirtyStat.dirty_rate = dirtyrate_sum;
+ free(dirty_pages);
+}
+
+static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
struct RamblockDirtyInfo *block_dinfo = NULL;
int block_count = 0;
int64_t msec = 0;
int64_t initial_time;
- rcu_register_thread();
rcu_read_lock();
initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
@@ -364,16 +588,26 @@ static void calculate_dirtyrate(struct DirtyRateConfig config)
out:
rcu_read_unlock();
free_ramblock_dirty_info(block_dinfo, block_count);
- rcu_unregister_thread();
+}
+
+static void calculate_dirtyrate(struct DirtyRateConfig config)
+{
+ if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
+ calculate_dirtyrate_dirty_bitmap(config);
+ } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ calculate_dirtyrate_dirty_ring(config);
+ } else {
+ calculate_dirtyrate_sample_vm(config);
+ }
+
+ trace_dirtyrate_calculate(DirtyStat.dirty_rate);
}
void *get_dirtyrate_thread(void *arg)
{
struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
int ret;
- int64_t start_time;
- int64_t calc_time;
- uint64_t sample_pages;
+ rcu_register_thread();
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
DIRTY_RATE_STATUS_MEASURING);
@@ -382,11 +616,6 @@ void *get_dirtyrate_thread(void *arg)
return NULL;
}
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
- calc_time = config.sample_period_seconds;
- sample_pages = config.sample_pages_per_gigabytes;
- init_dirtyrate_stat(start_time, calc_time, sample_pages);
-
calculate_dirtyrate(config);
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
@@ -394,15 +623,22 @@ void *get_dirtyrate_thread(void *arg)
if (ret == -1) {
error_report("change dirtyrate state failed.");
}
+
+ rcu_unregister_thread();
return NULL;
}
-void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
- int64_t sample_pages, Error **errp)
+void qmp_calc_dirty_rate(int64_t calc_time,
+ bool has_sample_pages,
+ int64_t sample_pages,
+ bool has_mode,
+ DirtyRateMeasureMode mode,
+ Error **errp)
{
static struct DirtyRateConfig config;
QemuThread thread;
int ret;
+ int64_t start_time;
/*
* If the dirty rate is already being measured, don't attempt to start.
@@ -419,6 +655,15 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
return;
}
+ if (!has_mode) {
+ mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ }
+
+ if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ error_setg(errp, "either sample-pages or dirty-ring can be specified.");
+ return;
+ }
+
if (has_sample_pages) {
if (!is_sample_pages_valid(sample_pages)) {
error_setg(errp, "sample-pages is out of range[%d, %d].",
@@ -431,6 +676,19 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
}
/*
+ * Dirty ring mode works only when the KVM dirty ring is enabled;
+ * conversely, dirty bitmap mode works only when it is disabled.
+ */
+ if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
+ !kvm_dirty_ring_enabled()) ||
+ ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
+ kvm_dirty_ring_enabled())) {
+ error_setg(errp, "mode %s is not enabled, use other method instead.",
+ DirtyRateMeasureMode_str(mode));
+ return;
+ }
+
+ /*
* Init calculation state as unstarted.
*/
ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
@@ -442,6 +700,19 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
config.sample_period_seconds = calc_time;
config.sample_pages_per_gigabytes = sample_pages;
+ config.mode = mode;
+
+ cleanup_dirtyrate_stat(config);
+
+ /*
+ * update the dirty rate mode so that we can figure out which mode
+ * was used in the last calculation
+ */
+ dirtyrate_mode = mode;
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+ init_dirtyrate_stat(start_time, config);
+
qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
(void *)&config, QEMU_THREAD_DETACHED);
}
@@ -463,12 +734,24 @@ void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
info->sample_pages);
monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
info->calc_time);
+ monitor_printf(mon, "Mode: %s\n",
+ DirtyRateMeasureMode_str(info->mode));
monitor_printf(mon, "Dirty rate: ");
if (info->has_dirty_rate) {
monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
+ if (info->has_vcpu_dirty_rate) {
+ DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
+ for (rate = head; rate != NULL; rate = rate->next) {
+ monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
+ " (MB/s)\n", rate->value->id,
+ rate->value->dirty_rate);
+ }
+ }
} else {
monitor_printf(mon, "(not ready)\n");
}
+
+ qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
g_free(info);
}
@@ -477,6 +760,9 @@ void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
int64_t sec = qdict_get_try_int(qdict, "second", 0);
int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
bool has_sample_pages = (sample_pages != -1);
+ bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
+ bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
+ DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
Error *err = NULL;
if (!sec) {
@@ -484,7 +770,20 @@ void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
return;
}
- qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, &err);
+ if (dirty_ring && dirty_bitmap) {
+ monitor_printf(mon, "Either dirty ring or dirty bitmap "
+ "can be specified!\n");
+ return;
+ }
+
+ if (dirty_bitmap) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
+ } else if (dirty_ring) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
+ }
+
+ qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true,
+ mode, &err);
if (err) {
hmp_handle_error(mon, err);
return;
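
The per-vCPU rate computed by do_calculate_dirtyrate_vcpu() above is simply dirtied bytes over elapsed seconds, expressed in MB/s. A worked sketch with assumed numbers (not part of the patch; it assumes TARGET_PAGE_SIZE == 4096):

/*
 * Sketch: 131072 pages newly dirtied over a 1-second window.
 */
uint64_t increased_pages = 131072;                          /* end - start */
uint64_t memory_size_MB  = (increased_pages * 4096) >> 20;  /* 512 MiB */
int64_t  dirty_rate      = memory_size_MB / 1;              /* 512 MB/s */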
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index e1fd29089e..69d4c5b865 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -43,6 +43,7 @@
struct DirtyRateConfig {
uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
int64_t sample_period_seconds; /* time duration between two sampling */
+ DirtyRateMeasureMode mode; /* mode of dirtyrate measurement */
};
/*
@@ -58,17 +59,29 @@ struct RamblockDirtyInfo {
uint32_t *hash_result; /* array of hash result for sampled pages */
};
+typedef struct SampleVMStat {
+ uint64_t total_dirty_samples; /* total dirty sampled pages */
+ uint64_t total_sample_count; /* total sampled pages */
+ uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
+} SampleVMStat;
+
+typedef struct VcpuStat {
+ int nvcpu; /* number of vcpu */
+ DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
+} VcpuStat;
+
/*
* Store calculation statistics for each measure.
*/
struct DirtyRateStat {
- uint64_t total_dirty_samples; /* total dirty sampled page */
- uint64_t total_sample_count; /* total sampled pages */
- uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
int64_t dirty_rate; /* dirty rate in MB/s */
int64_t start_time; /* calculation start time in units of second */
int64_t calc_time; /* time duration of two sampling in units of second */
uint64_t sample_pages; /* sample pages per GB */
+ union {
+ SampleVMStat page_sampling;
+ VcpuStat dirty_ring;
+ };
};
void *get_dirtyrate_thread(void *arg);
diff --git a/migration/migration.c b/migration/migration.c
index 9172686b89..53b9a8af96 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -391,7 +391,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
- void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
+ void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
bool received = false;
WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
@@ -2049,6 +2049,20 @@ void migrate_init(MigrationState *s)
s->threshold_size = 0;
}
+int migrate_add_blocker_internal(Error *reason, Error **errp)
+{
+ /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
+ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
+ error_propagate_prepend(errp, error_copy(reason),
+ "disallowing migration blocker "
+ "(migration/snapshot in progress) for: ");
+ return -EBUSY;
+ }
+
+ migration_blockers = g_slist_prepend(migration_blockers, reason);
+ return 0;
+}
+
int migrate_add_blocker(Error *reason, Error **errp)
{
if (only_migratable) {
@@ -2058,15 +2072,7 @@ int migrate_add_blocker(Error *reason, Error **errp)
return -EACCES;
}
- if (migration_is_idle()) {
- migration_blockers = g_slist_prepend(migration_blockers, reason);
- return 0;
- }
-
- error_propagate_prepend(errp, error_copy(reason),
- "disallowing migration blocker "
- "(migration in progress) for: ");
- return -EBUSY;
+ return migrate_add_blocker_internal(reason, errp);
}
void migrate_del_blocker(Error *reason)
@@ -2631,8 +2637,8 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
* Since we currently insist on matching page sizes, just sanity check
* we're being asked for whole host pages.
*/
- if (start & (our_host_ps - 1) ||
- (len & (our_host_ps - 1))) {
+ if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
+ !QEMU_IS_ALIGNED(len, our_host_ps)) {
error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
" len: %zd", __func__, start, len);
mark_source_rp_bad(ms);
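
The alignment rewrites above trade open-coded power-of-two masks for the named macros from "qemu/osdep.h". For power-of-two sizes the two forms are equivalent, as this sketch (not part of the patch) shows:

/*
 * Sketch: for a power-of-two page size, ROUND_DOWN and
 * QEMU_IS_ALIGNED reduce to the old mask idioms they replace.
 */
uint64_t ps = 4096;                       /* power of two */
uint64_t a  = 0x12345;
assert(ROUND_DOWN(a, ps) == (a & -ps));
assert(QEMU_IS_ALIGNED(a, ps) == !(a & (ps - 1)));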
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 2e9697bdd2..e721f69d0f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -402,7 +402,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
strerror(errno));
goto out;
}
- g_assert(((size_t)testarea & (pagesize - 1)) == 0);
+ g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize));
reg_struct.range.start = (uintptr_t)testarea;
reg_struct.range.len = pagesize;
@@ -660,7 +660,7 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
struct uffdio_range range;
int ret;
trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
- range.start = client_addr & ~(pagesize - 1);
+ range.start = ROUND_DOWN(client_addr, pagesize);
range.len = pagesize;
ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range);
if (ret) {
@@ -671,6 +671,29 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
return ret;
}
+static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
+ ram_addr_t start, uint64_t haddr)
+{
+ void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
+
+ /*
+ * Discarded pages (via RamDiscardManager) are never migrated. In the
+ * unlikely case one is accessed, place a zeropage, which also sets the
+ * relevant bits in the recv_bitmap, so we won't try placing a zeropage
+ * twice.
+ *
+ * Checking a single bit is sufficient to handle pagesize > TPS, as
+ * either all relevant bits are set or none are.
+ */
+ assert(QEMU_IS_ALIGNED(start, qemu_ram_pagesize(rb)));
+ if (ramblock_page_is_discarded(rb, start)) {
+ bool received = ramblock_recv_bitmap_test_byte_offset(rb, start);
+
+ return received ? 0 : postcopy_place_page_zero(mis, aligned, rb);
+ }
+
+ return migrate_send_rp_req_pages(mis, rb, start, haddr);
+}
+
/*
* Callback from shared fault handlers to ask for a page,
* the page must be specified by a RAMBlock and an offset in that rb
@@ -679,8 +702,7 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t client_addr, uint64_t rb_offset)
{
- size_t pagesize = qemu_ram_pagesize(rb);
- uint64_t aligned_rbo = rb_offset & ~(pagesize - 1);
+ uint64_t aligned_rbo = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
MigrationIncomingState *mis = migration_incoming_get_current();
trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
@@ -690,7 +712,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
qemu_ram_get_idstr(rb), rb_offset);
return postcopy_wake_shared(pcfd, client_addr, rb);
}
- migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr);
+ postcopy_request_page(mis, rb, aligned_rbo, client_addr);
return 0;
}
@@ -970,7 +992,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
break;
}
- rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
+ rb_offset = ROUND_DOWN(rb_offset, qemu_ram_pagesize(rb));
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset,
@@ -984,8 +1006,8 @@ retry:
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
- ret = migrate_send_rp_req_pages(mis, rb, rb_offset,
- msg.arg.pagefault.address);
+ ret = postcopy_request_page(mis, rb, rb_offset,
+ msg.arg.pagefault.address);
if (ret) {
/* May be network failure, try to wait for recovery */
if (ret == -EIO && postcopy_pause_fault_thread(mis)) {
@@ -993,7 +1015,7 @@ retry:
goto retry;
} else {
/* This is a unavoidable fault */
- error_report("%s: migrate_send_rp_req_pages() get %d",
+ error_report("%s: postcopy_request_page() get %d",
__func__, ret);
break;
}
diff --git a/migration/ram.c b/migration/ram.c
index bb908822d5..680a5158aa 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -811,7 +811,7 @@ static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
assert(shift >= 6);
size = 1ULL << (TARGET_PAGE_BITS + shift);
- start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
+ start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
memory_region_clear_dirty_bitmap(rb->mr, start, size);
}
@@ -858,6 +858,81 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
return ret;
}
+static void dirty_bitmap_clear_section(MemoryRegionSection *section,
+ void *opaque)
+{
+ const hwaddr offset = section->offset_within_region;
+ const hwaddr size = int128_get64(section->size);
+ const unsigned long start = offset >> TARGET_PAGE_BITS;
+ const unsigned long npages = size >> TARGET_PAGE_BITS;
+ RAMBlock *rb = section->mr->ram_block;
+ uint64_t *cleared_bits = opaque;
+
+ /*
+ * We don't grab ram_state->bitmap_mutex because we expect to run
+ * only when starting migration or during postcopy recovery where
+ * we don't have concurrent access.
+ */
+ if (!migration_in_postcopy() && !migrate_background_snapshot()) {
+ migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
+ }
+ *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
+ bitmap_clear(rb->bmap, start, npages);
+}
+
+/*
+ * Exclude all dirty pages from migration that fall into a discarded range as
+ * managed by a RamDiscardManager responsible for the mapped memory region of
+ * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
+ *
+ * Discarded pages ("logically unplugged") have undefined content and must
+ * not get migrated, because even reading these pages for migration might
+ * result in undesired behavior.
+ *
+ * Returns the number of cleared bits in the RAMBlock dirty bitmap.
+ *
+ * Note: The result is only stable while migrating (precopy/postcopy).
+ */
+static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
+{
+ uint64_t cleared_bits = 0;
+
+ if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+ MemoryRegionSection section = {
+ .mr = rb->mr,
+ .offset_within_region = 0,
+ .size = int128_make64(qemu_ram_get_used_length(rb)),
+ };
+
+ ram_discard_manager_replay_discarded(rdm, &section,
+ dirty_bitmap_clear_section,
+ &cleared_bits);
+ }
+ return cleared_bits;
+}
+
+/*
+ * Check if a host-page aligned page falls into a discarded range as managed by
+ * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
+ *
+ * Note: The result is only stable while migrating (precopy/postcopy).
+ */
+bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
+{
+ if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+ MemoryRegionSection section = {
+ .mr = rb->mr,
+ .offset_within_region = start,
+ .size = int128_make64(qemu_ram_pagesize(rb)),
+ };
+
+ return !ram_discard_manager_is_populated(rdm, &section);
+ }
+ return false;
+}
+
/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
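
The replay helpers above thread an accumulator through the opaque pointer. A toy model of the pattern; the types, ranges and 4 KiB page size here are hypothetical, not QEMU's:

    #include <inttypes.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t offset, size; } Section;
    typedef void (*ReplayFn)(const Section *s, void *opaque);

    /* Stand-in for ram_discard_manager_replay_discarded(): walk each
     * discarded range and hand it to the callback. */
    static void replay_discarded(ReplayFn fn, void *opaque)
    {
        static const Section discarded[] = {
            { 0x0000, 0x2000 },   /* two 4 KiB pages */
            { 0x8000, 0x1000 },   /* one 4 KiB page  */
        };

        for (size_t i = 0; i < sizeof(discarded) / sizeof(discarded[0]); i++) {
            fn(&discarded[i], opaque);
        }
    }

    /* Stand-in for dirty_bitmap_clear_section(): count via the opaque. */
    static void count_pages(const Section *s, void *opaque)
    {
        uint64_t *pages = opaque;

        *pages += s->size / 4096;
    }

    int main(void)
    {
        uint64_t pages = 0;

        replay_discarded(count_pages, &pages);
        printf("%" PRIu64 " pages discarded\n", pages);   /* prints 3 */
        return 0;
    }

The same accumulator result is what migration_bitmap_clear_discarded_pages() later subtracts from migration_dirty_pages.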
@@ -1564,25 +1639,68 @@ out:
return ret;
}
+static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
+ ram_addr_t size)
+{
+ /*
+ * We read one byte of each page; this will preallocate page tables if
+ * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
+ * where no page was populated yet. This might require adaptation when
+ * supporting other mappings, like shmem.
+ */
+ for (ram_addr_t end = offset + size; offset < end;
+ offset += block->page_size) {
+ char tmp = *((char *)block->host + offset);
+
+ /* Don't optimize the read out */
+ asm volatile("" : "+r" (tmp));
+ }
+}
+
+static inline int populate_read_section(MemoryRegionSection *section,
+ void *opaque)
+{
+ const hwaddr size = int128_get64(section->size);
+ hwaddr offset = section->offset_within_region;
+ RAMBlock *block = section->mr->ram_block;
+
+ populate_read_range(block, offset, size);
+ return 0;
+}
+
/*
- * ram_block_populate_pages: populate memory in the RAM block by reading
- * an integer from the beginning of each page.
+ * ram_block_populate_read: preallocate page tables and populate pages in the
+ * RAM block by reading a byte of each page.
*
* Since it's solely used for userfault_fd WP feature, here we just
* hardcode page size to qemu_real_host_page_size.
*
* @block: RAM block to populate
*/
-static void ram_block_populate_pages(RAMBlock *block)
+static void ram_block_populate_read(RAMBlock *rb)
{
- char *ptr = (char *) block->host;
-
- for (ram_addr_t offset = 0; offset < block->used_length;
- offset += qemu_real_host_page_size) {
- char tmp = *(ptr + offset);
-
- /* Don't optimize the read out */
- asm volatile("" : "+r" (tmp));
+ /*
+ * Skip populating all pages that fall into a discarded range as managed by
+ * a RamDiscardManager responsible for the mapped memory region of the
+ * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
+ * must not get populated automatically. We don't have to track
+ * modifications via userfaultfd WP reliably, because these pages will
+ * not be part of the migration stream either way -- see
+ * ramblock_dirty_bitmap_clear_discarded_pages().
+ *
+ * Note: The result is only stable while migrating (precopy/postcopy).
+ */
+ if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+ MemoryRegionSection section = {
+ .mr = rb->mr,
+ .offset_within_region = 0,
+ .size = rb->mr->size,
+ };
+
+ ram_discard_manager_replay_populated(rdm, &section,
+ populate_read_section, NULL);
+ } else {
+ populate_read_range(rb, 0, rb->used_length);
}
}
@@ -1609,7 +1727,7 @@ void ram_write_tracking_prepare(void)
* UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
* pages with pte_none() entries in page table.
*/
- ram_block_populate_pages(block);
+ ram_block_populate_read(block);
}
}
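
populate_read_range() relies on an empty asm statement to keep the per-page load alive. A self-contained sketch of the trick; the base pointer, length and 4 KiB stride are assumed:

    #include <stddef.h>

    static void touch_pages(const char *base, size_t len)
    {
        for (size_t off = 0; off < len; off += 4096) {
            char tmp = base[off];

            /*
             * "+r" tells the compiler tmp is both read and written by the
             * asm, so the load cannot be optimized away even though the
             * value is never used.
             */
            asm volatile("" : "+r"(tmp));
        }
    }

Each read either populates the page table entry or maps the shared zeropage, which is what userfaultfd write-protect registration needs; as the comment above notes, pages with pte_none() entries would otherwise be silently skipped.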
@@ -2216,7 +2334,14 @@ static void ram_save_cleanup(void *opaque)
 /* The caller holds the iothread lock or is in a bh, so there is
 * no write race against the migration bitmap
*/
- memory_global_dirty_log_stop();
+ if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
+ /*
+ * Do not stop dirty logging without having started it, since
+ * memory_global_dirty_log_stop() asserts that
+ * memory_global_dirty_log_start/stop are used in pairs.
+ */
+ memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
+ }
}
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
@@ -2668,6 +2793,19 @@ static void ram_list_init_bitmaps(void)
}
}
+static void migration_bitmap_clear_discarded_pages(RAMState *rs)
+{
+ unsigned long pages;
+ RAMBlock *rb;
+
+ RCU_READ_LOCK_GUARD();
+
+ RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
+ pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
+ rs->migration_dirty_pages -= pages;
+ }
+}
+
static void ram_init_bitmaps(RAMState *rs)
{
/* For memory_global_dirty_log_start below. */
@@ -2678,12 +2816,18 @@ static void ram_init_bitmaps(RAMState *rs)
ram_list_init_bitmaps();
/* We don't use dirty log with background snapshots */
if (!migrate_background_snapshot()) {
- memory_global_dirty_log_start();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
migration_bitmap_sync_precopy(rs);
}
}
qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
+
+ /*
+ * After a possible first bitmap sync, fix up the initial bitmap,
+ * which contains all 1s, to exclude any discarded pages from migration.
+ */
+ migration_bitmap_clear_discarded_pages(rs);
}
static int ram_init_all(RAMState **rsp)
@@ -3434,7 +3578,7 @@ void colo_incoming_start_dirty_log(void)
/* Discard this dirty bitmap record */
bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
}
- memory_global_dirty_log_start();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
}
ram_state->migration_dirty_pages = 0;
qemu_mutex_unlock_ramlist();
@@ -3446,7 +3590,7 @@ void colo_release_ram_cache(void)
{
RAMBlock *block;
- memory_global_dirty_log_stop();
+ memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
g_free(block->bmap);
block->bmap = NULL;
@@ -4112,6 +4256,10 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
*/
bitmap_complement(block->bmap, block->bmap, nbits);
+ /* Clear dirty bits of discarded ranges that we don't want to migrate. */
+ ramblock_dirty_bitmap_clear_discarded_pages(block);
+
+ /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
trace_ram_dirty_bitmap_reload_complete(block->idstr);
/*
diff --git a/migration/ram.h b/migration/ram.h
index 4833e9fd5b..dda1988f3d 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -72,6 +72,7 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
const char *block_name);
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
+bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
/* ram cache */
int colo_init_ram_cache(void);
diff --git a/migration/rdma.c b/migration/rdma.c
index 2a3c7889b9..f5d3bbe7e9 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -358,9 +358,11 @@ typedef struct RDMAContext {
struct ibv_context *verbs;
struct rdma_event_channel *channel;
struct ibv_qp *qp; /* queue pair */
- struct ibv_comp_channel *comp_channel; /* completion channel */
+ struct ibv_comp_channel *recv_comp_channel; /* recv completion channel */
+ struct ibv_comp_channel *send_comp_channel; /* send completion channel */
struct ibv_pd *pd; /* protection domain */
- struct ibv_cq *cq; /* completion queue */
+ struct ibv_cq *recv_cq; /* receive completion queue */
+ struct ibv_cq *send_cq; /* send completion queue */
/*
* If a previous write failed (perhaps because of a failed
@@ -1059,21 +1061,34 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
return -1;
}
- /* create completion channel */
- rdma->comp_channel = ibv_create_comp_channel(rdma->verbs);
- if (!rdma->comp_channel) {
- error_report("failed to allocate completion channel");
+ /* create receive completion channel */
+ rdma->recv_comp_channel = ibv_create_comp_channel(rdma->verbs);
+ if (!rdma->recv_comp_channel) {
+ error_report("failed to allocate receive completion channel");
goto err_alloc_pd_cq;
}
/*
- * Completion queue can be filled by both read and write work requests,
- * so must reflect the sum of both possible queue sizes.
+ * The completion queue can be filled by read work requests.
*/
- rdma->cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
- NULL, rdma->comp_channel, 0);
- if (!rdma->cq) {
- error_report("failed to allocate completion queue");
+ rdma->recv_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
+ NULL, rdma->recv_comp_channel, 0);
+ if (!rdma->recv_cq) {
+ error_report("failed to allocate receive completion queue");
+ goto err_alloc_pd_cq;
+ }
+
+ /* create send completion channel */
+ rdma->send_comp_channel = ibv_create_comp_channel(rdma->verbs);
+ if (!rdma->send_comp_channel) {
+ error_report("failed to allocate send completion channel");
+ goto err_alloc_pd_cq;
+ }
+
+ rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
+ NULL, rdma->send_comp_channel, 0);
+ if (!rdma->send_cq) {
+ error_report("failed to allocate send completion queue");
goto err_alloc_pd_cq;
}
@@ -1083,11 +1098,19 @@ err_alloc_pd_cq:
if (rdma->pd) {
ibv_dealloc_pd(rdma->pd);
}
- if (rdma->comp_channel) {
- ibv_destroy_comp_channel(rdma->comp_channel);
+ if (rdma->recv_comp_channel) {
+ ibv_destroy_comp_channel(rdma->recv_comp_channel);
+ }
+ if (rdma->send_comp_channel) {
+ ibv_destroy_comp_channel(rdma->send_comp_channel);
+ }
+ if (rdma->recv_cq) {
+ ibv_destroy_cq(rdma->recv_cq);
+ rdma->recv_cq = NULL;
}
rdma->pd = NULL;
- rdma->comp_channel = NULL;
+ rdma->recv_comp_channel = NULL;
+ rdma->send_comp_channel = NULL;
return -1;
}
@@ -1104,8 +1127,8 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
attr.cap.max_recv_wr = 3;
attr.cap.max_send_sge = 1;
attr.cap.max_recv_sge = 1;
- attr.send_cq = rdma->cq;
- attr.recv_cq = rdma->cq;
+ attr.send_cq = rdma->send_cq;
+ attr.recv_cq = rdma->recv_cq;
attr.qp_type = IBV_QPT_RC;
ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr);
@@ -1496,14 +1519,14 @@ static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
* (of any kind) has completed.
* Return the work request ID that completed.
*/
-static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
- uint32_t *byte_len)
+static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq,
+ uint64_t *wr_id_out, uint32_t *byte_len)
{
int ret;
struct ibv_wc wc;
uint64_t wr_id;
- ret = ibv_poll_cq(rdma->cq, 1, &wc);
+ ret = ibv_poll_cq(cq, 1, &wc);
if (!ret) {
*wr_id_out = RDMA_WRID_NONE;
@@ -1575,7 +1598,8 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out,
/* Wait for activity on the completion channel.
 * Returns 0 on success, non-zero on error.
*/
-static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
+static int qemu_rdma_wait_comp_channel(RDMAContext *rdma,
+ struct ibv_comp_channel *comp_channel)
{
struct rdma_cm_event *cm_event;
int ret = -1;
@@ -1586,7 +1610,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
*/
if (rdma->migration_started_on_destination &&
migration_incoming_get_current()->state == MIGRATION_STATUS_ACTIVE) {
- yield_until_fd_readable(rdma->comp_channel->fd);
+ yield_until_fd_readable(comp_channel->fd);
} else {
/* This is the source side, we're in a separate thread
* or destination prior to migration_fd_process_incoming()
@@ -1597,7 +1621,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
*/
while (!rdma->error_state && !rdma->received_error) {
GPollFD pfds[2];
- pfds[0].fd = rdma->comp_channel->fd;
+ pfds[0].fd = comp_channel->fd;
pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
pfds[0].revents = 0;
@@ -1655,6 +1679,17 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
return rdma->error_state;
}
+static struct ibv_comp_channel *to_channel(RDMAContext *rdma, int wrid)
+{
+ return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_comp_channel :
+ rdma->recv_comp_channel;
+}
+
+static struct ibv_cq *to_cq(RDMAContext *rdma, int wrid)
+{
+ return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_cq : rdma->recv_cq;
+}
+
/*
* Block until the next work request has completed.
*
@@ -1675,13 +1710,15 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
struct ibv_cq *cq;
void *cq_ctx;
uint64_t wr_id = RDMA_WRID_NONE, wr_id_in;
+ struct ibv_comp_channel *ch = to_channel(rdma, wrid_requested);
+ struct ibv_cq *poll_cq = to_cq(rdma, wrid_requested);
- if (ibv_req_notify_cq(rdma->cq, 0)) {
+ if (ibv_req_notify_cq(poll_cq, 0)) {
return -1;
}
/* poll cq first */
while (wr_id != wrid_requested) {
- ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
+ ret = qemu_rdma_poll(rdma, poll_cq, &wr_id_in, byte_len);
if (ret < 0) {
return ret;
}
@@ -1702,12 +1739,12 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
}
while (1) {
- ret = qemu_rdma_wait_comp_channel(rdma);
+ ret = qemu_rdma_wait_comp_channel(rdma, ch);
if (ret) {
goto err_block_for_wrid;
}
- ret = ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx);
+ ret = ibv_get_cq_event(ch, &cq, &cq_ctx);
if (ret) {
perror("ibv_get_cq_event");
goto err_block_for_wrid;
@@ -1721,7 +1758,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
}
while (wr_id != wrid_requested) {
- ret = qemu_rdma_poll(rdma, &wr_id_in, byte_len);
+ ret = qemu_rdma_poll(rdma, poll_cq, &wr_id_in, byte_len);
if (ret < 0) {
goto err_block_for_wrid;
}
@@ -2437,13 +2474,21 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
rdma_destroy_qp(rdma->cm_id);
rdma->qp = NULL;
}
- if (rdma->cq) {
- ibv_destroy_cq(rdma->cq);
- rdma->cq = NULL;
+ if (rdma->recv_cq) {
+ ibv_destroy_cq(rdma->recv_cq);
+ rdma->recv_cq = NULL;
+ }
+ if (rdma->send_cq) {
+ ibv_destroy_cq(rdma->send_cq);
+ rdma->send_cq = NULL;
+ }
+ if (rdma->recv_comp_channel) {
+ ibv_destroy_comp_channel(rdma->recv_comp_channel);
+ rdma->recv_comp_channel = NULL;
}
- if (rdma->comp_channel) {
- ibv_destroy_comp_channel(rdma->comp_channel);
- rdma->comp_channel = NULL;
+ if (rdma->send_comp_channel) {
+ ibv_destroy_comp_channel(rdma->send_comp_channel);
+ rdma->send_comp_channel = NULL;
}
if (rdma->pd) {
ibv_dealloc_pd(rdma->pd);
@@ -3115,10 +3160,14 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
if (io_read) {
- aio_set_fd_handler(ctx, rioc->rdmain->comp_channel->fd,
+ aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd,
+ false, io_read, io_write, NULL, opaque);
+ aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd,
false, io_read, io_write, NULL, opaque);
} else {
- aio_set_fd_handler(ctx, rioc->rdmaout->comp_channel->fd,
+ aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd,
+ false, io_read, io_write, NULL, opaque);
+ aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd,
false, io_read, io_write, NULL, opaque);
}
}
@@ -3332,7 +3381,22 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
*/
while (1) {
uint64_t wr_id, wr_id_in;
- int ret = qemu_rdma_poll(rdma, &wr_id_in, NULL);
+ int ret = qemu_rdma_poll(rdma, rdma->recv_cq, &wr_id_in, NULL);
+ if (ret < 0) {
+ error_report("rdma migration: polling error! %d", ret);
+ goto err;
+ }
+
+ wr_id = wr_id_in & RDMA_WRID_TYPE_MASK;
+
+ if (wr_id == RDMA_WRID_NONE) {
+ break;
+ }
+ }
+
+ while (1) {
+ uint64_t wr_id, wr_id_in;
+ int ret = qemu_rdma_poll(rdma, rdma->send_cq, &wr_id_in, NULL);
if (ret < 0) {
error_report("rdma migration: polling error! %d", ret);
goto err;
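
The rdma.c changes split the single shared completion channel and CQ into a send pair and a receive pair so the two directions can be polled independently. A hedged sketch of the setup for one direction using the libibverbs calls involved; ctx and depth are assumed inputs, and error cleanup is trimmed:

    #include <infiniband/verbs.h>
    #include <stddef.h>

    static int create_cq_pair(struct ibv_context *ctx, int depth,
                              struct ibv_comp_channel **ch, struct ibv_cq **cq)
    {
        *ch = ibv_create_comp_channel(ctx);
        if (!*ch) {
            return -1;
        }

        *cq = ibv_create_cq(ctx, depth, NULL, *ch, 0);
        if (!*cq) {
            return -1;
        }

        /* Arm the CQ so the channel fd signals on the next completion. */
        return ibv_req_notify_cq(*cq, 0);
    }

Called once per direction, with attr.send_cq and attr.recv_cq then pointing at the respective queues, as in qemu_rdma_alloc_qp() above.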
diff --git a/migration/trace-events b/migration/trace-events
index a8ae163707..b48d873b8a 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -333,6 +333,8 @@ get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock n
calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
find_page_matched(const char *idstr) "ramblock %s addr or size changed"
+dirtyrate_calculate(int64_t dirtyrate) "dirty rate: %" PRIi64 " MB/s"
+dirtyrate_do_calculate_vcpu(int idx, uint64_t rate) "vcpu[%d]: %"PRIu64 " MB/s"
# block.c
migration_block_init_shared(const char *blk_device_name) "Start migration for %s with shared base image"
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 91a73db9a3..67f32a8602 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index a5b7389191..376b01c10b 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index f7a501efc6..bbd746fde9 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/qapi/migration.json b/qapi/migration.json
index 9aa8bc5759..87146ceea2 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1732,6 +1732,21 @@
'data': { 'device-id': 'str' } }
##
+# @DirtyRateVcpu:
+#
+# Dirty rate of vcpu.
+#
+# @id: vcpu index.
+#
+# @dirty-rate: dirty rate.
+#
+# Since: 6.1
+#
+##
+{ 'struct': 'DirtyRateVcpu',
+ 'data': { 'id': 'int', 'dirty-rate': 'int64' } }
+
+##
# @DirtyRateStatus:
#
# An enumeration of dirtyrate status.
@@ -1749,6 +1764,23 @@
'data': [ 'unstarted', 'measuring', 'measured'] }
##
+# @DirtyRateMeasureMode:
+#
+# An enumeration of modes for measuring the dirty page rate.
+#
+# @page-sampling: calculate the dirty rate by sampling pages.
+#
+# @dirty-ring: calculate the dirty rate via the dirty ring.
+#
+# @dirty-bitmap: calculate the dirty rate via the dirty bitmap.
+#
+# Since: 6.1
+#
+##
+{ 'enum': 'DirtyRateMeasureMode',
+ 'data': ['page-sampling', 'dirty-ring', 'dirty-bitmap'] }
+
+##
# @DirtyRateInfo:
#
# Information about current dirty page rate of vm.
@@ -1766,6 +1798,12 @@
# @sample-pages: page count per GB for sample dirty pages
# the default value is 512 (since 6.1)
#
+# @mode: the method used to calculate the dirty rate; one of
+# 'page-sampling' and 'dirty-ring' (Since 6.1)
+#
+# @vcpu-dirty-rate: dirty rate for each vcpu when dirty-ring
+# mode is specified (Since 6.1)
+#
# Since: 5.2
#
##
@@ -1774,7 +1812,9 @@
'status': 'DirtyRateStatus',
'start-time': 'int64',
'calc-time': 'int64',
- 'sample-pages': 'uint64'} }
+ 'sample-pages': 'uint64',
+ 'mode': 'DirtyRateMeasureMode',
+ '*vcpu-dirty-rate': [ 'DirtyRateVcpu' ] } }
##
# @calc-dirty-rate:
@@ -1786,6 +1826,9 @@
# @sample-pages: page count per GB for sample dirty pages
# the default value is 512 (since 6.1)
#
+# @mode: mechanism for calculating the dirty rate; one of
+# 'page-sampling' and 'dirty-ring' (Since 6.1)
+#
# Since: 5.2
#
# Example:
@@ -1794,7 +1837,8 @@
#
##
{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64',
- '*sample-pages': 'int'} }
+ '*sample-pages': 'int',
+ '*mode': 'DirtyRateMeasureMode'} }
##
# @query-dirty-rate:
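
For reference, a hypothetical QMP exchange exercising the new mode argument, written in the example style these schema comments already use:

    -> { "execute": "calc-dirty-rate",
         "arguments": { "calc-time": 1, "mode": "dirty-ring" } }
    <- { "return": {} }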
diff --git a/roms/openbios b/roms/openbios
-Subproject d657b653186c0fd6e062cab133497415c2a5a5b
+Subproject b9062deaaea7269369eaa46260d75edcaf276af
diff --git a/softmmu/memory.c b/softmmu/memory.c
index e5826faa0c..7340e19ff5 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -39,7 +39,7 @@
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
static bool ioeventfd_update_pending;
-bool global_dirty_log;
+unsigned int global_dirty_tracking;
static QTAILQ_HEAD(, MemoryListener) memory_listeners
= QTAILQ_HEAD_INITIALIZER(memory_listeners);
@@ -1821,7 +1821,7 @@ uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
uint8_t mask = mr->dirty_log_mask;
RAMBlock *rb = mr->ram_block;
- if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) ||
+ if (global_dirty_tracking && ((rb && qemu_ram_is_migratable(rb)) ||
memory_region_is_iommu(mr))) {
mask |= (1 << DIRTY_MEMORY_MIGRATION);
}
@@ -2081,6 +2081,17 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
return rdmc->replay_populated(rdm, section, replay_fn, opaque);
}
+void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscard replay_fn,
+ void *opaque)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->replay_discarded);
+ rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+}
+
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl,
MemoryRegionSection *section)
@@ -2760,14 +2771,18 @@ void memory_global_after_dirty_log_sync(void)
static VMChangeStateEntry *vmstate_change;
-void memory_global_dirty_log_start(void)
+void memory_global_dirty_log_start(unsigned int flags)
{
if (vmstate_change) {
qemu_del_vm_change_state_handler(vmstate_change);
vmstate_change = NULL;
}
- global_dirty_log = true;
+ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
+ assert(!(global_dirty_tracking & flags));
+ global_dirty_tracking |= flags;
+
+ trace_global_dirty_changed(global_dirty_tracking);
MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
@@ -2777,9 +2792,13 @@ void memory_global_dirty_log_start(void)
memory_region_transaction_commit();
}
-static void memory_global_dirty_log_do_stop(void)
+static void memory_global_dirty_log_do_stop(unsigned int flags)
{
- global_dirty_log = false;
+ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
+ assert((global_dirty_tracking & flags) == flags);
+ global_dirty_tracking &= ~flags;
+
+ trace_global_dirty_changed(global_dirty_tracking);
/* Refresh DIRTY_MEMORY_MIGRATION bit. */
memory_region_transaction_begin();
@@ -2792,8 +2811,9 @@ static void memory_global_dirty_log_do_stop(void)
static void memory_vm_change_state_handler(void *opaque, bool running,
RunState state)
{
+ unsigned int flags = (unsigned int)(uintptr_t)opaque;
if (running) {
- memory_global_dirty_log_do_stop();
+ memory_global_dirty_log_do_stop(flags);
if (vmstate_change) {
qemu_del_vm_change_state_handler(vmstate_change);
@@ -2802,18 +2822,19 @@ static void memory_vm_change_state_handler(void *opaque, bool running,
}
}
-void memory_global_dirty_log_stop(void)
+void memory_global_dirty_log_stop(unsigned int flags)
{
if (!runstate_is_running()) {
if (vmstate_change) {
return;
}
vmstate_change = qemu_add_vm_change_state_handler(
- memory_vm_change_state_handler, NULL);
+ memory_vm_change_state_handler,
+ (void *)(uintptr_t)flags);
return;
}
- memory_global_dirty_log_do_stop();
+ memory_global_dirty_log_do_stop(flags);
}
static void listener_add_address_space(MemoryListener *listener,
@@ -2825,7 +2846,7 @@ static void listener_add_address_space(MemoryListener *listener,
if (listener->begin) {
listener->begin(listener);
}
- if (global_dirty_log) {
+ if (global_dirty_tracking) {
if (listener->log_global_start) {
listener->log_global_start(listener);
}
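
global_dirty_tracking turns the old boolean into a bitmask so that independent consumers (migration, dirty-rate measurement) can start and stop logging without trampling each other. A toy model of the paired start/stop contract the new asserts enforce; the flag values are illustrative:

    #include <assert.h>

    #define GLOBAL_DIRTY_MIGRATION  (1U << 0)   /* illustrative values */
    #define GLOBAL_DIRTY_DIRTY_RATE (1U << 1)
    #define GLOBAL_DIRTY_MASK       (GLOBAL_DIRTY_MIGRATION | \
                                     GLOBAL_DIRTY_DIRTY_RATE)

    static unsigned int global_dirty_tracking;

    static void dirty_log_start(unsigned int flags)
    {
        assert(flags && !(flags & ~GLOBAL_DIRTY_MASK));
        assert(!(global_dirty_tracking & flags));     /* no double start */
        global_dirty_tracking |= flags;
    }

    static void dirty_log_stop(unsigned int flags)
    {
        assert((global_dirty_tracking & flags) == flags); /* started first */
        global_dirty_tracking &= ~flags;
    }

    int main(void)
    {
        dirty_log_start(GLOBAL_DIRTY_MIGRATION);
        dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);   /* independent client */
        dirty_log_stop(GLOBAL_DIRTY_MIGRATION);     /* dirty-rate unaffected */
        assert(global_dirty_tracking == GLOBAL_DIRTY_DIRTY_RATE);
        dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
        return 0;
    }

This pairing contract is why ram_save_cleanup() above first checks global_dirty_tracking & GLOBAL_DIRTY_MIGRATION before stopping.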
diff --git a/softmmu/trace-events b/softmmu/trace-events
index bf1469990e..9c88887b3c 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -19,6 +19,7 @@ memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '
flatview_new(void *view, void *root) "%p (root %p)"
flatview_destroy(void *view, void *root) "%p (root %p)"
flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
+global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32
# softmmu.c
vm_stop_flush_all(int ret) "ret %d"
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 93e16a2ffb..a8990d401b 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -218,9 +218,12 @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
static const struct TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
- .tlb_fill = alpha_cpu_tlb_fill,
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = alpha_cpu_record_sigsegv,
+ .record_sigbus = alpha_cpu_record_sigbus,
+#else
+ .tlb_fill = alpha_cpu_tlb_fill,
.cpu_exec_interrupt = alpha_cpu_exec_interrupt,
.do_interrupt = alpha_cpu_do_interrupt,
.do_transaction_failed = alpha_cpu_do_transaction_failed,
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index 772828cc26..afd975c878 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -282,9 +282,6 @@ void alpha_cpu_dump_state(CPUState *cs, FILE *f, int flags);
hwaddr alpha_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
-void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
- MMUAccessType access_type, int mmu_idx,
- uintptr_t retaddr) QEMU_NORETURN;
#define cpu_list alpha_cpu_list
@@ -439,9 +436,6 @@ void alpha_translate_init(void);
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
void alpha_cpu_list(void);
-bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
void QEMU_NORETURN dynamic_excp(CPUAlphaState *, uintptr_t, int, int);
void QEMU_NORETURN arith_excp(CPUAlphaState *, uintptr_t, int, uint64_t);
@@ -449,7 +443,20 @@ uint64_t cpu_alpha_load_fpcr (CPUAlphaState *env);
void cpu_alpha_store_fpcr (CPUAlphaState *env, uint64_t val);
uint64_t cpu_alpha_load_gr(CPUAlphaState *env, unsigned reg);
void cpu_alpha_store_gr(CPUAlphaState *env, unsigned reg, uint64_t val);
-#ifndef CONFIG_USER_ONLY
+
+#ifdef CONFIG_USER_ONLY
+void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr);
+void alpha_cpu_record_sigbus(CPUState *cs, vaddr address,
+ MMUAccessType access_type, uintptr_t retaddr);
+#else
+bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type, int mmu_idx,
+ uintptr_t retaddr) QEMU_NORETURN;
void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
vaddr addr, unsigned size,
MMUAccessType access_type,
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
index 81550d9e2f..b7e7f73b15 100644
--- a/target/alpha/helper.c
+++ b/target/alpha/helper.c
@@ -120,15 +120,44 @@ void cpu_alpha_store_gr(CPUAlphaState *env, unsigned reg, uint64_t val)
}
#if defined(CONFIG_USER_ONLY)
-bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr)
{
AlphaCPU *cpu = ALPHA_CPU(cs);
+ target_ulong mmcsr, cause;
+
+ /* Assuming !maperr, infer the missing protection. */
+ switch (access_type) {
+ case MMU_DATA_LOAD:
+ mmcsr = MM_K_FOR;
+ cause = 0;
+ break;
+ case MMU_DATA_STORE:
+ mmcsr = MM_K_FOW;
+ cause = 1;
+ break;
+ case MMU_INST_FETCH:
+ mmcsr = MM_K_FOE;
+ cause = -1;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (maperr) {
+ if (address < BIT_ULL(TARGET_VIRT_ADDR_SPACE_BITS - 1)) {
+ /* Userspace address, therefore page not mapped. */
+ mmcsr = MM_K_TNV;
+ } else {
+ /* Kernel or invalid address. */
+ mmcsr = MM_K_ACV;
+ }
+ }
- cs->exception_index = EXCP_MMFAULT;
+ /* Record the arguments that PALcode would give to the kernel. */
cpu->env.trap_arg0 = address;
- cpu_loop_exit_restore(cs, retaddr);
+ cpu->env.trap_arg1 = mmcsr;
+ cpu->env.trap_arg2 = cause;
}
#else
/* Returns the OSF/1 entMM failure indication, or -1 on success. */
diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c
index 75e72bc337..47283a0612 100644
--- a/target/alpha/mem_helper.c
+++ b/target/alpha/mem_helper.c
@@ -23,18 +23,12 @@
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
-/* Softmmu support */
-#ifndef CONFIG_USER_ONLY
-void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
- MMUAccessType access_type,
- int mmu_idx, uintptr_t retaddr)
+static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retaddr)
{
- AlphaCPU *cpu = ALPHA_CPU(cs);
- CPUAlphaState *env = &cpu->env;
uint64_t pc;
uint32_t insn;
- cpu_restore_state(cs, retaddr, true);
+ cpu_restore_state(env_cpu(env), retaddr, true);
pc = env->pc;
insn = cpu_ldl_code(env, pc);
@@ -42,6 +36,26 @@ void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
env->trap_arg0 = addr;
env->trap_arg1 = insn >> 26; /* opcode */
env->trap_arg2 = (insn >> 21) & 31; /* dest regno */
+}
+
+#ifdef CONFIG_USER_ONLY
+void alpha_cpu_record_sigbus(CPUState *cs, vaddr addr,
+ MMUAccessType access_type, uintptr_t retaddr)
+{
+ AlphaCPU *cpu = ALPHA_CPU(cs);
+ CPUAlphaState *env = &cpu->env;
+
+ do_unaligned_access(env, addr, retaddr);
+}
+#else
+void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr)
+{
+ AlphaCPU *cpu = ALPHA_CPU(cs);
+ CPUAlphaState *env = &cpu->env;
+
+ do_unaligned_access(env, addr, retaddr);
cs->exception_index = EXCP_UNALIGN;
env->error_code = 0;
cpu_loop_exit(cs);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 641a8c2d3d..a211804fd3 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2031,10 +2031,13 @@ static const struct SysemuCPUOps arm_sysemu_ops = {
static const struct TCGCPUOps arm_tcg_ops = {
.initialize = arm_translate_init,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
- .tlb_fill = arm_cpu_tlb_fill,
.debug_excp_handler = arm_debug_excp_handler,
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = arm_cpu_record_sigsegv,
+ .record_sigbus = arm_cpu_record_sigbus,
+#else
+ .tlb_fill = arm_cpu_tlb_fill,
.cpu_exec_interrupt = arm_cpu_exec_interrupt,
.do_interrupt = arm_cpu_do_interrupt,
.do_transaction_failed = arm_cpu_do_transaction_failed,
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 0d5adccf1a..13d0e9b195 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -898,10 +898,13 @@ static void pxa270c5_initfn(Object *obj)
static const struct TCGCPUOps arm_v7m_tcg_ops = {
.initialize = arm_translate_init,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
- .tlb_fill = arm_cpu_tlb_fill,
.debug_excp_handler = arm_debug_excp_handler,
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = arm_cpu_record_sigsegv,
+ .record_sigbus = arm_cpu_record_sigbus,
+#else
+ .tlb_fill = arm_cpu_tlb_fill,
.cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
.do_interrupt = arm_v7m_cpu_do_interrupt,
.do_transaction_failed = arm_cpu_do_transaction_failed,
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 3612107ab2..89f7610ebc 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -544,9 +544,17 @@ static inline bool arm_extabort_type(MemTxResult result)
return result != MEMTX_DECODE_ERROR;
}
+#ifdef CONFIG_USER_ONLY
+void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type, uintptr_t ra);
+#else
bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
+#endif
static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
{
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 724175210b..e09b7e46a2 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -84,10 +84,8 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
uintptr_t index;
if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
- /* SIGSEGV */
- arm_cpu_tlb_fill(env_cpu(env), ptr, ptr_size, ptr_access,
- ptr_mmu_idx, false, ra);
- g_assert_not_reached();
+ cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
+ !(flags & PAGE_VALID), ra);
}
/* Require both MAP_ANON and PROT_MTE for the page. */
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index dab5f1d1cd..07be55b7e1 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -6118,7 +6118,7 @@ DO_LDN_2(4, dd, MO_64)
* linux-user/ in its get_user/put_user macros.
*
* TODO: Construct some helpers, written in assembly, that interact with
- * handle_cpu_signal to produce memory ops which can properly report errors
+ * host_signal_handler to produce memory ops which can properly report errors
* without racing.
*/
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 3107f9823e..12a934e924 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -147,28 +147,12 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
}
-#endif /* !defined(CONFIG_USER_ONLY) */
-
bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
{
ARMCPU *cpu = ARM_CPU(cs);
ARMMMUFaultInfo fi = {};
-
-#ifdef CONFIG_USER_ONLY
- int flags = page_get_flags(useronly_clean_ptr(address));
- if (flags & PAGE_VALID) {
- fi.type = ARMFault_Permission;
- } else {
- fi.type = ARMFault_Translation;
- }
- fi.level = 3;
-
- /* now we have a real cpu fault */
- cpu_restore_state(cs, retaddr, true);
- arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi);
-#else
hwaddr phys_addr;
target_ulong page_size;
int prot, ret;
@@ -210,5 +194,29 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
cpu_restore_state(cs, retaddr, true);
arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi);
}
-#endif
}
+#else
+void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra)
+{
+ ARMMMUFaultInfo fi = {
+ .type = maperr ? ARMFault_Translation : ARMFault_Permission,
+ .level = 3,
+ };
+ ARMCPU *cpu = ARM_CPU(cs);
+
+ /*
+ * We report both ESR and FAR to signal handlers.
+ * For now, it's easiest to deliver the fault normally.
+ */
+ cpu_restore_state(cs, ra, true);
+ arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi);
+}
+
+void arm_cpu_record_sigbus(CPUState *cs, vaddr addr,
+ MMUAccessType access_type, uintptr_t ra)
+{
+ arm_cpu_do_unaligned_access(cs, addr, access_type, MMU_USER_IDX, ra);
+}
+#endif /* !defined(CONFIG_USER_ONLY) */
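
Each converted target now implements record_sigsegv()/record_sigbus() instead of a user-only tlb_fill(). A simplified toy (not QEMU's actual plumbing) of the contract: on a user-only memory fault, common code lets the target record its fault state, then delivers the host signal; maperr distinguishes an unmapped page (SEGV_MAPERR) from a permission fault (SEGV_ACCERR):

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t fault_addr;
        int si_code;
    } ToyCPUState;

    /* Target hook: record fault details for the signal frame. In QEMU the
     * real hook may also longjmp back to the cpu loop and never return. */
    static void record_sigsegv(ToyCPUState *cpu, uint64_t addr, bool maperr)
    {
        cpu->fault_addr = addr;
        cpu->si_code = maperr ? 1 /* SEGV_MAPERR */ : 2 /* SEGV_ACCERR */;
    }

    int main(void)
    {
        ToyCPUState cpu = { 0 };

        record_sigsegv(&cpu, 0xdead000, true);
        printf("SIGSEGV at 0x%" PRIx64 ", si_code=%d\n",
               cpu.fault_addr, cpu.si_code);
        return 0;
    }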
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index c2e7483f5b..ed6c781342 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -205,9 +205,9 @@ static const struct SysemuCPUOps cris_sysemu_ops = {
static const struct TCGCPUOps crisv10_tcg_ops = {
.initialize = cris_initialize_crisv10_tcg,
- .tlb_fill = cris_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = cris_cpu_tlb_fill,
.cpu_exec_interrupt = cris_cpu_exec_interrupt,
.do_interrupt = crisv10_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
@@ -215,9 +215,9 @@ static const struct TCGCPUOps crisv10_tcg_ops = {
static const struct TCGCPUOps crisv32_tcg_ops = {
.initialize = cris_initialize_tcg,
- .tlb_fill = cris_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = cris_cpu_tlb_fill,
.cpu_exec_interrupt = cris_cpu_exec_interrupt,
.do_interrupt = cris_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
diff --git a/target/cris/cpu.h b/target/cris/cpu.h
index 6603565f83..b445b194ea 100644
--- a/target/cris/cpu.h
+++ b/target/cris/cpu.h
@@ -189,6 +189,10 @@ extern const VMStateDescription vmstate_cris_cpu;
void cris_cpu_do_interrupt(CPUState *cpu);
void crisv10_cpu_do_interrupt(CPUState *cpu);
bool cris_cpu_exec_interrupt(CPUState *cpu, int int_req);
+
+bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
#endif
void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags);
@@ -251,10 +255,6 @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch)
return !!(env->pregs[PR_CCS] & U_FLAG);
}
-bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
-
/* Support function regs. */
#define SFR_RW_GC_CFG 0][0
#define SFR_RW_MM_CFG env->pregs[PR_SRS]][0
diff --git a/target/cris/helper.c b/target/cris/helper.c
index 36926faf32..a0d6ecdcd3 100644
--- a/target/cris/helper.c
+++ b/target/cris/helper.c
@@ -39,22 +39,6 @@
#define D_LOG(...) do { } while (0)
#endif
-#if defined(CONFIG_USER_ONLY)
-
-bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- CRISCPU *cpu = CRIS_CPU(cs);
-
- cs->exception_index = 0xaa;
- cpu->env.pregs[PR_EDA] = address;
- cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else /* !CONFIG_USER_ONLY */
-
-
static void cris_shift_ccs(CPUCRISState *env)
{
uint32_t ccs;
@@ -304,5 +288,3 @@ bool cris_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
return ret;
}
-
-#endif /* !CONFIG_USER_ONLY */
diff --git a/target/cris/meson.build b/target/cris/meson.build
index 67c3793c85..c1e326d950 100644
--- a/target/cris/meson.build
+++ b/target/cris/meson.build
@@ -2,13 +2,16 @@ cris_ss = ss.source_set()
cris_ss.add(files(
'cpu.c',
'gdbstub.c',
- 'helper.c',
'op_helper.c',
'translate.c',
))
cris_softmmu_ss = ss.source_set()
-cris_softmmu_ss.add(files('mmu.c', 'machine.c'))
+cris_softmmu_ss.add(files(
+ 'helper.c',
+ 'machine.c',
+ 'mmu.c',
+))
target_arch += {'cris': cris_ss}
target_softmmu_arch += {'cris': cris_softmmu_ss}
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index 3338365c16..160a46a3d5 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -245,34 +245,11 @@ static void hexagon_cpu_init(Object *obj)
qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property);
}
-static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
-#ifdef CONFIG_USER_ONLY
- switch (access_type) {
- case MMU_INST_FETCH:
- cs->exception_index = HEX_EXCP_FETCH_NO_UPAGE;
- break;
- case MMU_DATA_LOAD:
- cs->exception_index = HEX_EXCP_PRIV_NO_UREAD;
- break;
- case MMU_DATA_STORE:
- cs->exception_index = HEX_EXCP_PRIV_NO_UWRITE;
- break;
- }
- cpu_loop_exit_restore(cs, retaddr);
-#else
-#error System mode not implemented for Hexagon
-#endif
-}
-
#include "hw/core/tcg-cpu-ops.h"
static const struct TCGCPUOps hexagon_tcg_ops = {
.initialize = hexagon_translate_init,
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
- .tlb_fill = hexagon_tlb_fill,
};
static void hexagon_cpu_class_init(ObjectClass *c, void *data)
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 89cba9d7a2..23eb254228 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -145,9 +145,9 @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
static const struct TCGCPUOps hppa_tcg_ops = {
.initialize = hppa_translate_init,
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
- .tlb_fill = hppa_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = hppa_cpu_tlb_fill,
.cpu_exec_interrupt = hppa_cpu_exec_interrupt,
.do_interrupt = hppa_cpu_do_interrupt,
.do_unaligned_access = hppa_cpu_do_unaligned_access,
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index d3cb7a279f..294fd7297f 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -323,10 +323,10 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr);
int hppa_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int hppa_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void hppa_cpu_dump_state(CPUState *cs, FILE *f, int);
+#ifndef CONFIG_USER_ONLY
bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
-#ifndef CONFIG_USER_ONLY
void hppa_cpu_do_interrupt(CPUState *cpu);
bool hppa_cpu_exec_interrupt(CPUState *cpu, int int_req);
int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index afc5b56c3e..bf07445cd1 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -24,20 +24,6 @@
#include "hw/core/cpu.h"
#include "trace.h"
-#ifdef CONFIG_USER_ONLY
-bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- HPPACPU *cpu = HPPA_CPU(cs);
-
- /* ??? Test between data page fault and data memory protection trap,
- which would affect si_code. */
- cs->exception_index = EXCP_DMP;
- cpu->env.cr[CR_IOR] = address;
- cpu_loop_exit_restore(cs, retaddr);
-}
-#else
static hppa_tlb_entry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
{
int i;
@@ -392,4 +378,3 @@ int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr)
hppa_tlb_entry *ent = hppa_find_tlb(env, vaddr);
return ent ? ent->ar_type : -1;
}
-#endif /* CONFIG_USER_ONLY */
diff --git a/target/hppa/meson.build b/target/hppa/meson.build
index 8a7ff82efc..021e42a2d0 100644
--- a/target/hppa/meson.build
+++ b/target/hppa/meson.build
@@ -7,13 +7,15 @@ hppa_ss.add(files(
'gdbstub.c',
'helper.c',
'int_helper.c',
- 'mem_helper.c',
'op_helper.c',
'translate.c',
))
hppa_softmmu_ss = ss.source_set()
-hppa_softmmu_ss.add(files('machine.c'))
+hppa_softmmu_ss.add(files(
+ 'machine.c',
+ 'mem_helper.c',
+))
target_arch += {'hppa': hppa_ss}
target_softmmu_arch += {'hppa': hppa_softmmu_ss}
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 60ca09e95e..0a4401e917 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -43,9 +43,15 @@ bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
#endif
/* helper.c */
+#ifdef CONFIG_USER_ONLY
+void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+#else
bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
+#endif
void breakpoint_handler(CPUState *cs);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 3ecfae34cb..6fdfdf9598 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -72,10 +72,11 @@ static const struct TCGCPUOps x86_tcg_ops = {
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.cpu_exec_enter = x86_cpu_exec_enter,
.cpu_exec_exit = x86_cpu_exec_exit,
- .tlb_fill = x86_cpu_tlb_fill,
#ifdef CONFIG_USER_ONLY
.fake_user_interrupt = x86_cpu_do_interrupt,
+ .record_sigsegv = x86_cpu_record_sigsegv,
#else
+ .tlb_fill = x86_cpu_tlb_fill,
.do_interrupt = x86_cpu_do_interrupt,
.cpu_exec_interrupt = x86_cpu_exec_interrupt,
.debug_excp_handler = breakpoint_handler,
diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
index a89b5228fd..cd507e2a1b 100644
--- a/target/i386/tcg/user/excp_helper.c
+++ b/target/i386/tcg/user/excp_helper.c
@@ -22,18 +22,29 @@
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
-bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
+ /*
+ * The error_code that hw reports as part of the exception frame
+ * is copied to linux sigcontext.err. The exception_index is
+ * copied to linux sigcontext.trapno. Short of inventing a new
+ * place to store the trapno, we cannot let our caller raise the
+ * signal and set exception_index to EXCP_INTERRUPT.
+ */
env->cr[2] = addr;
- env->error_code = (access_type == MMU_DATA_STORE) << PG_ERROR_W_BIT;
- env->error_code |= PG_ERROR_U_MASK;
+ env->error_code = ((access_type == MMU_DATA_STORE) << PG_ERROR_W_BIT)
+ | (maperr ? 0 : PG_ERROR_P_MASK)
+ | PG_ERROR_U_MASK;
cs->exception_index = EXCP0E_PAGE;
+
+ /* Disable do_interrupt_user. */
env->exception_is_int = 0;
env->exception_next_eip = -1;
- cpu_loop_exit_restore(cs, retaddr);
+
+ cpu_loop_exit_restore(cs, ra);
}
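
The error_code assembled in x86_cpu_record_sigsegv() follows the architectural x86 page-fault error code layout: bit 0 (P) is set when the fault was a protection violation rather than a missing page, bit 1 (W/R) for writes, bit 2 (U/S) for user-mode access. A small worked check of that mapping:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PF_PRESENT (1u << 0)   /* protection fault, page was present */
    #define PF_WRITE   (1u << 1)   /* faulting access was a write */
    #define PF_USER    (1u << 2)   /* fault taken in user mode */

    static uint32_t pf_error_code(bool is_write, bool maperr)
    {
        return (is_write ? PF_WRITE : 0)
             | (maperr ? 0 : PF_PRESENT)
             | PF_USER;
    }

    int main(void)
    {
        /* Write to an unmapped page: W|U set, P clear. */
        assert(pf_error_code(true, true) == (PF_WRITE | PF_USER));
        /* Read of a mapped but protected page: P|U set. */
        assert(pf_error_code(false, false) == (PF_PRESENT | PF_USER));
        return 0;
    }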
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 66d22d1189..c7aeb7da9c 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -515,9 +515,9 @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
static const struct TCGCPUOps m68k_tcg_ops = {
.initialize = m68k_tcg_init,
- .tlb_fill = m68k_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = m68k_cpu_tlb_fill,
.cpu_exec_interrupt = m68k_cpu_exec_interrupt,
.do_interrupt = m68k_cpu_do_interrupt,
.do_transaction_failed = m68k_cpu_transaction_failed,
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index 137a3e1a3d..5728e48585 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -978,16 +978,12 @@ void m68k_set_irq_level(M68kCPU *cpu, int level, uint8_t vector)
}
}
-#endif
-
bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType qemu_access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
{
M68kCPU *cpu = M68K_CPU(cs);
CPUM68KState *env = &cpu->env;
-
-#ifndef CONFIG_USER_ONLY
hwaddr physical;
int prot;
int access_type;
@@ -1051,12 +1047,12 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
if (!(access_type & ACCESS_STORE)) {
env->mmu.ssw |= M68K_RW_040;
}
-#endif
cs->exception_index = EXCP_ACCESS;
env->mmu.ar = address;
cpu_loop_exit_restore(cs, retaddr);
}
+#endif /* !CONFIG_USER_ONLY */
uint32_t HELPER(bitrev)(uint32_t x)
{
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 15db277925..b9c888b87e 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -365,9 +365,9 @@ static const struct SysemuCPUOps mb_sysemu_ops = {
static const struct TCGCPUOps mb_tcg_ops = {
.initialize = mb_tcg_init,
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
- .tlb_fill = mb_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = mb_cpu_tlb_fill,
.cpu_exec_interrupt = mb_cpu_exec_interrupt,
.do_interrupt = mb_cpu_do_interrupt,
.do_transaction_failed = mb_cpu_transaction_failed,
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index b7a848bbae..e9cd0b88de 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -394,10 +394,6 @@ void mb_tcg_init(void);
#define MMU_USER_IDX 2
/* See NB_MMU_MODES further up the file. */
-bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
-
typedef CPUMBState CPUArchState;
typedef MicroBlazeCPU ArchCPU;
@@ -415,6 +411,10 @@ static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc,
}
#if !defined(CONFIG_USER_ONLY)
+bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+
void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
unsigned size, MMUAccessType access_type,
int mmu_idx, MemTxAttrs attrs,
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index dd2aecd1d5..a607fe68e5 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -24,18 +24,7 @@
#include "qemu/host-utils.h"
#include "exec/log.h"
-#if defined(CONFIG_USER_ONLY)
-
-bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- cs->exception_index = 0xaa;
- cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else /* !CONFIG_USER_ONLY */
-
+#ifndef CONFIG_USER_ONLY
static bool mb_cpu_access_is_secure(MicroBlazeCPU *cpu,
MMUAccessType access_type)
{
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 437bbed6d6..2561b904b9 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -722,6 +722,7 @@ static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb)
}
#endif
+#ifndef CONFIG_USER_ONLY
static void record_unaligned_ess(DisasContext *dc, int rd,
MemOp size, bool store)
{
@@ -734,6 +735,7 @@ static void record_unaligned_ess(DisasContext *dc, int rd,
tcg_set_insn_start_param(dc->insn_start, 1, iflags);
}
+#endif
static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop,
int mem_index, bool rev)
@@ -755,12 +757,19 @@ static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop,
}
}
+ /*
+ * For system mode, enforce alignment if the cpu configuration
+ * requires it. For user-mode, the Linux kernel will have fixed up
+ * any unaligned access, so emulate that by *not* setting MO_ALIGN.
+ */
+#ifndef CONFIG_USER_ONLY
if (size > MO_8 &&
(dc->tb_flags & MSR_EE) &&
dc->cfg->unaligned_exceptions) {
record_unaligned_ess(dc, rd, size, false);
mop |= MO_ALIGN;
}
+#endif
tcg_gen_qemu_ld_i32(reg_for_write(dc, rd), addr, mem_index, mop);
@@ -901,12 +910,19 @@ static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop,
}
}
+ /*
+ * For system mode, enforce alignment if the cpu configuration
+ * requires it. For user-mode, the Linux kernel will have fixed up
+ * any unaligned access, so emulate that by *not* setting MO_ALIGN.
+ */
+#ifndef CONFIG_USER_ONLY
if (size > MO_8 &&
(dc->tb_flags & MSR_EE) &&
dc->cfg->unaligned_exceptions) {
record_unaligned_ess(dc, rd, size, true);
mop |= MO_ALIGN;
}
+#endif
tcg_gen_qemu_st_i32(reg_for_read(dc, rd), addr, mem_index, mop);
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 00e0c55d0e..4aae23934b 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -539,9 +539,9 @@ static const struct SysemuCPUOps mips_sysemu_ops = {
static const struct TCGCPUOps mips_tcg_ops = {
.initialize = mips_tcg_init,
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
- .tlb_fill = mips_cpu_tlb_fill,
#if !defined(CONFIG_USER_ONLY)
+ .tlb_fill = mips_cpu_tlb_fill,
.cpu_exec_interrupt = mips_cpu_exec_interrupt,
.do_interrupt = mips_cpu_do_interrupt,
.do_transaction_failed = mips_cpu_do_transaction_failed,
diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build
index 8f6f7508b6..98003779ae 100644
--- a/target/mips/tcg/meson.build
+++ b/target/mips/tcg/meson.build
@@ -28,9 +28,6 @@ mips_ss.add(when: 'TARGET_MIPS64', if_true: files(
'mxu_translate.c',
))
-if have_user
- subdir('user')
-endif
if have_system
subdir('sysemu')
endif
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
index bad3deb611..466768aec4 100644
--- a/target/mips/tcg/tcg-internal.h
+++ b/target/mips/tcg/tcg-internal.h
@@ -18,9 +18,6 @@
void mips_tcg_init(void);
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
-bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
MMUAccessType access_type, int mmu_idx,
uintptr_t retaddr) QEMU_NORETURN;
@@ -60,6 +57,10 @@ void mips_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
MemTxResult response, uintptr_t retaddr);
void cpu_mips_tlb_flush(CPUMIPSState *env);
+bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+
#endif /* !CONFIG_USER_ONLY */
#endif
diff --git a/target/mips/tcg/user/meson.build b/target/mips/tcg/user/meson.build
deleted file mode 100644
index 79badcd321..0000000000
--- a/target/mips/tcg/user/meson.build
+++ /dev/null
@@ -1,3 +0,0 @@
-mips_user_ss.add(files(
- 'tlb_helper.c',
-))
diff --git a/target/mips/tcg/user/tlb_helper.c b/target/mips/tcg/user/tlb_helper.c
deleted file mode 100644
index 210c6d529e..0000000000
--- a/target/mips/tcg/user/tlb_helper.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * MIPS TLB (Translation lookaside buffer) helpers.
- *
- * Copyright (c) 2004-2005 Jocelyn Mayer
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "qemu/osdep.h"
-
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "internal.h"
-
-static void raise_mmu_exception(CPUMIPSState *env, target_ulong address,
- MMUAccessType access_type)
-{
- CPUState *cs = env_cpu(env);
-
- env->error_code = 0;
- if (access_type == MMU_INST_FETCH) {
- env->error_code |= EXCP_INST_NOTAVAIL;
- }
-
- /* Reference to kernel address from user mode or supervisor mode */
- /* Reference to supervisor address from user mode */
- if (access_type == MMU_DATA_STORE) {
- cs->exception_index = EXCP_AdES;
- } else {
- cs->exception_index = EXCP_AdEL;
- }
-
- /* Raise exception */
- if (!(env->hflags & MIPS_HFLAG_DM)) {
- env->CP0_BadVAddr = address;
- }
-}
-
-bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- MIPSCPU *cpu = MIPS_CPU(cs);
- CPUMIPSState *env = &cpu->env;
-
- /* data access */
- raise_mmu_exception(env, address, access_type);
- do_raise_exception_err(env, cs->exception_index, env->error_code, retaddr);
-}
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 58ecd27d75..4cade61e93 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -216,9 +216,11 @@ static const struct SysemuCPUOps nios2_sysemu_ops = {
static const struct TCGCPUOps nios2_tcg_ops = {
.initialize = nios2_tcg_init,
- .tlb_fill = nios2_cpu_tlb_fill,
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = nios2_cpu_record_sigsegv,
+#else
+ .tlb_fill = nios2_cpu_tlb_fill,
.cpu_exec_interrupt = nios2_cpu_exec_interrupt,
.do_interrupt = nios2_cpu_do_interrupt,
.do_unaligned_access = nios2_cpu_do_unaligned_access,
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index a80587338a..1a69ed7a49 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -218,9 +218,15 @@ static inline int cpu_mmu_index(CPUNios2State *env, bool ifetch)
MMU_SUPERVISOR_IDX;
}
+#ifdef CONFIG_USER_ONLY
+void nios2_cpu_record_sigsegv(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+#else
bool nios2_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
+#endif
static inline int cpu_interrupts_enabled(CPUNios2State *env)
{
diff --git a/target/nios2/helper.c b/target/nios2/helper.c
index 53be8398e9..e5c98650e1 100644
--- a/target/nios2/helper.c
+++ b/target/nios2/helper.c
@@ -38,10 +38,11 @@ void nios2_cpu_do_interrupt(CPUState *cs)
env->regs[R_EA] = env->regs[R_PC] + 4;
}
-bool nios2_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+void nios2_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr)
{
+ /* FIXME: Disentangle kuser page from linux-user sigsegv handling. */
cs->exception_index = 0xaa;
cpu_loop_exit_restore(cs, retaddr);
}
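
Exception index 0xaa is only meaningful to the nios2 linux-user loop, which currently overloads it for both the kuser helper page and genuine faults; that is the entanglement the FIXME refers to. A hypothetical fragment of the consumer (helper name and handling are placeholders, not the actual loop):

    /* Hypothetical linux-user/nios2/cpu_loop.c fragment. */
    case 0xaa:
        if (env->regs[R_PC] == 0x1004) {
            /* __kuser_cmpxchg trampoline: placeholder handling. */
            do_kuser_cmpxchg(env);
        } else {
            force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR,
                            env->regs[R_PC]);
        }
        break;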
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 27cb04152f..dfbafc5236 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -186,9 +186,9 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
static const struct TCGCPUOps openrisc_tcg_ops = {
.initialize = openrisc_translate_init,
- .tlb_fill = openrisc_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = openrisc_cpu_tlb_fill,
.cpu_exec_interrupt = openrisc_cpu_exec_interrupt,
.do_interrupt = openrisc_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 187a4a114e..ee069b080c 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -317,14 +317,15 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void openrisc_translate_init(void);
-bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
#define cpu_list cpu_openrisc_list
#ifndef CONFIG_USER_ONLY
+bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+
extern const VMStateDescription vmstate_openrisc_cpu;
void openrisc_cpu_do_interrupt(CPUState *cpu);
diff --git a/target/openrisc/meson.build b/target/openrisc/meson.build
index e445dec4a0..84322086ec 100644
--- a/target/openrisc/meson.build
+++ b/target/openrisc/meson.build
@@ -10,7 +10,6 @@ openrisc_ss.add(files(
'fpu_helper.c',
'gdbstub.c',
'interrupt_helper.c',
- 'mmu.c',
'sys_helper.c',
'translate.c',
))
@@ -19,6 +18,7 @@ openrisc_softmmu_ss = ss.source_set()
openrisc_softmmu_ss.add(files(
'interrupt.c',
'machine.c',
+ 'mmu.c',
))
target_arch += {'openrisc': openrisc_ss}
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index 94df8c7bef..e561ef245b 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -23,11 +23,8 @@
#include "exec/exec-all.h"
#include "exec/gdbstub.h"
#include "qemu/host-utils.h"
-#ifndef CONFIG_USER_ONLY
#include "hw/loader.h"
-#endif
-#ifndef CONFIG_USER_ONLY
static inline void get_phys_nommu(hwaddr *phys_addr, int *prot,
target_ulong address)
{
@@ -94,7 +91,6 @@ static int get_phys_mmu(OpenRISCCPU *cpu, hwaddr *phys_addr, int *prot,
return need & PAGE_EXEC ? EXCP_ITLBMISS : EXCP_DTLBMISS;
}
}
-#endif
static void raise_mmu_exception(OpenRISCCPU *cpu, target_ulong address,
int exception)
@@ -112,8 +108,6 @@ bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
{
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
int excp = EXCP_DPF;
-
-#ifndef CONFIG_USER_ONLY
int prot;
hwaddr phys_addr;
@@ -138,13 +132,11 @@ bool openrisc_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
if (probe) {
return false;
}
-#endif
raise_mmu_exception(cpu, addr, excp);
cpu_loop_exit_restore(cs, retaddr);
}
-#ifndef CONFIG_USER_ONLY
hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
{
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
@@ -177,4 +169,3 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
return phys_addr;
}
}
-#endif
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 0472ec9154..e946da5f3a 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1302,9 +1302,6 @@ extern const VMStateDescription vmstate_ppc_cpu;
/*****************************************************************************/
void ppc_translate_init(void);
-bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
#if !defined(CONFIG_USER_ONLY)
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 65545ba9ca..1c7a7b4b38 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -9014,9 +9014,11 @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
static const struct TCGCPUOps ppc_tcg_ops = {
.initialize = ppc_translate_init,
- .tlb_fill = ppc_cpu_tlb_fill,
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = ppc_cpu_record_sigsegv,
+#else
+ .tlb_fill = ppc_cpu_tlb_fill,
.cpu_exec_interrupt = ppc_cpu_exec_interrupt,
.do_interrupt = ppc_cpu_do_interrupt,
.cpu_exec_enter = ppc_cpu_exec_enter,
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index b7d1767920..17607adbe4 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -454,13 +454,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
break;
}
case POWERPC_EXCP_ALIGN: /* Alignment exception */
- /* Get rS/rD and rA from faulting opcode */
/*
- * Note: the opcode fields will not be set properly for a
- * direct store load/store, but nobody cares as nobody
- * actually uses direct store segments.
+ * Get rS/rD and rA from faulting opcode.
+ * Note: We will only invoke ALIGN for atomic operations,
+ * so all instructions are X-form.
*/
- env->spr[SPR_DSISR] |= (env->error_code & 0x03FF0000) >> 16;
+ {
+ uint32_t insn = cpu_ldl_code(env, env->nip);
+ env->spr[SPR_DSISR] |= (insn & 0x03FF0000) >> 16;
+ }
break;
case POWERPC_EXCP_PROGRAM: /* Program exception */
switch (env->error_code & ~0xF) {
@@ -1452,24 +1454,31 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
book3s_msgsnd_common(pir, PPC_INTERRUPT_DOORBELL);
}
-#endif
-#endif /* CONFIG_TCG */
-#endif
+#endif /* TARGET_PPC64 */
-#ifdef CONFIG_TCG
void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)
{
CPUPPCState *env = cs->env_ptr;
- uint32_t insn;
- /* Restore state and reload the insn we executed, for filling in DSISR. */
- cpu_restore_state(cs, retaddr, true);
- insn = cpu_ldl_code(env, env->nip);
+ switch (env->mmu_model) {
+ case POWERPC_MMU_SOFT_4xx:
+ case POWERPC_MMU_SOFT_4xx_Z:
+ env->spr[SPR_40x_DEAR] = vaddr;
+ break;
+ case POWERPC_MMU_BOOKE:
+ case POWERPC_MMU_BOOKE206:
+ env->spr[SPR_BOOKE_DEAR] = vaddr;
+ break;
+ default:
+ env->spr[SPR_DAR] = vaddr;
+ break;
+ }
cs->exception_index = POWERPC_EXCP_ALIGN;
- env->error_code = insn & 0x03FF0000;
- cpu_loop_exit(cs);
+ env->error_code = 0;
+ cpu_loop_exit_restore(cs, retaddr);
}
-#endif
+#endif /* CONFIG_TCG */
+#endif /* !CONFIG_USER_ONLY */
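
With the opcode now reloaded at delivery time, the DSISR contents can be checked by hand. A small worked example, assuming an X-form "lwarx r5, r6, r7" (encoding computed from the ISA fields, not taken from this patch):

    /* lwarx r5,r6,r7: primary opcode 31, rD=5, rA=6, rB=7, XO=20.
     * insn = (31 << 26) | (5 << 21) | (6 << 16) | (7 << 11) | (20 << 1) */
    uint32_t insn = 0x7ca63828;

    /* Bits 6..15 of the instruction (rD and rA) are folded into DSISR:
     * (0x7ca63828 & 0x03ff0000) >> 16 == 0xa6 == (5 << 5) | 6. */
    uint32_t dsisr_bits = (insn & 0x03FF0000) >> 16;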
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 55284369f5..6aa9484f34 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -211,11 +211,6 @@ void helper_compute_fprf_float16(CPUPPCState *env, float16 arg);
void helper_compute_fprf_float32(CPUPPCState *env, float32 arg);
void helper_compute_fprf_float128(CPUPPCState *env, float128 arg);
-/* Raise a data fault alignment exception for the specified virtual address */
-void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
- MMUAccessType access_type, int mmu_idx,
- uintptr_t retaddr) QEMU_NORETURN;
-
/* translate.c */
int ppc_fixup_cpu(PowerPCCPU *cpu);
@@ -283,5 +278,17 @@ static inline void pte_invalidate(target_ulong *pte0)
#define PTE_PTEM_MASK 0x7FFFFFBF
#define PTE_CHECK_MASK (TARGET_PAGE_MASK | 0x7B)
+#ifdef CONFIG_USER_ONLY
+void ppc_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+#else
+bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+ MMUAccessType access_type, int mmu_idx,
+ uintptr_t retaddr) QEMU_NORETURN;
+#endif
#endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/user_only_helper.c b/target/ppc/user_only_helper.c
index aa3f867596..7ff76f7a06 100644
--- a/target/ppc/user_only_helper.c
+++ b/target/ppc/user_only_helper.c
@@ -21,16 +21,23 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
+#include "internal.h"
-
-bool ppc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+void ppc_cpu_record_sigsegv(CPUState *cs, vaddr address,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
int exception, error_code;
+ /*
+ * Both DSISR and the "trap number" (exception vector offset,
+ * looked up from exception_index) are present in the linux-user
+ * signal frame.
+ * FIXME: we don't actually populate the trap number properly.
+ * It would be easiest to fill in an env->trap value now.
+ */
if (access_type == MMU_INST_FETCH) {
exception = POWERPC_EXCP_ISI;
error_code = 0x40000000;
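
The hunk ends inside the function; the remainder is unchanged from the old tlb_fill and, sketched from that body (the diff does not show it, so treat as illustrative), continues roughly as:

    /* Sketch of the unchanged tail of ppc_cpu_record_sigsegv(). */
        } else {
            exception = POWERPC_EXCP_DSI;
            error_code = 0x40000000;
            if (access_type == MMU_DATA_STORE) {
                error_code |= 0x02000000;   /* store fault */
            }
            env->spr[SPR_DAR] = address;
            env->spr[SPR_DSISR] = error_code;
        }
        cs->exception_index = exception;
        env->error_code = error_code;
        cpu_loop_exit_restore(cs, retaddr);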
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 7d53125dbc..f812998123 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -694,9 +694,9 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
static const struct TCGCPUOps riscv_tcg_ops = {
.initialize = riscv_translate_init,
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
- .tlb_fill = riscv_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = riscv_cpu_tlb_fill,
.cpu_exec_interrupt = riscv_cpu_exec_interrupt,
.do_interrupt = riscv_cpu_do_interrupt,
.do_transaction_failed = riscv_cpu_do_transaction_failed,
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f30ff672f8..9eeed38c7e 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -814,7 +814,6 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
riscv_cpu_two_stage_lookup(mmu_idx);
riscv_raise_exception(env, cs->exception_index, retaddr);
}
-#endif /* !CONFIG_USER_ONLY */
bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
@@ -822,7 +821,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
{
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
-#ifndef CONFIG_USER_ONLY
vaddr im_address;
hwaddr pa = 0;
int prot, prot2, prot_pmp;
@@ -954,25 +952,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
}
return true;
-
-#else
- switch (access_type) {
- case MMU_INST_FETCH:
- cs->exception_index = RISCV_EXCP_INST_PAGE_FAULT;
- break;
- case MMU_DATA_LOAD:
- cs->exception_index = RISCV_EXCP_LOAD_PAGE_FAULT;
- break;
- case MMU_DATA_STORE:
- cs->exception_index = RISCV_EXCP_STORE_PAGE_FAULT;
- break;
- default:
- g_assert_not_reached();
- }
- env->badaddr = address;
- cpu_loop_exit_restore(cs, retaddr);
-#endif
}
+#endif /* !CONFIG_USER_ONLY */
/*
* Handle Traps
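
The deleted user-only branch does not disappear; its exception selection presumably moves into a riscv_cpu_record_sigsegv hook. A sketch assuming it mirrors the removed code (whether maperr further refines the cause is not visible in this hunk):

    void riscv_cpu_record_sigsegv(CPUState *cs, vaddr addr,
                                  MMUAccessType access_type,
                                  bool maperr, uintptr_t retaddr)
    {
        CPURISCVState *env = &RISCV_CPU(cs)->env;

        switch (access_type) {
        case MMU_INST_FETCH:
            cs->exception_index = RISCV_EXCP_INST_PAGE_FAULT;
            break;
        case MMU_DATA_LOAD:
            cs->exception_index = RISCV_EXCP_LOAD_PAGE_FAULT;
            break;
        case MMU_DATA_STORE:
            cs->exception_index = RISCV_EXCP_STORE_PAGE_FAULT;
            break;
        default:
            g_assert_not_reached();
        }
        env->badaddr = addr;
        cpu_loop_exit_restore(cs, retaddr);
    }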
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 7b7b05f1d3..ccdbaf84d5 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -266,9 +266,12 @@ static void s390_cpu_reset_full(DeviceState *dev)
static const struct TCGCPUOps s390_tcg_ops = {
.initialize = s390x_translate_init,
- .tlb_fill = s390_cpu_tlb_fill,
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = s390_cpu_record_sigsegv,
+ .record_sigbus = s390_cpu_record_sigbus,
+#else
+ .tlb_fill = s390_cpu_tlb_fill,
.cpu_exec_interrupt = s390_cpu_exec_interrupt,
.do_interrupt = s390_cpu_do_interrupt,
.debug_excp_handler = s390x_cpu_debug_excp_handler,
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index 27d4a03ca1..1a178aed41 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -270,12 +270,21 @@ ObjectClass *s390_cpu_class_by_name(const char *name);
void s390x_cpu_debug_excp_handler(CPUState *cs);
void s390_cpu_do_interrupt(CPUState *cpu);
bool s390_cpu_exec_interrupt(CPUState *cpu, int int_req);
+
+#ifdef CONFIG_USER_ONLY
+void s390_cpu_record_sigsegv(CPUState *cs, vaddr address,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr);
+void s390_cpu_record_sigbus(CPUState *cs, vaddr address,
+ MMUAccessType access_type, uintptr_t retaddr);
+#else
bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
MMUAccessType access_type, int mmu_idx,
uintptr_t retaddr) QEMU_NORETURN;
+#endif
/* fpu_helper.c */
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index 3d6662a53c..4e7648f301 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -82,6 +82,19 @@ void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
tcg_s390_data_exception(env, dxc, GETPC());
}
+/*
+ * Unaligned accesses are only diagnosed with MO_ALIGN. At the moment,
+ * this is only for the atomic operations, for which we want to raise a
+ * specification exception.
+ */
+static void QEMU_NORETURN do_unaligned_access(CPUState *cs, uintptr_t retaddr)
+{
+ S390CPU *cpu = S390_CPU(cs);
+ CPUS390XState *env = &cpu->env;
+
+ tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr);
+}
+
#if defined(CONFIG_USER_ONLY)
void s390_cpu_do_interrupt(CPUState *cs)
@@ -89,19 +102,29 @@ void s390_cpu_do_interrupt(CPUState *cs)
cs->exception_index = -1;
}
-bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+void s390_cpu_record_sigsegv(CPUState *cs, vaddr address,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t retaddr)
{
S390CPU *cpu = S390_CPU(cs);
- trigger_pgm_exception(&cpu->env, PGM_ADDRESSING);
- /* On real machines this value is dropped into LowMem. Since this
- is userland, simply put this someplace that cpu_loop can find it. */
- cpu->env.__excp_addr = address;
+ trigger_pgm_exception(&cpu->env, maperr ? PGM_ADDRESSING : PGM_PROTECTION);
+ /*
+ * On real machines this value is dropped into LowMem. Since this
+ * is userland, simply put this someplace that cpu_loop can find it.
+ * S390 only gives the page of the fault, not the exact address.
+ * C.f. the construction of TEC in mmu_translate().
+ */
+ cpu->env.__excp_addr = address & TARGET_PAGE_MASK;
cpu_loop_exit_restore(cs, retaddr);
}
+void s390_cpu_record_sigbus(CPUState *cs, vaddr address,
+ MMUAccessType access_type, uintptr_t retaddr)
+{
+ do_unaligned_access(cs, retaddr);
+}
+
#else /* !CONFIG_USER_ONLY */
static inline uint64_t cpu_mmu_idx_to_asc(int mmu_idx)
@@ -589,17 +612,11 @@ void s390x_cpu_debug_excp_handler(CPUState *cs)
}
}
-/* Unaligned accesses are only diagnosed with MO_ALIGN. At the moment,
- this is only for the atomic operations, for which we want to raise a
- specification exception. */
void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)
{
- S390CPU *cpu = S390_CPU(cs);
- CPUS390XState *env = &cpu->env;
-
- tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr);
+ do_unaligned_access(cs, retaddr);
}
static void QEMU_NORETURN monitor_event(CPUS390XState *env,
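
The relocated comment explains when do_unaligned_access() fires at all: only for accesses emitted with MO_ALIGN, which on s390x means the atomic helpers. An illustrative translator fragment (function and mem-index names are placeholders):

    /* Illustrative only: an atomic op requesting an alignment check.
     * An unaligned address then reaches do_unaligned_access() and
     * raises PGM_SPECIFICATION, in both user and system mode. */
    static void gen_example_atomic_xchg(DisasContext *s, TCGv_i64 ret,
                                        TCGv_i64 addr, TCGv_i64 val)
    {
        tcg_gen_atomic_xchg_i64(ret, addr, val, get_mem_index(s),
                                MO_TEQ | MO_ALIGN);
    }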
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 17e3f83641..362a30d99e 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -141,20 +141,12 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t ra)
{
- int flags;
-
#if defined(CONFIG_USER_ONLY)
- flags = page_get_flags(addr);
- if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) {
- env->__excp_addr = addr;
- flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
- if (nonfault) {
- return flags;
- }
- tcg_s390_program_interrupt(env, flags, ra);
- }
- *phost = g2h(env_cpu(env), addr);
+ return probe_access_flags(env, addr, access_type, mmu_idx,
+ nonfault, phost, ra);
#else
+ int flags;
+
/*
* For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
* to detect if there was an exception during tlb_fill().
@@ -173,8 +165,8 @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
(access_type == MMU_DATA_STORE
? BP_MEM_WRITE : BP_MEM_READ), ra);
}
-#endif
return 0;
+#endif
}
static int access_prepare_nf(S390Access *access, CPUS390XState *env,
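
The open-coded page_get_flags() dance is replaced by the generic probe, which this series teaches to honor nonfault probes in user mode. A minimal sketch of the contract being relied on (user-only semantics assumed):

    /* On a nonfault probe of an unmapped page, no signal is raised;
     * TLB_INVALID_MASK comes back in the flags and *phost is NULL. */
    static bool page_readable_nonfault(CPUArchState *env, target_ulong addr,
                                       int mmu_idx, uintptr_t ra)
    {
        void *host;
        int flags = probe_access_flags(env, addr, MMU_DATA_LOAD, mmu_idx,
                                       true /* nonfault */, &host, ra);
        return !(flags & TLB_INVALID_MASK);
    }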
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 2047742d03..06b2691dc4 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -236,9 +236,9 @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
static const struct TCGCPUOps superh_tcg_ops = {
.initialize = sh4_translate_init,
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
- .tlb_fill = superh_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = superh_cpu_tlb_fill,
.cpu_exec_interrupt = superh_cpu_exec_interrupt,
.do_interrupt = superh_cpu_do_interrupt,
.do_unaligned_access = superh_cpu_do_unaligned_access,
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index dc81406646..4cfb109f56 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -213,12 +213,12 @@ void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
uintptr_t retaddr) QEMU_NORETURN;
void sh4_translate_init(void);
+void sh4_cpu_list(void);
+
+#if !defined(CONFIG_USER_ONLY)
bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
-
-void sh4_cpu_list(void);
-#if !defined(CONFIG_USER_ONLY)
void superh_cpu_do_interrupt(CPUState *cpu);
bool superh_cpu_exec_interrupt(CPUState *cpu, int int_req);
void cpu_sh4_invalidate_tlb(CPUSH4State *s);
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 53cb9c3b63..6a620e36fc 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -796,8 +796,6 @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
return false;
}
-#endif /* !CONFIG_USER_ONLY */
-
bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
@@ -806,11 +804,6 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
CPUSH4State *env = &cpu->env;
int ret;
-#ifdef CONFIG_USER_ONLY
- ret = (access_type == MMU_DATA_STORE ? MMU_DTLB_VIOLATION_WRITE :
- access_type == MMU_INST_FETCH ? MMU_ITLB_VIOLATION :
- MMU_DTLB_VIOLATION_READ);
-#else
target_ulong physical;
int prot;
@@ -829,7 +822,6 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
if (ret != MMU_DTLB_MULTIPLE && ret != MMU_ITLB_MULTIPLE) {
env->pteh = (env->pteh & PTEH_ASID_MASK) | (address & PTEH_VPN_MASK);
}
-#endif
env->tea = address;
switch (ret) {
@@ -868,3 +860,4 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
}
cpu_loop_exit_restore(cs, retaddr);
}
+#endif /* !CONFIG_USER_ONLY */
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index c996dce7df..752669825f 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -29,6 +29,9 @@ void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)
{
+ CPUSH4State *env = cs->env_ptr;
+
+ env->tea = addr;
switch (access_type) {
case MMU_INST_FETCH:
case MMU_DATA_LOAD:
@@ -37,6 +40,8 @@ void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
case MMU_DATA_STORE:
cs->exception_index = 0x100;
break;
+ default:
+ g_assert_not_reached();
}
cpu_loop_exit_restore(cs, retaddr);
}
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 21dd27796d..55268ed2a1 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -865,9 +865,9 @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
static const struct TCGCPUOps sparc_tcg_ops = {
.initialize = sparc_tcg_init,
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
- .tlb_fill = sparc_cpu_tlb_fill,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = sparc_cpu_tlb_fill,
.cpu_exec_interrupt = sparc_cpu_exec_interrupt,
.do_interrupt = sparc_cpu_do_interrupt,
.do_transaction_failed = sparc_cpu_do_transaction_failed,
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index bbf3601cb1..a3e1cf9b6e 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -27,7 +27,6 @@
//#define DEBUG_MMU
//#define DEBUG_MXCC
-//#define DEBUG_UNALIGNED
//#define DEBUG_UNASSIGNED
//#define DEBUG_ASI
//#define DEBUG_CACHE_CONTROL
@@ -364,10 +363,6 @@ static void do_check_align(CPUSPARCState *env, target_ulong addr,
uint32_t align, uintptr_t ra)
{
if (addr & align) {
-#ifdef DEBUG_UNALIGNED
- printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
- "\n", addr, env->pc);
-#endif
cpu_raise_exception_ra(env, TT_UNALIGNED, ra);
}
}
@@ -1958,20 +1953,3 @@ void sparc_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
is_asi, size, retaddr);
}
#endif
-
-#if !defined(CONFIG_USER_ONLY)
-void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
- MMUAccessType access_type,
- int mmu_idx,
- uintptr_t retaddr)
-{
- SPARCCPU *cpu = SPARC_CPU(cs);
- CPUSPARCState *env = &cpu->env;
-
-#ifdef DEBUG_UNALIGNED
- printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
- "\n", addr, env->pc);
-#endif
- cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr);
-}
-#endif
diff --git a/target/sparc/meson.build b/target/sparc/meson.build
index a3638b9503..a801802ee2 100644
--- a/target/sparc/meson.build
+++ b/target/sparc/meson.build
@@ -6,7 +6,6 @@ sparc_ss.add(files(
'gdbstub.c',
'helper.c',
'ldst_helper.c',
- 'mmu_helper.c',
'translate.c',
'win_helper.c',
))
@@ -16,6 +15,7 @@ sparc_ss.add(when: 'TARGET_SPARC64', if_true: files('int64_helper.c', 'vis_helpe
sparc_softmmu_ss = ss.source_set()
sparc_softmmu_ss.add(files(
'machine.c',
+ 'mmu_helper.c',
'monitor.c',
))
diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index a44473a1c7..f2668389b0 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -25,30 +25,6 @@
/* Sparc MMU emulation */
-#if defined(CONFIG_USER_ONLY)
-
-bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- SPARCCPU *cpu = SPARC_CPU(cs);
- CPUSPARCState *env = &cpu->env;
-
- if (access_type == MMU_INST_FETCH) {
- cs->exception_index = TT_TFAULT;
- } else {
- cs->exception_index = TT_DFAULT;
-#ifdef TARGET_SPARC64
- env->dmmu.mmuregs[4] = address;
-#else
- env->mmuregs[4] = address;
-#endif
- }
- cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else
-
#ifndef TARGET_SPARC64
/*
* Sparc V8 Reference MMU (SRMMU)
@@ -526,16 +502,60 @@ static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
return 0;
}
+static uint64_t build_sfsr(CPUSPARCState *env, int mmu_idx, int rw)
+{
+ uint64_t sfsr = SFSR_VALID_BIT;
+
+ switch (mmu_idx) {
+ case MMU_PHYS_IDX:
+ sfsr |= SFSR_CT_NOTRANS;
+ break;
+ case MMU_USER_IDX:
+ case MMU_KERNEL_IDX:
+ sfsr |= SFSR_CT_PRIMARY;
+ break;
+ case MMU_USER_SECONDARY_IDX:
+ case MMU_KERNEL_SECONDARY_IDX:
+ sfsr |= SFSR_CT_SECONDARY;
+ break;
+ case MMU_NUCLEUS_IDX:
+ sfsr |= SFSR_CT_NUCLEUS;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (rw == 1) {
+ sfsr |= SFSR_WRITE_BIT;
+ } else if (rw == 4) {
+ sfsr |= SFSR_NF_BIT;
+ }
+
+ if (env->pstate & PS_PRIV) {
+ sfsr |= SFSR_PR_BIT;
+ }
+
+ if (env->dmmu.sfsr & SFSR_VALID_BIT) { /* Fault status register */
+ sfsr |= SFSR_OW_BIT; /* overflow (not read before another fault) */
+ }
+
+ /* FIXME: ASI field in SFSR must be set */
+
+ return sfsr;
+}
+
static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
int *prot, MemTxAttrs *attrs,
target_ulong address, int rw, int mmu_idx)
{
CPUState *cs = env_cpu(env);
unsigned int i;
+ uint64_t sfsr;
uint64_t context;
- uint64_t sfsr = 0;
bool is_user = false;
+ sfsr = build_sfsr(env, mmu_idx, rw);
+
switch (mmu_idx) {
case MMU_PHYS_IDX:
g_assert_not_reached();
@@ -544,29 +564,18 @@ static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
/* fallthru */
case MMU_KERNEL_IDX:
context = env->dmmu.mmu_primary_context & 0x1fff;
- sfsr |= SFSR_CT_PRIMARY;
break;
case MMU_USER_SECONDARY_IDX:
is_user = true;
/* fallthru */
case MMU_KERNEL_SECONDARY_IDX:
context = env->dmmu.mmu_secondary_context & 0x1fff;
- sfsr |= SFSR_CT_SECONDARY;
break;
- case MMU_NUCLEUS_IDX:
- sfsr |= SFSR_CT_NUCLEUS;
- /* FALLTHRU */
default:
context = 0;
break;
}
- if (rw == 1) {
- sfsr |= SFSR_WRITE_BIT;
- } else if (rw == 4) {
- sfsr |= SFSR_NF_BIT;
- }
-
for (i = 0; i < 64; i++) {
/* ctx match, vaddr match, valid? */
if (ultrasparc_tag_match(&env->dtlb[i], address, context, physical)) {
@@ -616,22 +625,9 @@ static int get_physical_address_data(CPUSPARCState *env, hwaddr *physical,
return 0;
}
- if (env->dmmu.sfsr & SFSR_VALID_BIT) { /* Fault status register */
- sfsr |= SFSR_OW_BIT; /* overflow (not read before
- another fault) */
- }
-
- if (env->pstate & PS_PRIV) {
- sfsr |= SFSR_PR_BIT;
- }
-
- /* FIXME: ASI field in SFSR must be set */
- env->dmmu.sfsr = sfsr | SFSR_VALID_BIT;
-
+ env->dmmu.sfsr = sfsr;
env->dmmu.sfar = address; /* Fault address register */
-
env->dmmu.tag_access = (address & ~0x1fffULL) | context;
-
return 1;
}
}
@@ -926,4 +922,23 @@ hwaddr sparc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
}
return phys_addr;
}
+
+#ifndef CONFIG_USER_ONLY
+void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ int mmu_idx,
+ uintptr_t retaddr)
+{
+ SPARCCPU *cpu = SPARC_CPU(cs);
+ CPUSPARCState *env = &cpu->env;
+
+#ifdef TARGET_SPARC64
+ env->dmmu.sfsr = build_sfsr(env, mmu_idx, access_type);
+ env->dmmu.sfar = addr;
+#else
+ env->mmuregs[4] = addr;
#endif
+
+ cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr);
+}
+#endif /* !CONFIG_USER_ONLY */
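
Factoring build_sfsr() out lets the unaligned-access path record the same fault status a data TLB miss would. A small worked composition for a privileged write fault in the primary context (symbolic, since the SFSR_* values are defined elsewhere):

    /* Worked example: MMU_KERNEL_IDX, rw == 1, PS_PRIV set,
     * with a previous fault still unread. */
    uint64_t sfsr = SFSR_VALID_BIT
                  | SFSR_CT_PRIMARY     /* context type from mmu_idx */
                  | SFSR_WRITE_BIT      /* rw == 1 */
                  | SFSR_PR_BIT;        /* privileged */
    if (env->dmmu.sfsr & SFSR_VALID_BIT) {
        sfsr |= SFSR_OW_BIT;            /* overflow: prior fault unread */
    }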
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index c1cbd03595..224f723236 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -192,10 +192,10 @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
static const struct TCGCPUOps xtensa_tcg_ops = {
.initialize = xtensa_translate_init,
- .tlb_fill = xtensa_cpu_tlb_fill,
.debug_excp_handler = xtensa_breakpoint_handler,
#ifndef CONFIG_USER_ONLY
+ .tlb_fill = xtensa_cpu_tlb_fill,
.cpu_exec_interrupt = xtensa_cpu_exec_interrupt,
.do_interrupt = xtensa_cpu_do_interrupt,
.do_transaction_failed = xtensa_cpu_do_transaction_failed,
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index f9a510ca46..02143f2f77 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -563,10 +563,10 @@ struct XtensaCPU {
};
+#ifndef CONFIG_USER_ONLY
bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
-#ifndef CONFIG_USER_ONLY
void xtensa_cpu_do_interrupt(CPUState *cpu);
bool xtensa_cpu_exec_interrupt(CPUState *cpu, int interrupt_request);
void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index f18ab383fd..29d216ec1b 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -242,27 +242,7 @@ void xtensa_cpu_list(void)
}
}
-#ifdef CONFIG_USER_ONLY
-
-bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- XtensaCPU *cpu = XTENSA_CPU(cs);
- CPUXtensaState *env = &cpu->env;
-
- qemu_log_mask(CPU_LOG_INT,
- "%s: rw = %d, address = 0x%08" VADDR_PRIx ", size = %d\n",
- __func__, access_type, address, size);
- env->sregs[EXCVADDR] = address;
- env->sregs[EXCCAUSE] = (access_type == MMU_DATA_STORE ?
- STORE_PROHIBITED_CAUSE : LOAD_PROHIBITED_CAUSE);
- cs->exception_index = EXC_USER;
- cpu_loop_exit_restore(cs, retaddr);
-}
-
-#else /* !CONFIG_USER_ONLY */
-
+#ifndef CONFIG_USER_ONLY
void xtensa_cpu_do_unaligned_access(CPUState *cs,
vaddr addr, MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)