49 files changed, 1247 insertions, 241 deletions
@@ -522,6 +522,7 @@ $(ROM_DIRS_RULES): recurse-all: $(addsuffix /all, $(TARGET_DIRS) $(ROM_DIRS)) recurse-clean: $(addsuffix /clean, $(TARGET_DIRS) $(ROM_DIRS)) recurse-install: $(addsuffix /install, $(TARGET_DIRS)) +$(addsuffix /install, $(TARGET_DIRS)): all $(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o") @@ -1020,7 +1021,8 @@ pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ - qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ + qemu-img.texi qemu-nbd.texi qemu-options.texi \ + qemu-tech.texi qemu-option-trace.texi \ qemu-deprecated.texi qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ qemu-monitor-info.texi docs/qemu-block-drivers.texi \ docs/qemu-cpu-models.texi docs/security.texi diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 3d86ae5052..35ea3cb624 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -91,6 +91,7 @@ struct KVMState int many_ioeventfds; int intx_set_mask; bool sync_mmu; + bool manual_dirty_log_protect; /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as * unsigned, and treating them as signed here can break things */ @@ -138,6 +139,9 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_LAST_INFO }; +#define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock) +#define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock) + int kvm_get_max_memslots(void) { KVMState *s = KVM_STATE(current_machine->accelerator); @@ -165,6 +169,7 @@ int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len) return 1; } +/* Called with KVMMemoryListener.slots_lock held */ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml) { KVMState *s = kvm_state; @@ -182,10 +187,17 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml) bool kvm_has_free_slot(MachineState *ms) { KVMState *s = KVM_STATE(ms->accelerator); + bool result; + KVMMemoryListener *kml = &s->memory_listener; + + kvm_slots_lock(kml); + result = !!kvm_get_free_slot(kml); + kvm_slots_unlock(kml); - return kvm_get_free_slot(&s->memory_listener); + return result; } +/* Called with KVMMemoryListener.slots_lock held */ static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml) { KVMSlot *slot = kvm_get_free_slot(kml); @@ -244,18 +256,21 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, hwaddr *phys_addr) { KVMMemoryListener *kml = &s->memory_listener; - int i; + int i, ret = 0; + kvm_slots_lock(kml); for (i = 0; i < s->nr_slots; i++) { KVMSlot *mem = &kml->slots[i]; if (ram >= mem->ram && ram < mem->ram + mem->memory_size) { *phys_addr = mem->start_addr + (ram - mem->ram); - return 1; + ret = 1; + break; } } + kvm_slots_unlock(kml); - return 0; + return ret; } static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) @@ -396,6 +411,7 @@ static int kvm_mem_flags(MemoryRegion *mr) return flags; } +/* Called with KVMMemoryListener.slots_lock held */ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, MemoryRegion *mr) { @@ -414,19 +430,26 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, { hwaddr start_addr, size; KVMSlot *mem; + int ret = 0; size = kvm_align_section(section, &start_addr); if (!size) { return 0; } + kvm_slots_lock(kml); + mem = 
kvm_lookup_matching_slot(kml, start_addr, size); if (!mem) { /* We don't have a slot if we want to trap every access. */ - return 0; + goto out; } - return kvm_slot_update_flags(kml, mem, section->mr); + ret = kvm_slot_update_flags(kml, mem, section->mr); + +out: + kvm_slots_unlock(kml); + return ret; } static void kvm_log_start(MemoryListener *listener, @@ -478,13 +501,15 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) /** - * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space - * This function updates qemu's dirty bitmap using - * memory_region_set_dirty(). This means all bits are set - * to dirty. + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * + * This function will first try to fetch dirty bitmap from the kernel, + * and then updates qemu's dirty bitmap. + * + * NOTE: caller must be with kml->slots_lock held. * - * @start_add: start of logged region. - * @end_addr: end of logged region. + * @kml: the KVM memory listener object + * @section: the memory section to sync the dirty bitmap with */ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, MemoryRegionSection *section) @@ -493,13 +518,14 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, struct kvm_dirty_log d = {}; KVMSlot *mem; hwaddr start_addr, size; + int ret = 0; size = kvm_align_section(section, &start_addr); if (size) { mem = kvm_lookup_matching_slot(kml, start_addr, size); if (!mem) { /* We don't have a slot if we want to trap every access. */ - return 0; + goto out; } /* XXX bad kernel interface alert @@ -516,20 +542,176 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, */ size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), /*HOST_LONG_BITS*/ 64) / 8; - d.dirty_bitmap = g_malloc0(size); + if (!mem->dirty_bmap) { + /* Allocate on the first log_sync, once and for all */ + mem->dirty_bmap = g_malloc0(size); + } + d.dirty_bitmap = mem->dirty_bmap; d.slot = mem->slot | (kml->as_id << 16); if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) { DPRINTF("ioctl failed %d\n", errno); - g_free(d.dirty_bitmap); - return -1; + ret = -1; + goto out; } kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); - g_free(d.dirty_bitmap); } +out: + return ret; +} - return 0; +/* Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages */ +#define KVM_CLEAR_LOG_SHIFT 6 +#define KVM_CLEAR_LOG_ALIGN (qemu_real_host_page_size << KVM_CLEAR_LOG_SHIFT) +#define KVM_CLEAR_LOG_MASK (-KVM_CLEAR_LOG_ALIGN) + +/** + * kvm_physical_log_clear - Clear the kernel's dirty bitmap for range + * + * NOTE: this will be a no-op if we haven't enabled manual dirty log + * protection in the host kernel because in that case this operation + * will be done within log_sync(). + * + * @kml: the kvm memory listener + * @section: the memory range to clear dirty bitmap + */ +static int kvm_physical_log_clear(KVMMemoryListener *kml, + MemoryRegionSection *section) +{ + KVMState *s = kvm_state; + struct kvm_clear_dirty_log d; + uint64_t start, end, bmap_start, start_delta, bmap_npages, size; + unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size; + KVMSlot *mem = NULL; + int ret, i; + + if (!s->manual_dirty_log_protect) { + /* No need to do explicit clear */ + return 0; + } + + start = section->offset_within_address_space; + size = int128_get64(section->size); + + if (!size) { + /* Nothing more we can do... 
*/ + return 0; + } + + kvm_slots_lock(kml); + + /* Find any possible slot that covers the section */ + for (i = 0; i < s->nr_slots; i++) { + mem = &kml->slots[i]; + if (mem->start_addr <= start && + start + size <= mem->start_addr + mem->memory_size) { + break; + } + } + + /* + * We should always find one memslot until this point, otherwise + * there could be something wrong from the upper layer + */ + assert(mem && i != s->nr_slots); + + /* + * We need to extend either the start or the size or both to + * satisfy the KVM interface requirement. Firstly, do the start + * page alignment on 64 host pages + */ + bmap_start = (start - mem->start_addr) & KVM_CLEAR_LOG_MASK; + start_delta = start - mem->start_addr - bmap_start; + bmap_start /= psize; + + /* + * The kernel interface has restriction on the size too, that either: + * + * (1) the size is 64 host pages aligned (just like the start), or + * (2) the size fills up until the end of the KVM memslot. + */ + bmap_npages = DIV_ROUND_UP(size + start_delta, KVM_CLEAR_LOG_ALIGN) + << KVM_CLEAR_LOG_SHIFT; + end = mem->memory_size / psize; + if (bmap_npages > end - bmap_start) { + bmap_npages = end - bmap_start; + } + start_delta /= psize; + + /* + * Prepare the bitmap to clear dirty bits. Here we must guarantee + * that we won't clear any unknown dirty bits otherwise we might + * accidentally clear some set bits which are not yet synced from + * the kernel into QEMU's bitmap, then we'll lose track of the + * guest modifications upon those pages (which can directly lead + * to guest data loss or panic after migration). + * + * Layout of the KVMSlot.dirty_bmap: + * + * |<-------- bmap_npages -----------..>| + * [1] + * start_delta size + * |----------------|-------------|------------------|------------| + * ^ ^ ^ ^ + * | | | | + * start bmap_start (start) end + * of memslot of memslot + * + * [1] bmap_npages can be aligned to either 64 pages or the end of slot + */ + + assert(bmap_start % BITS_PER_LONG == 0); + /* We should never do log_clear before log_sync */ + assert(mem->dirty_bmap); + if (start_delta) { + /* Slow path - we need to manipulate a temp bitmap */ + bmap_clear = bitmap_new(bmap_npages); + bitmap_copy_with_src_offset(bmap_clear, mem->dirty_bmap, + bmap_start, start_delta + size / psize); + /* + * We need to fill the holes at start because that was not + * specified by the caller and we extended the bitmap only for + * 64 pages alignment + */ + bitmap_clear(bmap_clear, 0, start_delta); + d.dirty_bitmap = bmap_clear; + } else { + /* Fast path - start address aligns well with BITS_PER_LONG */ + d.dirty_bitmap = mem->dirty_bmap + BIT_WORD(bmap_start); + } + + d.first_page = bmap_start; + /* It should never overflow. If it happens, say something */ + assert(bmap_npages <= UINT32_MAX); + d.num_pages = bmap_npages; + d.slot = mem->slot | (kml->as_id << 16); + + if (kvm_vm_ioctl(s, KVM_CLEAR_DIRTY_LOG, &d) == -1) { + ret = -errno; + error_report("%s: KVM_CLEAR_DIRTY_LOG failed, slot=%d, " + "start=0x%"PRIx64", size=0x%"PRIx32", errno=%d", + __func__, d.slot, (uint64_t)d.first_page, + (uint32_t)d.num_pages, ret); + } else { + ret = 0; + trace_kvm_clear_dirty_log(d.slot, d.first_page, d.num_pages); + } + + /* + * After we have updated the remote dirty bitmap, we update the + * cached bitmap as well for the memslot, then if another user + * clears the same region we know we shouldn't clear it again on + * the remote otherwise it's data loss as well. 
+ */ + bitmap_clear(mem->dirty_bmap, bmap_start + start_delta, + size / psize); + /* This handles the NULL case well */ + g_free(bmap_clear); + + kvm_slots_unlock(kml); + + return ret; } static void kvm_coalesce_mmio_region(MemoryListener *listener, @@ -791,16 +973,20 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + (start_addr - section->offset_within_address_space); + kvm_slots_lock(kml); + if (!add) { mem = kvm_lookup_matching_slot(kml, start_addr, size); if (!mem) { - return; + goto out; } if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { kvm_physical_sync_dirty_bitmap(kml, section); } /* unregister the slot */ + g_free(mem->dirty_bmap); + mem->dirty_bmap = NULL; mem->memory_size = 0; mem->flags = 0; err = kvm_set_user_memory_region(kml, mem, false); @@ -809,7 +995,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, __func__, strerror(-err)); abort(); } - return; + goto out; } /* register the new slot */ @@ -825,6 +1011,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, strerror(-err)); abort(); } + +out: + kvm_slots_unlock(kml); } static void kvm_region_add(MemoryListener *listener, @@ -851,8 +1040,26 @@ static void kvm_log_sync(MemoryListener *listener, KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); int r; + kvm_slots_lock(kml); r = kvm_physical_sync_dirty_bitmap(kml, section); + kvm_slots_unlock(kml); + if (r < 0) { + abort(); + } +} + +static void kvm_log_clear(MemoryListener *listener, + MemoryRegionSection *section) +{ + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); + int r; + + r = kvm_physical_log_clear(kml, section); if (r < 0) { + error_report_once("%s: kvm log clear failed: mr=%s " + "offset=%"HWADDR_PRIx" size=%"PRIx64, __func__, + section->mr->name, section->offset_within_region, + int128_get64(section->size)); abort(); } } @@ -935,6 +1142,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, { int i; + qemu_mutex_init(&kml->slots_lock); kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot)); kml->as_id = as_id; @@ -947,6 +1155,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, kml->listener.log_start = kvm_log_start; kml->listener.log_stop = kvm_log_stop; kml->listener.log_sync = kvm_log_sync; + kml->listener.log_clear = kvm_log_clear; kml->listener.priority = 10; memory_listener_register(&kml->listener, as); @@ -1671,6 +1880,17 @@ static int kvm_init(MachineState *ms) s->coalesced_pio = s->coalesced_mmio && kvm_check_extension(s, KVM_CAP_COALESCED_PIO); + s->manual_dirty_log_protect = + kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (s->manual_dirty_log_protect) { + ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, 1); + if (ret) { + warn_report("Trying to enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 " + "but failed. Falling back to the legacy mode. 
"); + s->manual_dirty_log_protect = false; + } + } + #ifdef KVM_CAP_VCPU_EVENTS s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS); #endif diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 33c5b1b3af..4fb6e59d19 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -15,4 +15,5 @@ kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index cb5f4b19c5..897d1571c4 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -64,27 +64,56 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, CPUState *cpu = current_cpu; CPUClass *cc; unsigned long address = (unsigned long)info->si_addr; - MMUAccessType access_type; + MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; - /* We must handle PC addresses from two different sources: - * a call return address and a signal frame address. - * - * Within cpu_restore_state_from_tb we assume the former and adjust - * the address by -GETPC_ADJ so that the address is within the call - * insn so that addr does not accidentally match the beginning of the - * next guest insn. - * - * However, when the PC comes from the signal frame, it points to - * the actual faulting host insn and not a call insn. Subtracting - * GETPC_ADJ in that case may accidentally match the previous guest insn. - * - * So for the later case, adjust forward to compensate for what - * will be done later by cpu_restore_state_from_tb. - */ - if (helper_retaddr) { + switch (helper_retaddr) { + default: + /* + * Fault during host memory operation within a helper function. + * The helper's host return address, saved here, gives us a + * pointer into the generated code that will unwind to the + * correct guest pc. + */ pc = helper_retaddr; - } else { + break; + + case 0: + /* + * Fault during host memory operation within generated code. + * (Or, a unrelated bug within qemu, but we can't tell from here). + * + * We take the host pc from the signal frame. However, we cannot + * use that value directly. Within cpu_restore_state_from_tb, we + * assume PC comes from GETPC(), as used by the helper functions, + * so we adjust the address by -GETPC_ADJ to form an address that + * is within the call insn, so that the address does not accidentially + * match the beginning of the next guest insn. However, when the + * pc comes from the signal frame it points to the actual faulting + * host memory insn and not the return from a call insn. + * + * Therefore, adjust to compensate for what will be done later + * by cpu_restore_state_from_tb. + */ pc += GETPC_ADJ; + break; + + case 1: + /* + * Fault during host read for translation, or loosely, "execution". + * + * The guest pc is already pointing to the start of the TB for which + * code is being generated. 
If the guest translator manages the + * page crossings correctly, this is exactly the correct address + * (and if the translator doesn't handle page boundaries correctly + * there's little we can do about that here). Therefore, do not + * trigger the unwinder. + * + * Like tb_gen_code, release the memory lock before cpu_loop_exit. + */ + pc = 0; + access_type = MMU_INST_FETCH; + mmap_unlock(); + break; } /* For synchronous signals we expect to be coming from the vCPU @@ -134,7 +163,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * currently executing TB was modified and must be exited * immediately. Clear helper_retaddr for next execution. */ - helper_retaddr = 0; + clear_helper_retaddr(); cpu_exit_tb_from_sighandler(cpu, old_set); /* NORETURN */ @@ -152,10 +181,9 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * an exception. Undo signal and retaddr state prior to longjmp. */ sigprocmask(SIG_SETMASK, old_set, NULL); - helper_retaddr = 0; + clear_helper_retaddr(); cc = CPU_GET_CLASS(cpu); - access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; cc->tlb_fill(cpu, address, 0, access_type, MMU_USER_IDX, false, pc); g_assert_not_reached(); } @@ -682,14 +710,15 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, if (unlikely(addr & (size - 1))) { cpu_loop_exit_atomic(env_cpu(env), retaddr); } - helper_retaddr = retaddr; - return g2h(addr); + void *ret = g2h(addr); + set_helper_retaddr(retaddr); + return ret; } /* Macro to call the above, with local variables from the use context. */ #define ATOMIC_MMU_DECLS do {} while (0) #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) -#define ATOMIC_MMU_CLEANUP do { helper_retaddr = 0; } while (0) +#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) #define EXTRA_ARGS @@ -2472,18 +2472,20 @@ void bdrv_root_unref_child(BdrvChild *child) bdrv_unref(child_bs); } -void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +/** + * Clear all inherits_from pointers from children and grandchildren of + * @root that point to @root, where necessary. + */ +static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) { - if (child == NULL) { - return; - } - - if (child->bs->inherits_from == parent) { - BdrvChild *c; + BdrvChild *c; - /* Remove inherits_from only when the last reference between parent and - * child->bs goes away. */ - QLIST_FOREACH(c, &parent->children, next) { + if (child->bs->inherits_from == root) { + /* + * Remove inherits_from only when the last reference between root and + * child->bs goes away. 
+ */ + QLIST_FOREACH(c, &root->children, next) { if (c != child && c->bs == child->bs) { break; } @@ -2493,6 +2495,18 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) } } + QLIST_FOREACH(c, &child->bs->children, next) { + bdrv_unset_inherits_from(root, c); + } +} + +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + if (child == NULL) { + return; + } + + bdrv_unset_inherits_from(parent, child); bdrv_root_unref_child(child); } @@ -4417,6 +4431,14 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, } for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing && backing_bs(i)->never_freeze) { + error_setg(errp, "Cannot freeze '%s' link to '%s'", + i->backing->name, backing_bs(i)->node_name); + return -EPERM; + } + } + + for (i = bs; i != base; i = backing_bs(i)) { if (i->backing) { i->backing->frozen = true; } diff --git a/block/commit.c b/block/commit.c index ca7e408b26..2c5a6d4ebc 100644 --- a/block/commit.c +++ b/block/commit.c @@ -298,6 +298,10 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (!filter_node_name) { commit_top_bs->implicit = true; } + + /* So that we can always drop this node */ + commit_top_bs->never_freeze = true; + commit_top_bs->total_sectors = top->total_sectors; bdrv_append(commit_top_bs, top, &local_err); diff --git a/block/gluster.c b/block/gluster.c index 62f8ff2147..f64dc5b01e 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -931,7 +931,17 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, gconf->has_debug = true; gconf->logfile = g_strdup(s->logfile); gconf->has_logfile = true; - reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp); + + /* + * If 'state->bs->exact_filename' is empty, 'state->options' should contain + * the JSON parameters already parsed. + */ + if (state->bs->exact_filename[0] != '\0') { + reop_s->glfs = qemu_gluster_init(gconf, state->bs->exact_filename, NULL, + errp); + } else { + reop_s->glfs = qemu_gluster_init(gconf, NULL, state->options, errp); + } if (reop_s->glfs == NULL) { ret = -errno; goto exit; diff --git a/block/mirror.c b/block/mirror.c index 2fcec70e35..8cb75fb409 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1551,6 +1551,10 @@ static BlockJob *mirror_start_job( if (!filter_node_name) { mirror_top_bs->implicit = true; } + + /* So that we can always drop this node */ + mirror_top_bs->never_freeze = true; + mirror_top_bs->total_sectors = bs->total_sectors; mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | diff --git a/block/nvme.c b/block/nvme.c index 73ed5fa75f..9896b7f7c6 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -613,12 +613,12 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, /* Set up admin queue. 
*/ s->queues = g_new(NVMeQueuePair *, 1); - s->nr_queues = 1; s->queues[0] = nvme_create_queue_pair(bs, 0, NVME_QUEUE_SIZE, errp); if (!s->queues[0]) { ret = -EINVAL; goto out; } + s->nr_queues = 1; QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); s->regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE); s->regs->asq = cpu_to_le64(s->queues[0]->sq.iova); diff --git a/block/stream.c b/block/stream.c index cd5e2ba9b0..6ac1e7bec4 100644 --- a/block/stream.c +++ b/block/stream.c @@ -78,8 +78,8 @@ static int stream_prepare(Job *job) base_fmt = base->drv->format_name; } } - ret = bdrv_change_backing_file(bs, base_id, base_fmt); bdrv_set_backing_hd(bs, base, &local_err); + ret = bdrv_change_backing_file(bs, base_id, base_fmt); if (local_err) { error_report_err(local_err); return -EPERM; @@ -284,5 +284,5 @@ fail: if (bs_read_only) { bdrv_reopen_set_read_only(bs, true, NULL); } - bdrv_unfreeze_backing_chain(bs, base); + bdrv_unfreeze_backing_chain(bs, bottom); } @@ -1358,6 +1358,8 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, DirtyMemoryBlocks *blocks; unsigned long end, page; bool dirty = false; + RAMBlock *ramblock; + uint64_t mr_offset, mr_size; if (length == 0) { return false; @@ -1369,6 +1371,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, rcu_read_lock(); blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + ramblock = qemu_get_ram_block(start); + /* Range sanity check on the ramblock */ + assert(start >= ramblock->offset && + start + length <= ramblock->offset + ramblock->used_length); while (page < end) { unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; @@ -1380,6 +1386,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, page += num; } + mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; + mr_size = (end - page) << TARGET_PAGE_BITS; + memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); + rcu_read_unlock(); if (dirty && tcg_enabled()) { @@ -1390,9 +1400,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, } DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty - (ram_addr_t start, ram_addr_t length, unsigned client) + (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client) { DirtyMemoryBlocks *blocks; + ram_addr_t start = memory_region_get_ram_addr(mr) + offset; unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); ram_addr_t first = QEMU_ALIGN_DOWN(start, align); ram_addr_t last = QEMU_ALIGN_UP(start + length, align); @@ -1434,6 +1445,8 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty tlb_reset_dirty_range_all(start, length); } + memory_region_clear_dirty_bitmap(mr, offset, length); + return snap; } diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 0b5138cb22..d9496c9363 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1674,7 +1674,7 @@ static void machvirt_init(MachineState *machine) &machine->device_memory->mr); } - virt_flash_fdt(vms, sysmem, secure_sysmem); + virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); create_gic(vms, pic); diff --git a/hw/core/machine.c b/hw/core/machine.c index c4ead16010..c58a8e594e 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -35,6 +35,7 @@ GlobalProperty hw_compat_4_0[] = { { "virtio-gpu-pci", "edid", "false" }, { "virtio-device", "use-started", "false" }, { "virtio-balloon-device", "qemu-4-0-config-size", "true" }, + { "pl031", "migrate-tick-offset", "false" }, }; const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); diff --git a/hw/display/xlnx_dp.c 
b/hw/display/xlnx_dp.c index cfd4c700b7..cc5b650df0 100644 --- a/hw/display/xlnx_dp.c +++ b/hw/display/xlnx_dp.c @@ -427,11 +427,18 @@ static uint8_t xlnx_dp_aux_pop_rx_fifo(XlnxDPState *s) uint8_t ret; if (fifo8_is_empty(&s->rx_fifo)) { - DPRINTF("rx_fifo underflow..\n"); - abort(); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reading empty RX_FIFO\n", + __func__); + /* + * The datasheet is not clear about the reset value, it seems + * to be unspecified. We choose to return '0'. + */ + ret = 0; + } else { + ret = fifo8_pop(&s->rx_fifo); + DPRINTF("pop 0x%" PRIX8 " from rx_fifo.\n", ret); } - ret = fifo8_pop(&s->rx_fifo); - DPRINTF("pop 0x%" PRIX8 " from rx_fifo.\n", ret); return ret; } diff --git a/hw/ssi/mss-spi.c b/hw/ssi/mss-spi.c index 918b1f3e82..4c9da5d2b2 100644 --- a/hw/ssi/mss-spi.c +++ b/hw/ssi/mss-spi.c @@ -165,7 +165,13 @@ spi_read(void *opaque, hwaddr addr, unsigned int size) case R_SPI_RX: s->regs[R_SPI_STATUS] &= ~S_RXFIFOFUL; s->regs[R_SPI_STATUS] &= ~S_RXCHOVRF; - ret = fifo32_pop(&s->rx_fifo); + if (fifo32_is_empty(&s->rx_fifo)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reading empty RX_FIFO\n", + __func__); + } else { + ret = fifo32_pop(&s->rx_fifo); + } if (fifo32_is_empty(&s->rx_fifo)) { s->regs[R_SPI_STATUS] |= S_RXFIFOEMP; } diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index 8115bb6d46..b29e0a4a89 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -1202,28 +1202,47 @@ static void lqspi_load_cache(void *opaque, hwaddr addr) } } -static uint64_t -lqspi_read(void *opaque, hwaddr addr, unsigned int size) +static MemTxResult lqspi_read(void *opaque, hwaddr addr, uint64_t *value, + unsigned size, MemTxAttrs attrs) { - XilinxQSPIPS *q = opaque; - uint32_t ret; + XilinxQSPIPS *q = XILINX_QSPIPS(opaque); if (addr >= q->lqspi_cached_addr && addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr]; - ret = cpu_to_le32(*(uint32_t *)retp); - DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr, - (unsigned)ret); - return ret; - } else { - lqspi_load_cache(opaque, addr); - return lqspi_read(opaque, addr, size); + *value = cpu_to_le32(*(uint32_t *)retp); + DB_PRINT_L(1, "addr: %08" HWADDR_PRIx ", data: %08" PRIx64 "\n", + addr, *value); + return MEMTX_OK; } + + lqspi_load_cache(opaque, addr); + return lqspi_read(opaque, addr, value, size, attrs); +} + +static MemTxResult lqspi_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) +{ + /* + * From UG1085, Chapter 24 (Quad-SPI controllers): + * - Writes are ignored + * - AXI writes generate an external AXI slave error (SLVERR) + */ + qemu_log_mask(LOG_GUEST_ERROR, "%s Unexpected %u-bit access to 0x%" PRIx64 + " (value: 0x%" PRIx64 "\n", + __func__, size << 3, offset, value); + + return MEMTX_ERROR; } static const MemoryRegionOps lqspi_ops = { - .read = lqspi_read, + .read_with_attrs = lqspi_read, + .write_with_attrs = lqspi_write, .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, .valid = { .min_access_size = 1, .max_access_size = 4 diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c index 3378084f4a..1a7e2ee06b 100644 --- a/hw/timer/pl031.c +++ b/hw/timer/pl031.c @@ -199,29 +199,94 @@ static int pl031_pre_save(void *opaque) { PL031State *s = opaque; - /* tick_offset is base_time - rtc_clock base time. Instead, we want to - * store the base time relative to the QEMU_CLOCK_VIRTUAL for backwards-compatibility. 
*/ + /* + * The PL031 device model code uses the tick_offset field, which is + * the offset between what the guest RTC should read and what the + * QEMU rtc_clock reads: + * guest_rtc = rtc_clock + tick_offset + * and so + * tick_offset = guest_rtc - rtc_clock + * + * We want to migrate this offset, which sounds straightforward. + * Unfortunately older versions of QEMU migrated a conversion of this + * offset into an offset from the vm_clock. (This was in turn an + * attempt to be compatible with even older QEMU versions, but it + * has incorrect behaviour if the rtc_clock is not the same as the + * vm_clock.) So we put the actual tick_offset into a migration + * subsection, and the backwards-compatible time-relative-to-vm_clock + * in the main migration state. + * + * Calculate base time relative to QEMU_CLOCK_VIRTUAL: + */ int64_t delta = qemu_clock_get_ns(rtc_clock) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); s->tick_offset_vmstate = s->tick_offset + delta / NANOSECONDS_PER_SECOND; return 0; } +static int pl031_pre_load(void *opaque) +{ + PL031State *s = opaque; + + s->tick_offset_migrated = false; + return 0; +} + static int pl031_post_load(void *opaque, int version_id) { PL031State *s = opaque; - int64_t delta = qemu_clock_get_ns(rtc_clock) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->tick_offset = s->tick_offset_vmstate - delta / NANOSECONDS_PER_SECOND; + /* + * If we got the tick_offset subsection, then we can just use + * the value in that. Otherwise the source is an older QEMU and + * has given us the offset from the vm_clock; convert it back to + * an offset from the rtc_clock. This will cause time to incorrectly + * go backwards compared to the host RTC, but this is unavoidable. + */ + + if (!s->tick_offset_migrated) { + int64_t delta = qemu_clock_get_ns(rtc_clock) - + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + s->tick_offset = s->tick_offset_vmstate - + delta / NANOSECONDS_PER_SECOND; + } pl031_set_alarm(s); return 0; } +static int pl031_tick_offset_post_load(void *opaque, int version_id) +{ + PL031State *s = opaque; + + s->tick_offset_migrated = true; + return 0; +} + +static bool pl031_tick_offset_needed(void *opaque) +{ + PL031State *s = opaque; + + return s->migrate_tick_offset; +} + +static const VMStateDescription vmstate_pl031_tick_offset = { + .name = "pl031/tick-offset", + .version_id = 1, + .minimum_version_id = 1, + .needed = pl031_tick_offset_needed, + .post_load = pl031_tick_offset_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(tick_offset, PL031State), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_pl031 = { .name = "pl031", .version_id = 1, .minimum_version_id = 1, .pre_save = pl031_pre_save, + .pre_load = pl031_pre_load, .post_load = pl031_post_load, .fields = (VMStateField[]) { VMSTATE_UINT32(tick_offset_vmstate, PL031State), @@ -231,14 +296,33 @@ static const VMStateDescription vmstate_pl031 = { VMSTATE_UINT32(im, PL031State), VMSTATE_UINT32(is, PL031State), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_pl031_tick_offset, + NULL } }; +static Property pl031_properties[] = { + /* + * True to correctly migrate the tick offset of the RTC. False to + * obtain backward migration compatibility with older QEMU versions, + * at the expense of the guest RTC going backwards compared with the + * host RTC when the VM is saved/restored if using -rtc host. + * (Even if set to 'true' older QEMU can migrate forward to newer QEMU; + * 'false' also permits newer QEMU to migrate to older QEMU.) 
+ */ + DEFINE_PROP_BOOL("migrate-tick-offset", + PL031State, migrate_tick_offset, true), + DEFINE_PROP_END_OF_LIST() +}; + static void pl031_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = &vmstate_pl031; + dc->props = pl031_properties; } static const TypeInfo pl031_info = { diff --git a/include/block/block_int.h b/include/block/block_int.h index d6415b53c1..50902531b7 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -885,6 +885,9 @@ struct BlockDriverState { /* Only read/written by whoever has set active_flush_req to true. */ unsigned int flushed_gen; /* Flushed write generation */ + + /* BdrvChild links to this node may never be frozen */ + bool never_freeze; }; struct BlockBackendRootState { diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index a08b11bd2c..9de8c93303 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -89,6 +89,26 @@ typedef target_ulong abi_ptr; extern __thread uintptr_t helper_retaddr; +static inline void set_helper_retaddr(uintptr_t ra) +{ + helper_retaddr = ra; + /* + * Ensure that this write is visible to the SIGSEGV handler that + * may be invoked due to a subsequent invalid memory operation. + */ + signal_barrier(); +} + +static inline void clear_helper_retaddr(void) +{ + /* + * Ensure that previous memory operations have succeeded before + * removing the data visible to the signal handler. + */ + signal_barrier(); + helper_retaddr = 0; +} + /* In user-only mode we provide only the _code and _data accessors. */ #define MEMSUFFIX _data diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h index bc45e2b8d4..2378f2958c 100644 --- a/include/exec/cpu_ldst_useronly_template.h +++ b/include/exec/cpu_ldst_useronly_template.h @@ -64,61 +64,75 @@ static inline RES_TYPE glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr) { -#if !defined(CODE_ACCESS) +#ifdef CODE_ACCESS + RES_TYPE ret; + set_helper_retaddr(1); + ret = glue(glue(ld, USUFFIX), _p)(g2h(ptr)); + clear_helper_retaddr(); + return ret; +#else trace_guest_mem_before_exec( env_cpu(env), ptr, trace_mem_build_info(SHIFT, false, MO_TE, false)); -#endif return glue(glue(ld, USUFFIX), _p)(g2h(ptr)); +#endif } +#ifndef CODE_ACCESS static inline RES_TYPE glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) { RES_TYPE ret; - helper_retaddr = retaddr; + set_helper_retaddr(retaddr); ret = glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(env, ptr); - helper_retaddr = 0; + clear_helper_retaddr(); return ret; } +#endif #if DATA_SIZE <= 2 static inline int glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr) { -#if !defined(CODE_ACCESS) +#ifdef CODE_ACCESS + int ret; + set_helper_retaddr(1); + ret = glue(glue(lds, SUFFIX), _p)(g2h(ptr)); + clear_helper_retaddr(); + return ret; +#else trace_guest_mem_before_exec( env_cpu(env), ptr, trace_mem_build_info(SHIFT, true, MO_TE, false)); -#endif return glue(glue(lds, SUFFIX), _p)(g2h(ptr)); +#endif } +#ifndef CODE_ACCESS static inline int glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) { int ret; - helper_retaddr = retaddr; + set_helper_retaddr(retaddr); ret = glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(env, ptr); - helper_retaddr = 0; + clear_helper_retaddr(); return ret; } -#endif +#endif /* CODE_ACCESS */ +#endif /* DATA_SIZE <= 2 */ #ifndef CODE_ACCESS static inline void glue(glue(cpu_st, SUFFIX), 
MEMSUFFIX)(CPUArchState *env, abi_ptr ptr, RES_TYPE v) { -#if !defined(CODE_ACCESS) trace_guest_mem_before_exec( env_cpu(env), ptr, trace_mem_build_info(SHIFT, false, MO_TE, true)); -#endif glue(glue(st, SUFFIX), _p)(g2h(ptr), v); } @@ -128,9 +142,9 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, RES_TYPE v, uintptr_t retaddr) { - helper_retaddr = retaddr; + set_helper_retaddr(retaddr); glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(env, ptr, v); - helper_retaddr = 0; + clear_helper_retaddr(); } #endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 2c5cdffa31..bb0961ddb9 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -46,6 +46,8 @@ OBJECT_GET_CLASS(IOMMUMemoryRegionClass, (obj), \ TYPE_IOMMU_MEMORY_REGION) +extern bool global_dirty_log; + typedef struct MemoryRegionOps MemoryRegionOps; typedef struct MemoryRegionMmio MemoryRegionMmio; @@ -414,6 +416,7 @@ struct MemoryListener { void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section, int old, int new); void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); + void (*log_clear)(MemoryListener *listener, MemoryRegionSection *section); void (*log_global_start)(MemoryListener *listener); void (*log_global_stop)(MemoryListener *listener); void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection *section, @@ -1268,6 +1271,22 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size); /** + * memory_region_clear_dirty_bitmap - clear dirty bitmap for memory range + * + * This function is called when the caller wants to clear the remote + * dirty bitmap of a memory range within the memory region. This can + * be used by e.g. KVM to manually clear dirty log when + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is declared support by the host + * kernel. + * + * @mr: the memory region to clear the dirty log upon + * @start: start address offset within the memory region + * @len: length of the memory region to clear dirty bitmap + */ +void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start, + hwaddr len); + +/** * memory_region_snapshot_and_clear_dirty: Get a snapshot of the dirty * bitmap and clear it. * diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index f96777bb99..b7b2e60ff6 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -51,8 +51,70 @@ struct RAMBlock { unsigned long *unsentmap; /* bitmap of already received pages in postcopy */ unsigned long *receivedmap; + + /* + * bitmap to track already cleared dirty bitmap. When the bit is + * set, it means the corresponding memory chunk needs a log-clear. + * Set this up to non-NULL to enable the capability to postpone + * and split clearing of dirty bitmap on the remote node (e.g., + * KVM). The bitmap will be set only when doing global sync. + * + * NOTE: this bitmap is different comparing to the other bitmaps + * in that one bit can represent multiple guest pages (which is + * decided by the `clear_bmap_shift' variable below). On + * destination side, this should always be NULL, and the variable + * `clear_bmap_shift' is meaningless. 
+ */ + unsigned long *clear_bmap; + uint8_t clear_bmap_shift; }; +/** + * clear_bmap_size: calculate clear bitmap size + * + * @pages: number of guest pages + * @shift: guest page number shift + * + * Returns: number of bits for the clear bitmap + */ +static inline long clear_bmap_size(uint64_t pages, uint8_t shift) +{ + return DIV_ROUND_UP(pages, 1UL << shift); +} + +/** + * clear_bmap_set: set clear bitmap for the page range + * + * @rb: the ramblock to operate on + * @start: the start page number + * @size: number of pages to set in the bitmap + * + * Returns: None + */ +static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, + uint64_t npages) +{ + uint8_t shift = rb->clear_bmap_shift; + + bitmap_set_atomic(rb->clear_bmap, start >> shift, + clear_bmap_size(npages, shift)); +} + +/** + * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set + * + * @rb: the ramblock to operate on + * @page: the page number to check + * + * Returns: true if the bit was set, false otherwise + */ +static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) +{ + uint8_t shift = rb->clear_bmap_shift; + + return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1); +} + static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) { return (b && b->host && offset < b->used_length) ? true : false; @@ -349,8 +411,13 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, if (bitmap[k]) { unsigned long temp = leul_to_cpu(bitmap[k]); - atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp); atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); + + if (global_dirty_log) { + atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], + temp); + } + if (tcg_enabled()) { atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp); } @@ -367,6 +434,11 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); } else { uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; + + if (!global_dirty_log) { + clients &= ~(1 << DIRTY_MEMORY_MIGRATION); + } + /* * bitmap-traveling is faster than memory-traveling (for addr...) * especially when most of the memory is not dirty. @@ -394,7 +466,7 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, unsigned client); DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty - (ram_addr_t start, ram_addr_t length, unsigned client); + (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, ram_addr_t start, @@ -409,6 +481,7 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, } +/* Called with RCU critical section */ static inline uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, ram_addr_t start, @@ -432,8 +505,6 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, DIRTY_MEMORY_BLOCK_SIZE); unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); - rcu_read_lock(); - src = atomic_rcu_read( &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; @@ -454,7 +525,18 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, } } - rcu_read_unlock(); + if (rb->clear_bmap) { + /* + * Postpone the dirty bitmap clear to the point before we + * really send the pages, also we will split the clear + * dirty procedure into smaller chunks. 
+ */ + clear_bmap_set(rb, start >> TARGET_PAGE_BITS, + length >> TARGET_PAGE_BITS); + } else { + /* Slow path - still do that in a huge chunk */ + memory_region_clear_dirty_bitmap(rb->mr, start, length); + } } else { ram_addr_t offset = rb->offset; diff --git a/include/hw/timer/pl031.h b/include/hw/timer/pl031.h index 8857c24ca5..8c3f555ee2 100644 --- a/include/hw/timer/pl031.h +++ b/include/hw/timer/pl031.h @@ -33,6 +33,8 @@ typedef struct PL031State { */ uint32_t tick_offset_vmstate; uint32_t tick_offset; + bool tick_offset_migrated; + bool migrate_tick_offset; uint32_t mr; uint32_t lr; diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index a6ac188188..f9cd24c899 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -88,6 +88,13 @@ #define smp_read_barrier_depends() barrier() #endif +/* + * A signal barrier forces all pending local memory ops to be observed before + * a SIGSEGV is delivered to the *same* thread. In practice this is exactly + * the same as barrier(), but since we have the correct builtin, use it. + */ +#define signal_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST) + /* Sanity check that the size of an atomic operation isn't "overly large". * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not * want to use them because we ought not need them, and this lets us do a @@ -308,6 +315,10 @@ #define smp_read_barrier_depends() barrier() #endif +#ifndef signal_barrier +#define signal_barrier() barrier() +#endif + /* These will only be atomic if the processor does the fetch or store * in a single issue memory operation */ diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 5c313346b9..82a1d2f41f 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -41,6 +41,10 @@ * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_to_le(dst, src, nbits) Convert bitmap to little endian * bitmap_from_le(dst, src, nbits) Convert bitmap from little endian + * bitmap_copy_with_src_offset(dst, src, offset, nbits) + * *dst = *src (with an offset into src) + * bitmap_copy_with_dst_offset(dst, src, offset, nbits) + * *dst = *src (with an offset into dst) */ /* @@ -271,4 +275,9 @@ void bitmap_to_le(unsigned long *dst, const unsigned long *src, void bitmap_from_le(unsigned long *dst, const unsigned long *src, long nbits); +void bitmap_copy_with_src_offset(unsigned long *dst, const unsigned long *src, + unsigned long offset, unsigned long nbits); +void bitmap_copy_with_dst_offset(unsigned long *dst, const unsigned long *src, + unsigned long shift, unsigned long nbits); + #endif /* BITMAP_H */ diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index f838412491..31df465fdc 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -21,10 +21,14 @@ typedef struct KVMSlot int slot; int flags; int old_flags; + /* Dirty bitmap cache for the slot */ + unsigned long *dirty_bmap; } KVMSlot; typedef struct KVMMemoryListener { MemoryListener listener; + /* Protects the slots and all inside them */ + QemuMutex slots_lock; KVMSlot *slots; int as_id; } KVMMemoryListener; @@ -38,7 +38,7 @@ static unsigned memory_region_transaction_depth; static bool memory_region_update_pending; static bool ioeventfd_update_pending; -static bool global_dirty_log = false; +bool global_dirty_log; static QTAILQ_HEAD(, MemoryListener) memory_listeners = QTAILQ_HEAD_INITIALIZER(memory_listeners); @@ -2067,6 +2067,57 @@ static void memory_region_sync_dirty_bitmap(MemoryRegion *mr) } } +void 
memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start, + hwaddr len) +{ + MemoryRegionSection mrs; + MemoryListener *listener; + AddressSpace *as; + FlatView *view; + FlatRange *fr; + hwaddr sec_start, sec_end, sec_size; + + QTAILQ_FOREACH(listener, &memory_listeners, link) { + if (!listener->log_clear) { + continue; + } + as = listener->address_space; + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + if (!fr->dirty_log_mask || fr->mr != mr) { + /* + * Clear dirty bitmap operation only applies to those + * regions whose dirty logging is at least enabled + */ + continue; + } + + mrs = section_from_flat_range(fr, view); + + sec_start = MAX(mrs.offset_within_region, start); + sec_end = mrs.offset_within_region + int128_get64(mrs.size); + sec_end = MIN(sec_end, start + len); + + if (sec_start >= sec_end) { + /* + * If this memory region section has no intersection + * with the requested range, skip. + */ + continue; + } + + /* Valid case; shrink the section if needed */ + mrs.offset_within_address_space += + sec_start - mrs.offset_within_region; + mrs.offset_within_region = sec_start; + sec_size = sec_end - sec_start; + mrs.size = int128_make64(sec_size); + listener->log_clear(listener, &mrs); + } + flatview_unref(view); + } +} + DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, @@ -2074,8 +2125,7 @@ DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, { assert(mr->ram_block); memory_region_sync_dirty_bitmap(mr); - return cpu_physical_memory_snapshot_and_clear_dirty( - memory_region_get_ram_addr(mr) + addr, size, client); + return cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client); } bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap, diff --git a/migration/migration.c b/migration/migration.c index 2865ae3fa9..8a607fe1e2 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3362,6 +3362,8 @@ void migration_global_dump(Monitor *mon) ms->send_section_footer ? "on" : "off"); monitor_printf(mon, "decompress-error-check: %s\n", ms->decompress_error_check ? "on" : "off"); + monitor_printf(mon, "clear-bitmap-shift: %u\n", + ms->clear_bitmap_shift); } #define DEFINE_PROP_MIG_CAP(name, x) \ @@ -3376,6 +3378,8 @@ static Property migration_properties[] = { send_section_footer, true), DEFINE_PROP_BOOL("decompress-error-check", MigrationState, decompress_error_check, true), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), /* Migration parameters */ DEFINE_PROP_UINT8("x-compress-level", MigrationState, diff --git a/migration/migration.h b/migration/migration.h index 5e8f09c6db..1fdd7b21fd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -26,6 +26,23 @@ struct PostcopyBlocktimeContext; #define MIGRATION_RESUME_ACK_VALUE (1) +/* + * 1<<6=64 pages -> 256K chunk when page size is 4K. This gives us + * the benefit that all the chunks are 64 pages aligned then the + * bitmaps are always aligned to LONG. + */ +#define CLEAR_BITMAP_SHIFT_MIN 6 +/* + * 1<<18=256K pages -> 1G chunk when page size is 4K. This is the + * default value to use if no one specified. + */ +#define CLEAR_BITMAP_SHIFT_DEFAULT 18 +/* + * 1<<31=2G pages -> 8T chunk when page size is 4K. This should be + * big enough and make sure we won't overflow easily. 
+ */ +#define CLEAR_BITMAP_SHIFT_MAX 31 + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -232,6 +249,16 @@ struct MigrationState * do not trigger spurious decompression errors. */ bool decompress_error_check; + + /* + * This decides the size of guest memory chunk that will be used + * to track dirty bitmap clearing. The size of memory chunk will + * be GUEST_PAGE_SIZE << N. Say, N=0 means we will clear dirty + * bitmap for each page to send (1<<0=1); N=10 means we will clear + * dirty bitmap only once for 1<<10=1K continuous guest pages + * (which is in 4M chunk). + */ + uint8_t clear_bitmap_shift; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/ram.c b/migration/ram.c index 908517fc2b..2b0774c2bf 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1291,15 +1291,15 @@ static void multifd_recv_sync_main(void) trace_multifd_recv_sync_main_wait(p->id); qemu_sem_wait(&multifd_recv_state->sem_sync); + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + qemu_mutex_lock(&p->mutex); if (multifd_recv_state->packet_num < p->packet_num) { multifd_recv_state->packet_num = p->packet_num; } qemu_mutex_unlock(&p->mutex); - } - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDRecvParams *p = &multifd_recv_state->params[i]; - trace_multifd_recv_sync_main_signal(p->id); qemu_sem_post(&p->sem_sync); } @@ -1585,25 +1585,30 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE, XBZRLE.encoded_buf, TARGET_PAGE_SIZE); + + /* + * Update the cache contents, so that it corresponds to the data + * sent, in all cases except where we skip the page. + */ + if (!last_stage && encoded_len != 0) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + /* + * In the case where we couldn't compress, ensure that the caller + * sends the data from the cache, since the guest might have + * changed the RAM since we copied it. + */ + *current_data = prev_cached_page; + } + if (encoded_len == 0) { trace_save_xbzrle_page_skipping(); return 0; } else if (encoded_len == -1) { trace_save_xbzrle_page_overflow(); xbzrle_counters.overflow++; - /* update data in the cache */ - if (!last_stage) { - memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); - *current_data = prev_cached_page; - } return -1; } - /* we need to update the data in the cache, in order to get the same data */ - if (!last_stage) { - memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); - } - /* Send XBZRLE based compressed page */ bytes_xbzrle = save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_XBZRLE); @@ -1659,6 +1664,33 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, bool ret; qemu_mutex_lock(&rs->bitmap_mutex); + + /* + * Clear dirty bitmap if needed. This _must_ be called before we + * send any of the page in the chunk because we need to make sure + * we can capture further page content changes when we sync dirty + * log the next time. So as long as we are going to send any of + * the page in the chunk we clear the remote dirty bitmap for all. + * Clearing it earlier won't be a problem, but too late will. 
+ */ + if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { + uint8_t shift = rb->clear_bmap_shift; + hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); + hwaddr start = (page << TARGET_PAGE_BITS) & (-size); + + /* + * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this + * can make things easier sometimes since then start address + * of the small chunk will always be 64 pages aligned so the + * bitmap will always be aligned to unsigned long. We should + * even be able to remove this restriction but I'm simply + * keeping it. + */ + assert(shift >= 6); + trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); + memory_region_clear_dirty_bitmap(rb->mr, start, size); + } + ret = test_and_clear_bit(page, rb->bmap); if (ret) { @@ -1669,6 +1701,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, return ret; } +/* Called with RCU critical section */ static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, ram_addr_t length) { @@ -2281,6 +2314,12 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) */ pss->block = block; pss->page = offset >> TARGET_PAGE_BITS; + + /* + * This unqueued page would break the "one round" check, even is + * really rare. + */ + pss->complete_round = false; } return !!block; @@ -2675,6 +2714,8 @@ static void ram_save_cleanup(void *opaque) memory_global_dirty_log_stop(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { + g_free(block->clear_bmap); + block->clear_bmap = NULL; g_free(block->bmap); block->bmap = NULL; g_free(block->unsentmap); @@ -2763,8 +2804,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) * * @ms: current migration state * @pds: state for postcopy - * @start: RAMBlock starting page - * @length: RAMBlock size + * @block: RAMBlock to discard */ static int postcopy_send_discard_bm_ram(MigrationState *ms, PostcopyDiscardState *pds, @@ -2963,7 +3003,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } /** - * postcopy_chuck_hostpages: discrad any partially sent host page + * postcopy_chunk_hostpages: discard any partially sent host page * * Utility for the outgoing postcopy code. * @@ -3173,11 +3213,11 @@ static int ram_state_init(RAMState **rsp) QSIMPLEQ_INIT(&(*rsp)->src_page_requests); /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. * This must match with the initial values of dirty bitmap. - * Currently we initialize the dirty bitmap to all zeros so - * here the total dirty page count is zero. 
*/ - (*rsp)->migration_dirty_pages = 0; + (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; ram_state_reset(*rsp); return 0; @@ -3185,23 +3225,39 @@ static int ram_state_init(RAMState **rsp) static void ram_list_init_bitmaps(void) { + MigrationState *ms = migrate_get_current(); RAMBlock *block; unsigned long pages; + uint8_t shift; /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { + shift = ms->clear_bitmap_shift; + if (shift > CLEAR_BITMAP_SHIFT_MAX) { + error_report("clear_bitmap_shift (%u) too big, using " + "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX); + shift = CLEAR_BITMAP_SHIFT_MAX; + } else if (shift < CLEAR_BITMAP_SHIFT_MIN) { + error_report("clear_bitmap_shift (%u) too small, using " + "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN); + shift = CLEAR_BITMAP_SHIFT_MIN; + } + RAMBLOCK_FOREACH_NOT_IGNORED(block) { pages = block->max_length >> TARGET_PAGE_BITS; /* * The initial dirty bitmap for migration must be set with all * ones to make sure we'll migrate every guest RAM page to * destination. - * Here we didn't set RAMBlock.bmap simply because it is already - * set in ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] in - * ram_block_add, and that's where we'll sync the dirty bitmaps. - * Here setting RAMBlock.bmap would be fine too but not necessary. + * Here we set RAMBlock.bmap all to 1 because when rebegin a + * new migration after a failed migration, ram_list. + * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole + * guest memory. */ block->bmap = bitmap_new(pages); + bitmap_set(block->bmap, 0, pages); + block->clear_bmap_shift = shift; + block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift)); if (migrate_postcopy_ram()) { block->unsentmap = bitmap_new(pages); bitmap_set(block->unsentmap, 0, pages); @@ -3370,7 +3426,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } if (migrate_ignore_shared()) { qemu_put_be64(f, block->mr->addr); - qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0); } } @@ -3466,8 +3521,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) */ ram_control_after_iterate(f, RAM_CONTROL_ROUND); - multifd_send_sync_main(); out: + multifd_send_sync_main(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); ram_counters.transferred += 8; @@ -4337,12 +4392,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } if (migrate_ignore_shared()) { hwaddr addr = qemu_get_be64(f); - bool ignored = qemu_get_byte(f); - if (ignored != ramblock_is_ignored(block)) { - error_report("RAM block %s should %s be migrated", - id, ignored ? 
"" : "not"); - ret = -EINVAL; - } if (ramblock_is_ignored(block) && block->mr->addr != addr) { error_report("Mismatched GPAs for block %s " diff --git a/migration/savevm.c b/migration/savevm.c index c0e557b4c2..79ed44d475 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1863,7 +1863,6 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) } trace_loadvm_postcopy_handle_run_cpu_sync(); - cpu_synchronize_all_post_init(); trace_loadvm_postcopy_handle_run_vmstart(); diff --git a/migration/trace-events b/migration/trace-events index de2e136e57..d8e54c367a 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -79,8 +79,9 @@ get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/0x%" PRIx64 " page_abs=0x%lx (sent=%d)" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 +migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" migration_throttle(void) "" -multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet number %" PRIu64 " pages %d flags 0x%x next packet size %d" +multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" multifd_recv_sync_main(long packet_num) "packet num %ld" multifd_recv_sync_main_signal(uint8_t id) "channel %d" multifd_recv_sync_main_wait(uint8_t id) "channel %d" diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 99ceb0846b..5ca3ebe942 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1962,7 +1962,6 @@ static void hmp_change_read_arg(void *opaque, const char *password, void hmp_change(Monitor *mon, const QDict *qdict) { - MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common); const char *device = qdict_get_str(qdict, "device"); const char *target = qdict_get_str(qdict, "target"); const char *arg = qdict_get_try_str(qdict, "arg"); @@ -1980,6 +1979,7 @@ void hmp_change(Monitor *mon, const QDict *qdict) if (strcmp(target, "passwd") == 0 || strcmp(target, "password") == 0) { if (!arg) { + MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common); monitor_read_password(hmp_mon, hmp_change_read_arg, NULL); return; } diff --git a/qemu-tech.texi b/qemu-tech.texi index 3451cfaa5b..0380de77b6 100644 --- a/qemu-tech.texi +++ b/qemu-tech.texi @@ -3,10 +3,7 @@ @menu * CPU emulation:: -* Translator Internals:: -* QEMU compared to other emulators:: * Managed start up options:: -* Bibliography:: @end menu @node CPU emulation diff --git a/target/arm/cpu.c b/target/arm/cpu.c index e75a64a25a..1959467fdc 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1666,6 +1666,12 @@ static void arm926_initfn(Object *obj) * set the field to indicate Jazelle support within QEMU. */ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable double precision + * and short vector support even though ARMv5 doesn't have this register. 
+ */ + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); } static void arm946_initfn(Object *obj) @@ -1702,6 +1708,12 @@ static void arm1026_initfn(Object *obj) * set the field to indicate Jazelle support within QEMU. */ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable double precision + * and short vector support even though ARMv5 doesn't have this register. + */ + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); { /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ @@ -2452,6 +2464,10 @@ static void arm_max_initfn(Object *obj) t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); cpu->isar.id_isar6 = t; + t = cpu->isar.mvfr1; + t = FIELD_DP32(t, MVFR1, FPHP, 2); /* v8.0 FP support */ + cpu->isar.mvfr1 = t; + t = cpu->isar.mvfr2; t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */ t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 44e45a8037..060699b901 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -554,7 +554,7 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, /* ??? Enforce alignment. */ uint64_t *haddr = g2h(addr); - helper_retaddr = ra; + set_helper_retaddr(ra); o0 = ldq_le_p(haddr + 0); o1 = ldq_le_p(haddr + 1); oldv = int128_make128(o0, o1); @@ -564,7 +564,7 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, stq_le_p(haddr + 0, int128_getlo(newv)); stq_le_p(haddr + 1, int128_gethi(newv)); } - helper_retaddr = 0; + clear_helper_retaddr(); #else int mem_idx = cpu_mmu_index(env, false); TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); @@ -624,7 +624,7 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, /* ??? Enforce alignment. */ uint64_t *haddr = g2h(addr); - helper_retaddr = ra; + set_helper_retaddr(ra); o1 = ldq_be_p(haddr + 0); o0 = ldq_be_p(haddr + 1); oldv = int128_make128(o0, o1); @@ -634,7 +634,7 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, stq_be_p(haddr + 0, int128_gethi(newv)); stq_be_p(haddr + 1, int128_getlo(newv)); } - helper_retaddr = 0; + clear_helper_retaddr(); #else int mem_idx = cpu_mmu_index(env, false); TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 1867435db7..84609f446e 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -624,7 +624,11 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, if (sattrs.ns) { attrs.secure = false; } else if (!targets_secure) { - /* NS access to S memory */ + /* + * NS access to S memory: the underlying exception which we escalate + * to HardFault is SecureFault, which always targets Secure. + */ + exc_secure = true; goto load_fail; } } @@ -632,6 +636,11 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, vector_entry = address_space_ldl(arm_addressspace(cs, attrs), addr, attrs, &result); if (result != MEMTX_OK) { + /* + * Underlying exception is BusFault: its target security state + * depends on BFHFNMINS. 
+ */ + exc_secure = !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK); goto load_fail; } *pvec = vector_entry; @@ -641,13 +650,17 @@ load_fail: /* * All vector table fetch fails are reported as HardFault, with * HFSR.VECTTBL and .FORCED set. (FORCED is set because - * technically the underlying exception is a MemManage or BusFault + * technically the underlying exception is a SecureFault or BusFault * that is escalated to HardFault.) This is a terminal exception, * so we will either take the HardFault immediately or else enter * lockup (the latter case is handled in armv7m_nvic_set_pending_derived()). + * The HardFault is Secure if BFHFNMINS is 0 (meaning that all HFs are + * secure); otherwise it targets the same security state as the + * underlying exception. */ - exc_secure = targets_secure || - !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK); + if (!(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK)) { + exc_secure = true; + } env->v7m.hfsr |= R_V7M_HFSR_VECTTBL_MASK | R_V7M_HFSR_FORCED_MASK; armv7m_nvic_set_pending_derived(env->nvic, ARMV7M_EXCP_HARD, exc_secure); return false; diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index fd434c66ea..fc0c1755d2 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4125,12 +4125,11 @@ static intptr_t max_for_page(target_ulong base, intptr_t mem_off, return MIN(split, mem_max - mem_off) + mem_off; } -static inline void set_helper_retaddr(uintptr_t ra) -{ -#ifdef CONFIG_USER_ONLY - helper_retaddr = ra; +#ifndef CONFIG_USER_ONLY +/* These are normally defined only for CONFIG_USER_ONLY in <exec/cpu_ldst.h> */ +static inline void set_helper_retaddr(uintptr_t ra) { } +static inline void clear_helper_retaddr(void) { } #endif -} /* * The result of tlb_vaddr_to_host for user-only is just g2h(x), @@ -4188,7 +4187,7 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, if (test_host_page(host)) { mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); tcg_debug_assert(mem_off == mem_max); - set_helper_retaddr(0); + clear_helper_retaddr(); /* After having taken any fault, zero leading inactive elements. */ swap_memzero(vd, reg_off); return; @@ -4239,7 +4238,7 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, } #endif - set_helper_retaddr(0); + clear_helper_retaddr(); memcpy(vd, &scratch, reg_max); } @@ -4312,7 +4311,7 @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, addr += 2 * size; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); /* Wait until all exceptions have been raised to write back. */ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); @@ -4341,7 +4340,7 @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, addr += 3 * size; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); /* Wait until all exceptions have been raised to write back. */ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); @@ -4372,7 +4371,7 @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, addr += 4 * size; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); /* Wait until all exceptions have been raised to write back. 
*/ memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); @@ -4494,7 +4493,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, if (test_host_page(host)) { mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); tcg_debug_assert(mem_off == mem_max); - set_helper_retaddr(0); + clear_helper_retaddr(); /* After any fault, zero any leading inactive elements. */ swap_memzero(vd, reg_off); return; @@ -4537,7 +4536,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, } #endif - set_helper_retaddr(0); + clear_helper_retaddr(); record_fault(env, reg_off, reg_max); } @@ -4740,7 +4739,7 @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, addr += msize; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); } static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, @@ -4766,7 +4765,7 @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, addr += 2 * msize; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); } static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, @@ -4794,7 +4793,7 @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, addr += 3 * msize; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); } static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, @@ -4824,7 +4823,7 @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, addr += 4 * msize; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); } #define DO_STN_1(N, NAME, ESIZE) \ @@ -4932,7 +4931,7 @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, i += 4, pg >>= 4; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); /* Wait until all exceptions have been raised to write back. */ memcpy(vd, &scratch, oprsz); @@ -4955,7 +4954,7 @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); } } - set_helper_retaddr(0); + clear_helper_retaddr(); /* Wait until all exceptions have been raised to write back. */ memcpy(vd, &scratch, oprsz * 8); @@ -5133,7 +5132,7 @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, tlb_fn(env, vd, reg_off, addr, oi, ra); /* The rest of the reads will be non-faulting. */ - set_helper_retaddr(0); + clear_helper_retaddr(); } /* After any fault, zero the leading predicated false elements. */ @@ -5175,7 +5174,7 @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, tlb_fn(env, vd, reg_off, addr, oi, ra); /* The rest of the reads will be non-faulting. */ - set_helper_retaddr(0); + clear_helper_retaddr(); } /* After any fault, zero the leading predicated false elements. 
*/ @@ -5299,7 +5298,7 @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, i += 4, pg >>= 4; } while (i & 15); } - set_helper_retaddr(0); + clear_helper_retaddr(); } static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, @@ -5318,7 +5317,7 @@ static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); } } - set_helper_retaddr(0); + clear_helper_retaddr(); } #define DO_ST1_ZPZ_S(MEM, OFS) \ diff --git a/target/mips/translate.c b/target/mips/translate.c index f96f141cdf..3575eff0ae 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -6745,6 +6745,7 @@ static void gen_mthc0(DisasContext *ctx, TCGv arg, int reg, int sel) default: goto cp0_unimplemented; } + break; case CP0_REGISTER_17: switch (sel) { case 0: @@ -9825,6 +9826,7 @@ static void gen_mftr(CPUMIPSState *env, DisasContext *ctx, int rt, int rd, gen_mfc0(ctx, t0, rt, sel); break; } + break; case 12: switch (sel) { case 0: @@ -9834,6 +9836,7 @@ static void gen_mftr(CPUMIPSState *env, DisasContext *ctx, int rt, int rd, gen_mfc0(ctx, t0, rt, sel); break; } + break; case 13: switch (sel) { case 0: @@ -10052,6 +10055,7 @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt, gen_mtc0(ctx, t0, rd, sel); break; } + break; case 12: switch (sel) { case 0: @@ -10061,6 +10065,7 @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt, gen_mtc0(ctx, t0, rd, sel); break; } + break; case 13: switch (sel) { case 0: diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index b0f8106642..0713448bf5 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -2226,7 +2226,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_extract2_i64: case INDEX_op_extract2_i32: - tcg_out_extr(s, ext, a0, a1, a2, args[3]); + tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]); break; case INDEX_op_add2_i32: diff --git a/tcg/optimize.c b/tcg/optimize.c index d7c71a6085..d2424de4af 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1213,8 +1213,8 @@ void tcg_optimize(TCGContext *s) if (opc == INDEX_op_extract2_i64) { tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3])); } else { - tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3])); - tmp = (int32_t)tmp; + tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) | + ((uint32_t)v2 << (32 - op->args[3]))); } tcg_opt_gen_movi(s, op, op->args[0], tmp); break; diff --git a/tests/Makefile.include b/tests/Makefile.include index a983dd32da..fd7fdb8658 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -65,6 +65,7 @@ check-unit-y += tests/test-opts-visitor$(EXESUF) check-unit-$(CONFIG_BLOCK) += tests/test-coroutine$(EXESUF) check-unit-y += tests/test-visitor-serialization$(EXESUF) check-unit-y += tests/test-iov$(EXESUF) +check-unit-y += tests/test-bitmap$(EXESUF) check-unit-$(CONFIG_BLOCK) += tests/test-aio$(EXESUF) check-unit-$(CONFIG_BLOCK) += tests/test-aio-multithread$(EXESUF) check-unit-$(CONFIG_BLOCK) += tests/test-throttle$(EXESUF) @@ -538,6 +539,7 @@ tests/test-image-locking$(EXESUF): tests/test-image-locking.o $(test-block-obj-y tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y) tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y) +tests/test-bitmap$(EXESUF): tests/test-bitmap.o $(test-util-obj-y) tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o 
tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o migration/page_cache.o $(test-util-obj-y) tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o $(test-util-obj-y) diff --git a/tests/migration-test.c b/tests/migration-test.c index b6434628e1..a4feb9545d 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -398,7 +398,8 @@ static char *migrate_get_socket_address(QTestState *who, const char *parameter) return result; } -static long long migrate_get_parameter(QTestState *who, const char *parameter) +static long long migrate_get_parameter_int(QTestState *who, + const char *parameter) { QDict *rsp; long long result; @@ -409,17 +410,17 @@ static long long migrate_get_parameter(QTestState *who, const char *parameter) return result; } -static void migrate_check_parameter(QTestState *who, const char *parameter, - long long value) +static void migrate_check_parameter_int(QTestState *who, const char *parameter, + long long value) { long long result; - result = migrate_get_parameter(who, parameter); + result = migrate_get_parameter_int(who, parameter); g_assert_cmpint(result, ==, value); } -static void migrate_set_parameter(QTestState *who, const char *parameter, - long long value) +static void migrate_set_parameter_int(QTestState *who, const char *parameter, + long long value) { QDict *rsp; @@ -429,7 +430,7 @@ static void migrate_set_parameter(QTestState *who, const char *parameter, parameter, value); g_assert(qdict_haskey(rsp, "return")); qobject_unref(rsp); - migrate_check_parameter(who, parameter, value); + migrate_check_parameter_int(who, parameter, value); } static void migrate_pause(QTestState *who) @@ -681,7 +682,7 @@ static void deprecated_set_downtime(QTestState *who, const double value) " 'arguments': { 'value': %f } }", value); g_assert(qdict_haskey(rsp, "return")); qobject_unref(rsp); - migrate_check_parameter(who, "downtime-limit", value * 1000); + migrate_check_parameter_int(who, "downtime-limit", value * 1000); } static void deprecated_set_speed(QTestState *who, long long value) @@ -692,7 +693,7 @@ static void deprecated_set_speed(QTestState *who, long long value) "'arguments': { 'value': %lld } }", value); g_assert(qdict_haskey(rsp, "return")); qobject_unref(rsp); - migrate_check_parameter(who, "max-bandwidth", value); + migrate_check_parameter_int(who, "max-bandwidth", value); } static void deprecated_set_cache_size(QTestState *who, long long value) @@ -703,7 +704,7 @@ static void deprecated_set_cache_size(QTestState *who, long long value) "'arguments': { 'value': %lld } }", value); g_assert(qdict_haskey(rsp, "return")); qobject_unref(rsp); - migrate_check_parameter(who, "xbzrle-cache-size", value); + migrate_check_parameter_int(who, "xbzrle-cache-size", value); } static void test_deprecated(void) @@ -738,8 +739,8 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, * quickly, but that it doesn't complete precopy even on a slow * machine, so also set the downtime. 
*/ - migrate_set_parameter(from, "max-bandwidth", 100000000); - migrate_set_parameter(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "max-bandwidth", 100000000); + migrate_set_parameter_int(from, "downtime-limit", 1); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -790,7 +791,7 @@ static void test_postcopy_recovery(void) } /* Turn postcopy speed down, 4K/s is slow enough on any machines */ - migrate_set_parameter(from, "max-postcopy-bandwidth", 4096); + migrate_set_parameter_int(from, "max-postcopy-bandwidth", 4096); /* Now we start the postcopy */ migrate_postcopy_start(from, to); @@ -831,7 +832,7 @@ static void test_postcopy_recovery(void) g_free(uri); /* Restore the postcopy bandwidth to unlimited */ - migrate_set_parameter(from, "max-postcopy-bandwidth", 0); + migrate_set_parameter_int(from, "max-postcopy-bandwidth", 0); migrate_postcopy_complete(from, to); } @@ -877,9 +878,9 @@ static void test_precopy_unix(void) * machine, so also set the downtime. */ /* 1 ms should make it not converge*/ - migrate_set_parameter(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "downtime-limit", 1); /* 1GB/s */ - migrate_set_parameter(from, "max-bandwidth", 1000000000); + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -889,7 +890,7 @@ static void test_precopy_unix(void) wait_for_migration_pass(from); /* 300 ms should converge */ - migrate_set_parameter(from, "downtime-limit", 300); + migrate_set_parameter_int(from, "downtime-limit", 300); if (!got_stop) { qtest_qmp_eventwait(from, "STOP"); @@ -956,11 +957,11 @@ static void test_xbzrle(const char *uri) * machine, so also set the downtime. */ /* 1 ms should make it not converge*/ - migrate_set_parameter(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "downtime-limit", 1); /* 1GB/s */ - migrate_set_parameter(from, "max-bandwidth", 1000000000); + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); - migrate_set_parameter(from, "xbzrle-cache-size", 33554432); + migrate_set_parameter_int(from, "xbzrle-cache-size", 33554432); migrate_set_capability(from, "xbzrle", "true"); migrate_set_capability(to, "xbzrle", "true"); @@ -972,7 +973,7 @@ static void test_xbzrle(const char *uri) wait_for_migration_pass(from); /* 300ms should converge */ - migrate_set_parameter(from, "downtime-limit", 300); + migrate_set_parameter_int(from, "downtime-limit", 300); if (!got_stop) { qtest_qmp_eventwait(from, "STOP"); @@ -1008,9 +1009,9 @@ static void test_precopy_tcp(void) * machine, so also set the downtime. */ /* 1 ms should make it not converge*/ - migrate_set_parameter(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "downtime-limit", 1); /* 1GB/s */ - migrate_set_parameter(from, "max-bandwidth", 1000000000); + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -1022,7 +1023,7 @@ static void test_precopy_tcp(void) wait_for_migration_pass(from); /* 300ms should converge */ - migrate_set_parameter(from, "downtime-limit", 300); + migrate_set_parameter_int(from, "downtime-limit", 300); if (!got_stop) { qtest_qmp_eventwait(from, "STOP"); @@ -1054,9 +1055,9 @@ static void test_migrate_fd_proto(void) * machine, so also set the downtime. 
*/ /* 1 ms should make it not converge */ - migrate_set_parameter(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "downtime-limit", 1); /* 1GB/s */ - migrate_set_parameter(from, "max-bandwidth", 1000000000); + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -1090,7 +1091,7 @@ static void test_migrate_fd_proto(void) wait_for_migration_pass(from); /* 300ms should converge */ - migrate_set_parameter(from, "downtime-limit", 300); + migrate_set_parameter_int(from, "downtime-limit", 300); if (!got_stop) { qtest_qmp_eventwait(from, "STOP"); diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index c6311d1825..1b69f318c6 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -36,7 +36,9 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) qemu_io('-f', 'raw', '-c', 'write -P 0x1 0 512', backing_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x1 524288 512', mid_img) - self.vm = iotests.VM().add_drive("blkdebug::" + test_img, "backing.node-name=mid") + self.vm = iotests.VM().add_drive("blkdebug::" + test_img, + "backing.node-name=mid," + + "backing.backing.node-name=base") self.vm.launch() def tearDown(self): @@ -144,17 +146,43 @@ class TestSingleDrive(iotests.QMPTestCase): def test_device_not_found(self): result = self.vm.qmp('block-stream', device='nonexistent') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'Cannot find device=nonexistent nor node_name=nonexistent') def test_job_id_missing(self): result = self.vm.qmp('block-stream', device='mid') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Invalid job ID ''") + + def test_read_only(self): + # Create a new file that we can attach (we need a read-only top) + with iotests.FilePath('ro-top.img') as ro_top_path: + qemu_img('create', '-f', iotests.imgfmt, ro_top_path, + str(self.image_len)) + + result = self.vm.qmp('blockdev-add', + node_name='ro-top', + driver=iotests.imgfmt, + read_only=True, + file={ + 'driver': 'file', + 'filename': ro_top_path, + 'read-only': True + }, + backing='mid') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', job_id='stream', + device='ro-top', base_node='base') + self.assert_qmp(result, 'error/desc', 'Block node is read-only') + + result = self.vm.qmp('blockdev-del', node_name='ro-top') + self.assert_qmp(result, 'return', {}) class TestParallelOps(iotests.QMPTestCase): num_ops = 4 # Number of parallel block-stream operations num_imgs = num_ops * 2 + 1 - image_len = num_ops * 512 * 1024 + image_len = num_ops * 4 * 1024 * 1024 imgs = [] def setUp(self): @@ -176,11 +204,11 @@ class TestParallelOps(iotests.QMPTestCase): # Put data into the images we are copying data from odd_img_indexes = [x for x in reversed(range(self.num_imgs)) if x % 2 == 1] for i in range(len(odd_img_indexes)): - # Alternate between 256KB and 512KB. + # Alternate between 2MB and 4MB. 
# This way jobs will not finish in the same order they were created - num_kb = 256 + 256 * (i % 2) + num_mb = 2 + 2 * (i % 2) qemu_io('-f', iotests.imgfmt, - '-c', 'write -P 0xFF %dk %dk' % (i * 512, num_kb), + '-c', 'write -P 0xFF %dM %dM' % (i * 4, num_mb), self.imgs[odd_img_indexes[i]]) # Attach the drive to the VM @@ -213,6 +241,10 @@ class TestParallelOps(iotests.QMPTestCase): result = self.vm.qmp('block-stream', device=node_name, job_id=job_id, base=self.imgs[i-2], speed=512*1024) self.assert_qmp(result, 'return', {}) + for job in pending_jobs: + result = self.vm.qmp('block-job-set-speed', device=job, speed=0) + self.assert_qmp(result, 'return', {}) + # Wait for all jobs to be finished. while len(pending_jobs) > 0: for event in self.vm.get_qmp_events(wait=True): @@ -241,24 +273,33 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-stream', device='node5', job_id='stream-node5', base=self.imgs[2]) - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node4' is busy: block device is in use by block job: stream") result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3', base=self.imgs[2]) - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node3' is busy: block device is in use by block job: stream") result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4-v2') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node4' is busy: block device is in use by block job: stream") # block-commit should also fail if it touches nodes used by the stream job result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[4], job_id='commit-node4') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node4' is busy: block device is in use by block job: stream") result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[1], top=self.imgs[3], job_id='commit-node1') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node3' is busy: block device is in use by block job: stream") # This fails because it needs to modify the backing string in node2, which is blocked result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[0], top=self.imgs[1], job_id='commit-node0') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node2' is busy: block device is in use by block job: stream") + + result = self.vm.qmp('block-job-set-speed', device='stream-node4', speed=0) + self.assert_qmp(result, 'return', {}) self.wait_until_completed(drive='stream-node4') self.assert_no_active_block_jobs() @@ -274,20 +315,28 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node3' is busy: block device is in use by block job: commit") result = self.vm.qmp('block-stream', device='node6', base=self.imgs[2], job_id='stream-node6') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node5' is busy: block device is in use by block job: commit") result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], job_id='stream-node4') 
- self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node4' is busy: block device is in use by block job: commit") result = self.vm.qmp('block-stream', device='node6', base=self.imgs[4], job_id='stream-node6-v2') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node5' is busy: block device is in use by block job: commit") # This fails because block-commit currently blocks the active layer even if it's not used result = self.vm.qmp('block-stream', device='drive0', base=self.imgs[5], job_id='stream-drive0') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'drive0' is busy: block device is in use by block job: commit") + + result = self.vm.qmp('block-job-set-speed', device='commit-node3', speed=0) + self.assert_qmp(result, 'return', {}) self.wait_until_completed(drive='commit-node3') @@ -302,35 +351,70 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-stream', device='node5', base=self.imgs[3], job_id='stream-node6') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node5' is busy: block device is in use by block job: commit") event = self.vm.event_wait(name='BLOCK_JOB_READY') self.assert_qmp(event, 'data/device', 'commit-drive0') self.assert_qmp(event, 'data/type', 'commit') self.assert_qmp_absent(event, 'data/error') + result = self.vm.qmp('block-job-set-speed', device='commit-drive0', speed=0) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('block-job-complete', device='commit-drive0') self.assert_qmp(result, 'return', {}) self.wait_until_completed(drive='commit-drive0') # In this case the base node of the stream job is the same as the - # top node of commit job. Since block-commit removes the top node - # when it finishes, this is not allowed. + # top node of commit job. Since this results in the commit filter + # node being part of the stream chain, this is not allowed. def test_overlapping_4(self): self.assert_no_active_block_jobs() # Commit from node2 into node0 - result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[2], base=self.imgs[0]) + result = self.vm.qmp('block-commit', device='drive0', + top=self.imgs[2], base=self.imgs[0], + filter_node_name='commit-filter', speed=1024*1024) self.assert_qmp(result, 'return', {}) # Stream from node2 into node4 result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='node4') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Cannot freeze 'backing' link to 'commit-filter'") + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0) + self.assert_qmp(result, 'return', {}) self.wait_until_completed() self.assert_no_active_block_jobs() + # In this case the base node of the stream job is the commit job's + # filter node. stream does not have a real dependency on its base + # node, so even though commit removes it when it is done, there is + # no conflict. 
+ def test_overlapping_5(self): + self.assert_no_active_block_jobs() + + # Commit from node2 into node0 + result = self.vm.qmp('block-commit', device='drive0', + top_node='node2', base_node='node0', + filter_node_name='commit-filter', speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + # Stream from node2 into node4 + result = self.vm.qmp('block-stream', device='node4', + base_node='commit-filter', job_id='node4') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0) + self.assert_qmp(result, 'return', {}) + + self.vm.run_job(job='drive0', auto_dismiss=True, use_log=False) + self.vm.run_job(job='node4', auto_dismiss=True, use_log=False) + self.assert_no_active_block_jobs() + # Test a block-stream and a block-commit job in parallel # Here the stream job is supposed to finish quickly in order to reproduce # the scenario that triggers the bug fixed in 3d5d319e1221 and 1a63a907507 @@ -378,6 +462,10 @@ class TestParallelOps(iotests.QMPTestCase): result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[5], speed=1024*1024) self.assert_qmp(result, 'return', {}) + for job in ['drive0', 'node4']: + result = self.vm.qmp('block-job-set-speed', device=job, speed=0) + self.assert_qmp(result, 'return', {}) + # Wait for all jobs to be finished. pending_jobs = ['node4', 'drive0'] while len(pending_jobs) > 0: @@ -406,19 +494,23 @@ class TestParallelOps(iotests.QMPTestCase): # Error: the base node does not exist result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'Cannot find device= nor node_name=none') # Error: the base node is not a backing file of the top node result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node6' is not a backing image of 'node4'") # Error: the base node is the same as the top node result = self.vm.qmp('block-stream', device='node4', base_node='node4', job_id='stream') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "Node 'node4' is not a backing image of 'node4'") # Error: cannot specify 'base' and 'base-node' at the same time result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], base_node='node2', job_id='stream') - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + "'base' and 'base-node' cannot be specified at the same time") # Success: the base node is a backing file of the top node result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='stream') @@ -851,7 +943,7 @@ class TestSetSpeed(iotests.QMPTestCase): self.assert_no_active_block_jobs() result = self.vm.qmp('block-stream', device='drive0', speed=-1) - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Invalid parameter 'speed'") self.assert_no_active_block_jobs() @@ -860,7 +952,7 @@ class TestSetSpeed(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Invalid parameter 'speed'") self.cancel_and_wait(resume=True) diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 4fd1c2dcd2..6d9bee1a4b 100644 
--- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -......................... +........................... ---------------------------------------------------------------------- -Ran 25 tests +Ran 27 tests OK diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 3ecef5bc90..ce74177ab1 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -542,7 +542,7 @@ class VM(qtest.QEMUQtestMachine): # Returns None on success, and an error string on failure def run_job(self, job, auto_finalize=True, auto_dismiss=False, - pre_finalize=None, wait=60.0): + pre_finalize=None, use_log=True, wait=60.0): match_device = {'data': {'device': job}} match_id = {'data': {'id': job}} events = [ @@ -557,7 +557,8 @@ class VM(qtest.QEMUQtestMachine): while True: ev = filter_qmp_event(self.events_wait(events)) if ev['event'] != 'JOB_STATUS_CHANGE': - log(ev) + if use_log: + log(ev) continue status = ev['data']['status'] if status == 'aborting': @@ -565,13 +566,20 @@ class VM(qtest.QEMUQtestMachine): for j in result['return']: if j['id'] == job: error = j['error'] - log('Job failed: %s' % (j['error'])) + if use_log: + log('Job failed: %s' % (j['error'])) elif status == 'pending' and not auto_finalize: if pre_finalize: pre_finalize() - self.qmp_log('job-finalize', id=job) + if use_log: + self.qmp_log('job-finalize', id=job) + else: + self.qmp('job-finalize', id=job) elif status == 'concluded' and not auto_dismiss: - self.qmp_log('job-dismiss', id=job) + if use_log: + self.qmp_log('job-dismiss', id=job) + else: + self.qmp('job-dismiss', id=job) elif status == 'null': return error diff --git a/tests/test-bitmap.c b/tests/test-bitmap.c new file mode 100644 index 0000000000..cb7c5e462d --- /dev/null +++ b/tests/test-bitmap.c @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Bitmap.c unit-tests. + * + * Copyright (C) 2019, Red Hat, Inc. 
+ * + * Author: Peter Xu <peterx@redhat.com> + */ + +#include <stdlib.h> +#include "qemu/osdep.h" +#include "qemu/bitmap.h" + +#define BMAP_SIZE 1024 + +static void check_bitmap_copy_with_offset(void) +{ + unsigned long *bmap1, *bmap2, *bmap3, total; + + bmap1 = bitmap_new(BMAP_SIZE); + bmap2 = bitmap_new(BMAP_SIZE); + bmap3 = bitmap_new(BMAP_SIZE); + + bmap1[0] = random(); + bmap1[1] = random(); + bmap1[2] = random(); + bmap1[3] = random(); + total = BITS_PER_LONG * 4; + + /* Shift 115 bits into bmap2 */ + bitmap_copy_with_dst_offset(bmap2, bmap1, 115, total); + /* Shift another 85 bits into bmap3 */ + bitmap_copy_with_dst_offset(bmap3, bmap2, 85, total + 115); + /* Shift back 200 bits back */ + bitmap_copy_with_src_offset(bmap2, bmap3, 200, total); + + g_assert_cmpmem(bmap1, total / BITS_PER_LONG, + bmap2, total / BITS_PER_LONG); + + bitmap_clear(bmap1, 0, BMAP_SIZE); + /* Set bits in bmap1 are 100-245 */ + bitmap_set(bmap1, 100, 145); + + /* Set bits in bmap2 are 60-205 */ + bitmap_copy_with_src_offset(bmap2, bmap1, 40, 250); + g_assert_cmpint(find_first_bit(bmap2, 60), ==, 60); + g_assert_cmpint(find_next_zero_bit(bmap2, 205, 60), ==, 205); + g_assert(test_bit(205, bmap2) == 0); + + /* Set bits in bmap3 are 135-280 */ + bitmap_copy_with_dst_offset(bmap3, bmap1, 35, 250); + g_assert_cmpint(find_first_bit(bmap3, 135), ==, 135); + g_assert_cmpint(find_next_zero_bit(bmap3, 280, 135), ==, 280); + g_assert(test_bit(280, bmap3) == 0); + + g_free(bmap1); + g_free(bmap2); + g_free(bmap3); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/bitmap/bitmap_copy_with_offset", + check_bitmap_copy_with_offset); + + g_test_run(); + + return 0; +} diff --git a/util/bitmap.c b/util/bitmap.c index cb618c65a5..1753ff7f5b 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -402,3 +402,88 @@ void bitmap_to_le(unsigned long *dst, const unsigned long *src, { bitmap_to_from_le(dst, src, nbits); } + +/* + * Copy "src" bitmap with a positive offset and put it into the "dst" + * bitmap. The caller needs to make sure the bitmap size of "src" + * is bigger than (shift + nbits). + */ +void bitmap_copy_with_src_offset(unsigned long *dst, const unsigned long *src, + unsigned long shift, unsigned long nbits) +{ + unsigned long left_mask, right_mask, last_mask; + + /* Proper shift src pointer to the first word to copy from */ + src += BIT_WORD(shift); + shift %= BITS_PER_LONG; + + if (!shift) { + /* Fast path */ + bitmap_copy(dst, src, nbits); + return; + } + + right_mask = (1ul << shift) - 1; + left_mask = ~right_mask; + + while (nbits >= BITS_PER_LONG) { + *dst = (*src & left_mask) >> shift; + *dst |= (src[1] & right_mask) << (BITS_PER_LONG - shift); + dst++; + src++; + nbits -= BITS_PER_LONG; + } + + if (nbits > BITS_PER_LONG - shift) { + *dst = (*src & left_mask) >> shift; + nbits -= BITS_PER_LONG - shift; + last_mask = (1ul << nbits) - 1; + *dst |= (src[1] & last_mask) << (BITS_PER_LONG - shift); + } else if (nbits) { + last_mask = (1ul << nbits) - 1; + *dst = (*src >> shift) & last_mask; + } +} + +/* + * Copy "src" bitmap into the "dst" bitmap with an offset in the + * "dst". The caller needs to make sure the bitmap size of "dst" is + * bigger than (shift + nbits). 
+ */ +void bitmap_copy_with_dst_offset(unsigned long *dst, const unsigned long *src, + unsigned long shift, unsigned long nbits) +{ + unsigned long left_mask, right_mask, last_mask; + + /* Proper shift dst pointer to the first word to copy from */ + dst += BIT_WORD(shift); + shift %= BITS_PER_LONG; + + if (!shift) { + /* Fast path */ + bitmap_copy(dst, src, nbits); + return; + } + + right_mask = (1ul << (BITS_PER_LONG - shift)) - 1; + left_mask = ~right_mask; + + *dst &= (1ul << shift) - 1; + while (nbits >= BITS_PER_LONG) { + *dst |= (*src & right_mask) << shift; + dst[1] = (*src & left_mask) >> (BITS_PER_LONG - shift); + dst++; + src++; + nbits -= BITS_PER_LONG; + } + + if (nbits > BITS_PER_LONG - shift) { + *dst |= (*src & right_mask) << shift; + nbits -= BITS_PER_LONG - shift; + last_mask = ((1ul << nbits) - 1) << (BITS_PER_LONG - shift); + dst[1] = (*src & last_mask) >> (BITS_PER_LONG - shift); + } else if (nbits) { + last_mask = (1ul << nbits) - 1; + *dst |= (*src & last_mask) << shift; + } +} diff --git a/util/cutils.c b/util/cutils.c index dfc605f1ef..fd591cadf0 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -756,11 +756,11 @@ int uleb128_encode_small(uint8_t *out, uint32_t n) { g_assert(n <= 0x3fff); if (n < 0x80) { - *out++ = n; + *out = n; return 1; } else { *out++ = (n & 0x7f) | 0x80; - *out++ = n >> 7; + *out = n >> 7; return 2; } } @@ -768,7 +768,7 @@ int uleb128_encode_small(uint8_t *out, uint32_t n) int uleb128_decode_small(const uint8_t *in, uint32_t *n) { if (!(*in & 0x80)) { - *n = *in++; + *n = *in; return 1; } else { *n = *in++ & 0x7f; @@ -776,7 +776,7 @@ int uleb128_decode_small(const uint8_t *in, uint32_t *n) if (*in & 0x80) { return -1; } - *n |= *in++ << 7; + *n |= *in << 7; return 2; } } |
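The closing util/cutils.c hunk only drops the stray post-increment on the final byte each helper touches; the two-byte ULEB128 format itself is unchanged. For reference, here is a minimal standalone sketch of that format with a round-trip check; it is an illustrative re-implementation with its own names, not QEMU's code.

/* Standalone sketch of the 2-byte ULEB128 scheme used by uleb128_encode_small /
 * uleb128_decode_small above (illustrative re-implementation, not QEMU code). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Encode n (must be <= 0x3fff) into 1 or 2 bytes; returns bytes written. */
static int uleb128_enc(uint8_t *out, uint32_t n)
{
    assert(n <= 0x3fff);
    if (n < 0x80) {
        out[0] = n;
        return 1;
    }
    out[0] = (n & 0x7f) | 0x80;   /* low 7 bits, continuation bit set */
    out[1] = n >> 7;              /* remaining high bits */
    return 2;
}

/* Decode 1 or 2 bytes into *n; returns bytes consumed, or -1 on overlong input. */
static int uleb128_dec(const uint8_t *in, uint32_t *n)
{
    if (!(in[0] & 0x80)) {
        *n = in[0];
        return 1;
    }
    *n = in[0] & 0x7f;
    if (in[1] & 0x80) {
        return -1;                /* more than two bytes is not supported */
    }
    *n |= (uint32_t)in[1] << 7;
    return 2;
}

int main(void)
{
    uint8_t buf[2];
    uint32_t v;

    for (v = 0; v <= 0x3fff; v++) {
        uint32_t back = 0;
        int len = uleb128_enc(buf, v);
        assert(uleb128_dec(buf, &back) == len && back == v);
    }
    printf("round-trip OK for 0..0x3fff\n");
    return 0;
}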