diff options
author | Peter Maydell | 2020-01-20 11:41:27 +0100 |
---|---|---|
committer | Peter Maydell | 2020-01-20 11:41:27 +0100 |
commit | 26deea00260139fc5f323c3bf9db82a5d470538a (patch) | |
tree | 921d6fe8b18c6b2021ea5f8f27af52c46f3e240f | |
parent | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200117-... (diff) | |
parent | multifd: Be consistent about using uint64_t (diff) | |
download | qemu-26deea00260139fc5f323c3bf9db82a5d470538a.tar.gz qemu-26deea00260139fc5f323c3bf9db82a5d470538a.tar.xz qemu-26deea00260139fc5f323c3bf9db82a5d470538a.zip |
Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-pull-request' into staging
Migration pull request
# gpg: Signature made Mon 20 Jan 2020 10:29:53 GMT
# gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg: aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723
* remotes/juanquintela/tags/migration-pull-pull-request: (29 commits)
multifd: Be consistent about using uint64_t
migration: Support QLIST migration
apic: Use 32bit APIC ID for migration instance ID
migration: Change SaveStateEntry.instance_id into uint32_t
migration: Define VMSTATE_INSTANCE_ID_ANY
Bug #1829242 correction.
migration/multifd: fix destroyed mutex access in terminating multifd threads
migration/multifd: fix nullptr access in terminating multifd threads
migration/multifd: not use multifd during postcopy
migration/multifd: clean pages after filling packet
migration/postcopy: enable compress during postcopy
migration/postcopy: enable random order target page arrival
migration/postcopy: set all_zero to true on the first target page
migration/postcopy: count target page number to decide the place_needed
migration/postcopy: wait for decompress thread in precopy
migration/postcopy: reduce memset when it is zero page and matches_target_page_size
migration/ram: Yield periodically to the main loop
migration: savevm_state_handler_insert: constant-time element insertion
migration: add savevm_state_handler_remove()
misc: use QEMU_IS_ALIGNED
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | backends/dbus-vmstate.c | 3 | ||||
-rw-r--r-- | exec.c | 4 | ||||
-rw-r--r-- | hw/arm/stellaris.c | 2 | ||||
-rw-r--r-- | hw/core/qdev.c | 3 | ||||
-rw-r--r-- | hw/display/ads7846.c | 2 | ||||
-rw-r--r-- | hw/i2c/core.c | 2 | ||||
-rw-r--r-- | hw/input/stellaris_input.c | 3 | ||||
-rw-r--r-- | hw/intc/apic_common.c | 7 | ||||
-rw-r--r-- | hw/misc/max111x.c | 3 | ||||
-rw-r--r-- | hw/net/eepro100.c | 3 | ||||
-rw-r--r-- | hw/pci/pci.c | 2 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 2 | ||||
-rw-r--r-- | hw/timer/arm_timer.c | 2 | ||||
-rw-r--r-- | hw/tpm/tpm_emulator.c | 3 | ||||
-rw-r--r-- | include/migration/register.h | 2 | ||||
-rw-r--r-- | include/migration/vmstate.h | 25 | ||||
-rw-r--r-- | include/qemu/queue.h | 39 | ||||
-rw-r--r-- | migration/migration.c | 72 | ||||
-rw-r--r-- | migration/migration.h | 1 | ||||
-rw-r--r-- | migration/ram.c | 196 | ||||
-rw-r--r-- | migration/savevm.c | 61 | ||||
-rw-r--r-- | migration/trace-events | 9 | ||||
-rw-r--r-- | migration/vmstate-types.c | 70 | ||||
-rw-r--r-- | stubs/vmstate.c | 2 | ||||
-rw-r--r-- | tests/qtest/migration-test.c | 93 | ||||
-rw-r--r-- | tests/test-vmstate.c | 170 | ||||
-rw-r--r-- | vl.c | 10 |
27 files changed, 659 insertions, 132 deletions
diff --git a/backends/dbus-vmstate.c b/backends/dbus-vmstate.c index 56b482a7d6..cc594a722e 100644 --- a/backends/dbus-vmstate.c +++ b/backends/dbus-vmstate.c @@ -412,7 +412,8 @@ dbus_vmstate_complete(UserCreatable *uc, Error **errp) return; } - if (vmstate_register(VMSTATE_IF(self), -1, &dbus_vmstate, self) < 0) { + if (vmstate_register(VMSTATE_IF(self), VMSTATE_INSTANCE_ID_ANY, + &dbus_vmstate, self) < 0) { error_setg(errp, "Failed to register vmstate"); } } @@ -3895,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) uint8_t *host_startaddr = rb->host + start; - if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) { error_report("ram_block_discard_range: Unaligned start address: %p", host_startaddr); goto err; @@ -3903,7 +3903,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) if ((start + length) <= rb->used_length) { bool need_madvise, need_fallocate; - if (length & (rb->page_size - 1)) { + if (!QEMU_IS_ALIGNED(length, rb->page_size)) { error_report("ram_block_discard_range: Unaligned length: %zx", length); goto err; diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index b198066b54..bb025e0bd0 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); memory_region_add_subregion(get_system_memory(), base, &s->iomem); ssys_reset(s); - vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); return 0; } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 9f1753f5cf..58e87d336d 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -879,7 +879,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) if (qdev_get_vmsd(dev)) { if (vmstate_register_with_alias_id(VMSTATE_IF(dev), - -1, qdev_get_vmsd(dev), dev, + VMSTATE_INSTANCE_ID_ANY, + qdev_get_vmsd(dev), dev, dev->instance_id_alias, dev->alias_required_for_version, &local_err) < 0) { diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c index c12272ae72..9228b40b1a 100644 --- a/hw/display/ads7846.c +++ b/hw/display/ads7846.c @@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) ads7846_int_update(s); - vmstate_register(NULL, -1, &vmstate_ads7846, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); } static void ads7846_class_init(ObjectClass *klass, void *data) diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 92cd489069..d770035ba0 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); QLIST_INIT(&bus->current_devs); - vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); return bus; } diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c index 59892b07fc..e6ee5e11f1 100644 --- a/hw/input/stellaris_input.c +++ b/hw/input/stellaris_input.c @@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) } s->num_buttons = n; qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); - vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, + &vmstate_stellaris_gamepad, s); } diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 375cb6abe9..b5dbeb6206 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info; static DeviceState *vapic; - int instance_id = s->id; + uint32_t instance_id = s->initial_apic_id; + + /* Normally initial APIC ID should be no more than hundreds */ + assert(instance_id != VMSTATE_INSTANCE_ID_ANY); info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); @@ -284,7 +287,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } if (s->legacy_instance_id) { - instance_id = -1; + instance_id = VMSTATE_INSTANCE_ID_ANY; } vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, s, -1, 0, NULL); diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c index 211008ce02..2b87bdee5b 100644 --- a/hw/misc/max111x.c +++ b/hw/misc/max111x.c @@ -146,7 +146,8 @@ static int max111x_init(SSISlave *d, int inputs) s->input[7] = 0x80; s->com = 0; - vmstate_register(VMSTATE_IF(dev), -1, &vmstate_max111x, s); + vmstate_register(VMSTATE_IF(dev), VMSTATE_INSTANCE_ID_ANY, + &vmstate_max111x, s); return 0; } diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index cc71a7a036..6cc97769d9 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1874,7 +1874,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); s->vmstate->name = qemu_get_queue(s->nic)->model; - vmstate_register(VMSTATE_IF(&pci_dev->qdev), -1, s->vmstate, s); + vmstate_register(VMSTATE_IF(&pci_dev->qdev), VMSTATE_INSTANCE_ID_ANY, + s->vmstate, s); } static void eepro100_instance_init(Object *obj) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e3d310365d..3ac7961451 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -122,7 +122,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) bus->machine_done.notify = pcibus_machine_done; qemu_add_machine_init_done_notifier(&bus->machine_done); - vmstate_register(NULL, -1, &vmstate_pcibus, bus); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); } static void pcie_bus_realize(BusState *qbus, Error **errp) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 30a5fbd3be..02cf53fc5b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2948,7 +2948,7 @@ static void spapr_machine_init(MachineState *machine) * interface, this is a legacy from the sPAPREnvironment structure * which predated MachineState but had a similar function */ vmstate_register(NULL, 0, &vmstate_spapr, spapr); - register_savevm_live("spapr/htab", -1, 1, + register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, &savevm_htab_handlers, spapr); qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c index af524fabf7..beaa285685 100644 --- a/hw/timer/arm_timer.c +++ b/hw/timer/arm_timer.c @@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) s->control = TIMER_CTRL_IE; s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); - vmstate_register(NULL, -1, &vmstate_arm_timer, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); return s; } diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c index 10d587ed40..3a0fc442f3 100644 --- a/hw/tpm/tpm_emulator.c +++ b/hw/tpm/tpm_emulator.c @@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) tpm_emu->cur_locty_number = ~0; qemu_mutex_init(&tpm_emu->mutex); - vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, + &vmstate_tpm_emulator, obj); } /* diff --git a/include/migration/register.h b/include/migration/register.h index 00c38ebe9f..c1dcff0f90 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -71,7 +71,7 @@ typedef struct SaveVMHandlers { } SaveVMHandlers; int register_savevm_live(const char *idstr, - int instance_id, + uint32_t instance_id, int version_id, const SaveVMHandlers *ops, void *opaque); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 4aef72c426..30667631bc 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -229,6 +229,7 @@ extern const VMStateInfo vmstate_info_tmp; extern const VMStateInfo vmstate_info_bitmap; extern const VMStateInfo vmstate_info_qtailq; extern const VMStateInfo vmstate_info_gtree; +extern const VMStateInfo vmstate_info_qlist; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) /* @@ -798,6 +799,26 @@ extern const VMStateInfo vmstate_info_gtree; .offset = offsetof(_state, _field), \ } +/* + * For migrating a QLIST + * Target QLIST needs be properly initialized. + * _type: type of QLIST element + * _next: name of QLIST_ENTRY entry field in QLIST element + * _vmsd: VMSD for QLIST element + * size: size of QLIST element + * start: offset of QLIST_ENTRY in QTAILQ element + */ +#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .info = &vmstate_info_qlist, \ + .offset = offsetof(_state, _field), \ + .start = offsetof(_type, _next), \ +} + /* _f : field name _f_n : num of elements field_name _n : num of elements @@ -1157,8 +1178,10 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); +#define VMSTATE_INSTANCE_ID_ANY -1 + /* Returns: 0 on success, -1 on failure */ -int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, +int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, const VMStateDescription *vmsd, void *base, int alias_id, int required_for_version, diff --git a/include/qemu/queue.h b/include/qemu/queue.h index 4764d93ea3..4d4554a7ce 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -501,4 +501,43 @@ union { \ QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \ } while (/*CONSTCOND*/0) +#define QLIST_RAW_FIRST(head) \ + field_at_offset(head, 0, void *) + +#define QLIST_RAW_NEXT(elm, entry) \ + field_at_offset(elm, entry, void *) + +#define QLIST_RAW_PREVIOUS(elm, entry) \ + field_at_offset(elm, entry + sizeof(void *), void *) + +#define QLIST_RAW_FOREACH(elm, head, entry) \ + for ((elm) = *QLIST_RAW_FIRST(head); \ + (elm); \ + (elm) = *QLIST_RAW_NEXT(elm, entry)) + +#define QLIST_RAW_INSERT_HEAD(head, elm, entry) do { \ + void *first = *QLIST_RAW_FIRST(head); \ + *QLIST_RAW_FIRST(head) = elm; \ + *QLIST_RAW_PREVIOUS(elm, entry) = QLIST_RAW_FIRST(head); \ + if (first) { \ + *QLIST_RAW_NEXT(elm, entry) = first; \ + *QLIST_RAW_PREVIOUS(first, entry) = QLIST_RAW_NEXT(elm, entry); \ + } else { \ + *QLIST_RAW_NEXT(elm, entry) = NULL; \ + } \ +} while (0) + +#define QLIST_RAW_REVERSE(head, elm, entry) do { \ + void *iter = *QLIST_RAW_FIRST(head), *prev = NULL, *next; \ + while (iter) { \ + next = *QLIST_RAW_NEXT(iter, entry); \ + *QLIST_RAW_PREVIOUS(iter, entry) = QLIST_RAW_NEXT(next, entry); \ + *QLIST_RAW_NEXT(iter, entry) = prev; \ + prev = iter; \ + iter = next; \ + } \ + *QLIST_RAW_FIRST(head) = prev; \ + *QLIST_RAW_PREVIOUS(prev, entry) = QLIST_RAW_FIRST(head); \ +} while (0) + #endif /* QEMU_SYS_QUEUE_H */ diff --git a/migration/migration.c b/migration/migration.c index 354ad072fa..990bff00c0 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1005,17 +1005,6 @@ static bool migrate_caps_check(bool *cap_list, #endif if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { - /* The decompression threads asynchronously write into RAM - * rather than use the atomic copies needed to avoid - * userfaulting. It should be possible to fix the decompression - * threads for compatibility in future. - */ - error_setg(errp, "Postcopy is not currently compatible " - "with compression"); - return false; - } - /* This check is reasonably expensive, so only when it's being * set the first time, also it's only the destination that needs * special support. @@ -1784,6 +1773,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp) } if (!once) { error_setg(errp, "The incoming migration has already been started"); + return; } qemu_start_incoming_migration(uri, &local_err); @@ -2035,11 +2025,10 @@ void qmp_migrate_set_downtime(double value, Error **errp) } value *= 1000; /* Convert to milliseconds */ - value = MAX(0, MIN(INT64_MAX, value)); MigrateSetParameters p = { .has_downtime_limit = true, - .downtime_limit = value, + .downtime_limit = (int64_t)value, }; qmp_migrate_set_parameters(&p, errp); @@ -3224,6 +3213,37 @@ void migration_consume_urgent_request(void) qemu_sem_wait(&migrate_get_current()->rate_limit_sem); } +/* Returns true if the rate limiting was broken by an urgent request */ +bool migration_rate_limit(void) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + MigrationState *s = migrate_get_current(); + + bool urgent = false; + migration_update_counters(s, now); + if (qemu_file_rate_limit(s->to_dst_file)) { + /* + * Wait for a delay to do rate limiting OR + * something urgent to post the semaphore. + */ + int ms = s->iteration_start_time + BUFFER_DELAY - now; + trace_migration_rate_limit_pre(ms); + if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { + /* + * We were woken by one or more urgent things but + * the timedwait will have consumed one of them. + * The service routine for the urgent wake will dec + * the semaphore itself for each item it consumes, + * so add this one we just eat back. + */ + qemu_sem_post(&s->rate_limit_sem); + urgent = true; + } + trace_migration_rate_limit_post(urgent); + } + return urgent; +} + /* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. @@ -3290,8 +3310,6 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); while (migration_is_active(s)) { - int64_t current_time; - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { MigIterateState iter_state = migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { @@ -3318,29 +3336,7 @@ static void *migration_thread(void *opaque) update_iteration_initial_status(s); } - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - - migration_update_counters(s, current_time); - - urgent = false; - if (qemu_file_rate_limit(s->to_dst_file)) { - /* Wait for a delay to do rate limiting OR - * something urgent to post the semaphore. - */ - int ms = s->iteration_start_time + BUFFER_DELAY - current_time; - trace_migration_thread_ratelimit_pre(ms); - if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { - /* We were worken by one or more urgent things but - * the timedwait will have consumed one of them. - * The service routine for the urgent wake will dec - * the semaphore itself for each item it consumes, - * so add this one we just eat back. - */ - qemu_sem_post(&s->rate_limit_sem); - urgent = true; - } - trace_migration_thread_ratelimit_post(urgent); - } + urgent = migration_rate_limit(); } trace_migration_thread_after_loop(); diff --git a/migration/migration.h b/migration/migration.h index 79b3dda146..aa9ff6f27b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -341,5 +341,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); void migration_make_urgent_request(void); void migration_consume_urgent_request(void); +bool migration_rate_limit(void); #endif diff --git a/migration/ram.c b/migration/ram.c index 96feb4062c..d2208b5534 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -703,7 +703,7 @@ typedef struct { static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) { - MultiFDInit_t msg; + MultiFDInit_t msg = {}; int ret; msg.magic = cpu_to_be32(MULTIFD_MAGIC); @@ -803,7 +803,10 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) } for (i = 0; i < p->pages->used; i++) { - packet->offset[i] = cpu_to_be64(p->pages->offset[i]); + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->pages->offset[i]; + + packet->offset[i] = cpu_to_be64(temp); } } @@ -877,10 +880,10 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) } for (i = 0; i < p->pages->used; i++) { - ram_addr_t offset = be64_to_cpu(packet->offset[i]); + uint64_t offset = be64_to_cpu(packet->offset[i]); if (offset > (block->used_length - TARGET_PAGE_SIZE)) { - error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT + error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", offset, block->max_length); return -1; @@ -900,6 +903,12 @@ struct { uint64_t packet_num; /* send channels ready */ QemuSemaphore channels_ready; + /* + * Have we already run terminate threads. There is a race when it + * happens that we got one error while we are exiting. + * We will use atomic operations. Only valid values are 0 and 1. + */ + int exiting; } *multifd_send_state; /* @@ -928,6 +937,10 @@ static int multifd_send_pages(RAMState *rs) MultiFDPages_t *pages = multifd_send_state->pages; uint64_t transferred; + if (atomic_read(&multifd_send_state->exiting)) { + return -1; + } + qemu_sem_wait(&multifd_send_state->channels_ready); for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { p = &multifd_send_state->params[i]; @@ -945,10 +958,10 @@ static int multifd_send_pages(RAMState *rs) } qemu_mutex_unlock(&p->mutex); } - p->pages->used = 0; + assert(!p->pages->used); + assert(!p->pages->block); p->packet_num = multifd_send_state->packet_num++; - p->pages->block = NULL; multifd_send_state->pages = p->pages; p->pages = pages; transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; @@ -1009,6 +1022,16 @@ static void multifd_send_terminate_threads(Error *err) } } + /* + * We don't want to exit each threads twice. Depending on where + * we get the error, or if there are two independent errors in two + * threads at the same time, we can end calling this function + * twice. + */ + if (atomic_xchg(&multifd_send_state->exiting, 1)) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1033,6 +1056,10 @@ void multifd_save_cleanup(void) if (p->running) { qemu_thread_join(&p->thread); } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + socket_send_channel_destroy(p->c); p->c = NULL; qemu_mutex_destroy(&p->mutex); @@ -1118,6 +1145,10 @@ static void *multifd_send_thread(void *opaque) while (true) { qemu_sem_wait(&p->sem); + + if (atomic_read(&multifd_send_state->exiting)) { + break; + } qemu_mutex_lock(&p->mutex); if (p->pending_job) { @@ -1130,6 +1161,8 @@ static void *multifd_send_thread(void *opaque) p->flags = 0; p->num_packets++; p->num_pages += used; + p->pages->used = 0; + p->pages->block = NULL; qemu_mutex_unlock(&p->mutex); trace_multifd_send(p->id, packet_num, used, flags, @@ -1224,6 +1257,7 @@ int multifd_save_setup(void) multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); multifd_send_state->pages = multifd_pages_init(page_count); qemu_sem_init(&multifd_send_state->channels_ready, 0); + atomic_set(&multifd_send_state->exiting, 0); for (i = 0; i < thread_count; i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1236,7 +1270,7 @@ int multifd_save_setup(void) p->id = i; p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(ram_addr_t) * page_count; + + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); p->packet->version = cpu_to_be32(MULTIFD_VERSION); @@ -1281,7 +1315,9 @@ static void multifd_recv_terminate_threads(Error *err) - normal quit, i.e. everything went fine, just finished - error quit: We close the channels so the channel threads finish the qio_channel_read_all_eof() */ - qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } qemu_mutex_unlock(&p->mutex); } } @@ -1307,6 +1343,10 @@ int multifd_load_cleanup(Error **errp) qemu_sem_post(&p->sem_sync); qemu_thread_join(&p->thread); } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + object_unref(OBJECT(p->c)); p->c = NULL; qemu_mutex_destroy(&p->mutex); @@ -1447,7 +1487,7 @@ int multifd_load_setup(void) p->id = i; p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(ram_addr_t) * page_count; + + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdrecv_%d", i); } @@ -1731,7 +1771,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { uint8_t shift = rb->clear_bmap_shift; hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); - hwaddr start = (page << TARGET_PAGE_BITS) & (-size); + hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); /* * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this @@ -1968,7 +2008,7 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages) return; } - ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); + ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS); } /* @@ -2056,7 +2096,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) uint8_t *p; bool send_async = true; RAMBlock *block = pss->block; - ram_addr_t offset = pss->page << TARGET_PAGE_BITS; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ram_addr_t current_addr = block->offset + offset; p = block->host + offset; @@ -2243,7 +2283,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) *again = false; return false; } - if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { + if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS) + >= pss->block->used_length) { /* Didn't find anything in this RAM Block */ pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); @@ -2434,7 +2475,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) * it's the 1st request. */ error_report("ram_save_queue_pages no previous block"); - goto err; + return -1; } } else { ramblock = qemu_ram_block_by_name(rbname); @@ -2442,7 +2483,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) if (!ramblock) { /* We shouldn't be asked for a non-existent RAMBlock */ error_report("ram_save_queue_pages no block '%s'", rbname); - goto err; + return -1; } rs->last_req_rb = ramblock; } @@ -2451,7 +2492,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) error_report("%s request overrun start=" RAM_ADDR_FMT " len=" RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, __func__, start, len, ramblock->used_length); - goto err; + return -1; } struct RAMSrcPageRequest *new_entry = @@ -2467,9 +2508,6 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) qemu_mutex_unlock(&rs->src_page_req_mutex); return 0; - -err: - return -1; } static bool save_page_use_compression(RAMState *rs) @@ -2537,7 +2575,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) { RAMBlock *block = pss->block; - ram_addr_t offset = pss->page << TARGET_PAGE_BITS; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; if (control_save_page(rs, block, offset, &res)) { @@ -2563,10 +2601,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, } /* - * do not use multifd for compression as the first page in the new - * block should be posted out before sending the compressed page + * Do not use multifd for: + * 1. Compression as the first page in the new block should be posted out + * before sending the compressed page + * 2. In postcopy as one whole host page should be placed */ - if (!save_page_use_compression(rs) && migrate_use_multifd()) { + if (!save_page_use_compression(rs) && migrate_use_multifd() + && !migration_in_postcopy()) { return ram_save_multifd_page(rs, block, offset); } @@ -2617,8 +2658,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, pages += tmppages; pss->page++; + /* Allow rate limiting to happen in the middle of huge pages */ + migration_rate_limit(); } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + offset_in_ramblock(pss->block, + ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); /* The offset we leave with is the last one we looked at */ pss->page--; @@ -2835,8 +2879,10 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) while (run_start < range) { unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); - ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, - (run_end - run_start) << TARGET_PAGE_BITS); + ram_discard_range(block->idstr, + ((ram_addr_t)run_start) << TARGET_PAGE_BITS, + ((ram_addr_t)(run_end - run_start)) + << TARGET_PAGE_BITS); run_start = find_next_zero_bit(bitmap, range, run_end + 1); } } @@ -3072,8 +3118,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) */ int ram_discard_range(const char *rbname, uint64_t start, size_t length) { - int ret = -1; - trace_ram_discard_range(rbname, start, length); RCU_READ_LOCK_GUARD(); @@ -3081,7 +3125,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) if (!rb) { error_report("ram_discard_range: Failed to find block '%s'", rbname); - goto err; + return -1; } /* @@ -3093,10 +3137,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) length >> qemu_target_page_bits()); } - ret = ram_block_discard_range(rb, start, length); - -err: - return ret; + return ram_block_discard_range(rb, start, length); } /* @@ -3451,6 +3492,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) rs->target_page_count += pages; /* + * During postcopy, it is necessary to make sure one whole host + * page is sent in one chunk. + */ + if (migrate_postcopy_ram()) { + flush_compressed_data(rs); + } + + /* * we want to check in the 1st loop, just in case it was the 1st * time and we had to sync the dirty bitmap. * qemu_clock_get_ns() is a bit expensive, so we only check each @@ -4031,8 +4080,9 @@ static int ram_load_postcopy(QEMUFile *f) MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = mis->postcopy_tmp_page; - void *last_host = NULL; + void *this_host = NULL; bool all_zero = false; + int target_pages = 0; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; @@ -4041,6 +4091,7 @@ static int ram_load_postcopy(QEMUFile *f) void *place_source = NULL; RAMBlock *block = NULL; uint8_t ch; + int len; addr = qemu_get_be64(f); @@ -4058,7 +4109,8 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; - if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) { + if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | + RAM_SAVE_FLAG_COMPRESS_PAGE)) { block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); @@ -4067,6 +4119,7 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + target_pages++; matches_target_page_size = block->page_size == TARGET_PAGE_SIZE; /* * Postcopy requires that we place whole host pages atomically; @@ -4076,38 +4129,47 @@ static int ram_load_postcopy(QEMUFile *f) * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages * however the source ensures it always sends all the components - * of a host page in order. + * of a host page in one chunk. */ page_buffer = postcopy_host_page + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & (block->page_size - 1))) { + if (target_pages == 1) { all_zero = true; + this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, + block->page_size); } else { /* not the 1st TP within the HP */ - if (host != (last_host + TARGET_PAGE_SIZE)) { - error_report("Non-sequential target page %p/%p", - host, last_host); + if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) != + (uintptr_t)this_host) { + error_report("Non-same host page %p/%p", + host, this_host); ret = -EINVAL; break; } } - /* * If it's the last part of a host page then we place the host * page */ - place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - (block->page_size - 1)) == 0; + if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { + place_needed = true; + target_pages = 0; + } place_source = postcopy_host_page; } - last_host = host; switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_ZERO: ch = qemu_get_byte(f); - memset(page_buffer, ch, TARGET_PAGE_SIZE); + /* + * Can skip to set page_buffer when + * this is a zero page and (block->page_size == TARGET_PAGE_SIZE). + */ + if (ch || !matches_target_page_size) { + memset(page_buffer, ch, TARGET_PAGE_SIZE); + } if (ch) { all_zero = false; } @@ -4131,6 +4193,17 @@ static int ram_load_postcopy(QEMUFile *f) TARGET_PAGE_SIZE); } break; + case RAM_SAVE_FLAG_COMPRESS_PAGE: + all_zero = false; + len = qemu_get_be32(f); + if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; + } + decompress_data_with_multi_threads(f, page_buffer, len); + break; + case RAM_SAVE_FLAG_EOS: /* normal exit */ multifd_recv_sync_main(); @@ -4142,6 +4215,11 @@ static int ram_load_postcopy(QEMUFile *f) break; } + /* Got the whole host page, wait for decompress before placing. */ + if (place_needed) { + ret |= wait_for_decompress_done(); + } + /* Detect for any possible file errors */ if (!ret && qemu_file_get_error(f)) { ret = qemu_file_get_error(f); @@ -4149,7 +4227,8 @@ static int ram_load_postcopy(QEMUFile *f) if (!ret && place_needed) { /* This gets called at the last target page in the host page */ - void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, + block->page_size); if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, @@ -4201,13 +4280,16 @@ static void colo_flush_ram_cache(void) while (block) { offset = migration_bitmap_find_dirty(ram_state, block, offset); - if (offset << TARGET_PAGE_BITS >= block->used_length) { + if (((ram_addr_t)offset) << TARGET_PAGE_BITS + >= block->used_length) { offset = 0; block = QLIST_NEXT_RCU(block, next); } else { migration_bitmap_clear_dirty(ram_state, block, offset); - dst_host = block->host + (offset << TARGET_PAGE_BITS); - src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); + dst_host = block->host + + (((ram_addr_t)offset) << TARGET_PAGE_BITS); + src_host = block->colo_cache + + (((ram_addr_t)offset) << TARGET_PAGE_BITS); memcpy(dst_host, src_host, TARGET_PAGE_SIZE); } } @@ -4227,7 +4309,7 @@ static void colo_flush_ram_cache(void) */ static int ram_load_precopy(QEMUFile *f) { - int flags = 0, ret = 0, invalid_flags = 0, len = 0; + int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = postcopy_is_advised(); if (!migrate_use_compression()) { @@ -4239,6 +4321,17 @@ static int ram_load_precopy(QEMUFile *f) void *host = NULL; uint8_t ch; + /* + * Yield periodically to let main loop run, but an iteration of + * the main loop is expensive, so do it each some iterations + */ + if ((i & 32767) == 0 && qemu_in_coroutine()) { + aio_co_schedule(qemu_get_current_aio_context(), + qemu_coroutine_self()); + qemu_coroutine_yield(); + } + i++; + addr = qemu_get_be64(f); flags = addr & ~TARGET_PAGE_MASK; addr &= TARGET_PAGE_MASK; @@ -4385,6 +4478,7 @@ static int ram_load_precopy(QEMUFile *f) } } + ret |= wait_for_decompress_done(); return ret; } @@ -4416,8 +4510,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } else { ret = ram_load_precopy(f); } - - ret |= wait_for_decompress_done(); } trace_ram_load_complete(ret, seq_iter); diff --git a/migration/savevm.c b/migration/savevm.c index 59efc1981d..adfdca26ac 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -233,7 +233,7 @@ typedef struct CompatEntry { typedef struct SaveStateEntry { QTAILQ_ENTRY(SaveStateEntry) entry; char idstr[256]; - int instance_id; + uint32_t instance_id; int alias_id; int version_id; /* version id read from the stream */ @@ -250,6 +250,7 @@ typedef struct SaveStateEntry { typedef struct SaveState { QTAILQ_HEAD(, SaveStateEntry) handlers; + SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1]; int global_section_id; uint32_t len; const char *name; @@ -261,6 +262,7 @@ typedef struct SaveState { static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), + .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL }, .global_section_id = 0, }; @@ -665,10 +667,10 @@ void dump_vmstate_json_to_file(FILE *out_file) fclose(out_file); } -static int calculate_new_instance_id(const char *idstr) +static uint32_t calculate_new_instance_id(const char *idstr) { SaveStateEntry *se; - int instance_id = 0; + uint32_t instance_id = 0; QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (strcmp(idstr, se->idstr) == 0 @@ -676,6 +678,8 @@ static int calculate_new_instance_id(const char *idstr) instance_id = se->instance_id + 1; } } + /* Make sure we never loop over without being noticed */ + assert(instance_id != VMSTATE_INSTANCE_ID_ANY); return instance_id; } @@ -709,20 +713,43 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) { MigrationPriority priority = save_state_priority(nse); SaveStateEntry *se; + int i; assert(priority <= MIG_PRI_MAX); - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (save_state_priority(se) < priority) { + for (i = priority - 1; i >= 0; i--) { + se = savevm_state.handler_pri_head[i]; + if (se != NULL) { + assert(save_state_priority(se) < priority); break; } } - if (se) { + if (i >= 0) { QTAILQ_INSERT_BEFORE(se, nse, entry); } else { QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry); } + + if (savevm_state.handler_pri_head[priority] == NULL) { + savevm_state.handler_pri_head[priority] = nse; + } +} + +static void savevm_state_handler_remove(SaveStateEntry *se) +{ + SaveStateEntry *next; + MigrationPriority priority = save_state_priority(se); + + if (se == savevm_state.handler_pri_head[priority]) { + next = QTAILQ_NEXT(se, entry); + if (next != NULL && save_state_priority(next) == priority) { + savevm_state.handler_pri_head[priority] = next; + } else { + savevm_state.handler_pri_head[priority] = NULL; + } + } + QTAILQ_REMOVE(&savevm_state.handlers, se, entry); } /* TODO: Individual devices generally have very little idea about the rest @@ -730,7 +757,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) Meanwhile pass -1 as instance_id if you do not already have a clearly distinguishing id for all instances of your device class. */ int register_savevm_live(const char *idstr, - int instance_id, + uint32_t instance_id, int version_id, const SaveVMHandlers *ops, void *opaque) @@ -750,7 +777,7 @@ int register_savevm_live(const char *idstr, pstrcat(se->idstr, sizeof(se->idstr), idstr); - if (instance_id == -1) { + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; @@ -777,14 +804,14 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque) QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_state.handlers, se, entry); + savevm_state_handler_remove(se); g_free(se->compat); g_free(se); } } } -int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, +int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, const VMStateDescription *vmsd, void *opaque, int alias_id, int required_for_version, @@ -817,14 +844,14 @@ int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); - se->compat->instance_id = instance_id == -1 ? + se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? calculate_compat_instance_id(vmsd->name) : instance_id; - instance_id = -1; + instance_id = VMSTATE_INSTANCE_ID_ANY; } } pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - if (instance_id == -1) { + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; @@ -841,7 +868,7 @@ void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (se->vmsd == vmsd && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_state.handlers, se, entry); + savevm_state_handler_remove(se); g_free(se->compat); g_free(se); } @@ -1600,7 +1627,7 @@ int qemu_save_device_state(QEMUFile *f) return qemu_file_get_error(f); } -static SaveStateEntry *find_se(const char *idstr, int instance_id) +static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) { SaveStateEntry *se; @@ -2267,7 +2294,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) /* Find savevm section */ se = find_se(idstr, instance_id); if (se == NULL) { - error_report("Unknown savevm section or instance '%s' %d. " + error_report("Unknown savevm section or instance '%s' %"PRIu32". " "Make sure that your current VM setup matches your " "saved VM setup, including any hotplugged devices", idstr, instance_id); @@ -2291,7 +2318,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) ret = vmstate_load(f, se); if (ret < 0) { - error_report("error while loading state for instance 0x%x of" + error_report("error while loading state for instance 0x%"PRIx32" of" " device '%s'", instance_id, idstr); return ret; } diff --git a/migration/trace-events b/migration/trace-events index 6dee7b5389..4ab0a503d2 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -76,6 +76,11 @@ get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d" put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d" +get_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)" +get_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" +put_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)" +put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" + # qemu-file.c qemu_file_fclose(void) "" @@ -138,12 +143,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 migration_completion_file_err(void) "" migration_completion_postcopy_end(void) "" migration_completion_postcopy_end_after_complete(void) "" +migration_rate_limit_pre(int ms) "%d ms" +migration_rate_limit_post(int urgent) "urgent: %d" migration_return_path_end_before(void) "" migration_return_path_end_after(int rp_error) "%d" migration_thread_after_loop(void) "" migration_thread_file_err(void) "" -migration_thread_ratelimit_pre(int ms) "%d ms" -migration_thread_ratelimit_post(int urgent) "urgent: %d" migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index 7236cf92bc..1eee36773a 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -843,3 +843,73 @@ const VMStateInfo vmstate_info_gtree = { .get = get_gtree, .put = put_gtree, }; + +static int put_qlist(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field, QJSON *vmdesc) +{ + const VMStateDescription *vmsd = field->vmsd; + /* offset of the QTAILQ entry in a QTAILQ element*/ + size_t entry_offset = field->start; + void *elm; + int ret; + + trace_put_qlist(field->name, vmsd->name, vmsd->version_id); + QLIST_RAW_FOREACH(elm, pv, entry_offset) { + qemu_put_byte(f, true); + ret = vmstate_save_state(f, vmsd, elm, vmdesc); + if (ret) { + error_report("%s: failed to save %s (%d)", field->name, + vmsd->name, ret); + return ret; + } + } + qemu_put_byte(f, false); + trace_put_qlist_end(field->name, vmsd->name); + + return 0; +} + +static int get_qlist(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field) +{ + int ret = 0; + const VMStateDescription *vmsd = field->vmsd; + /* size of a QLIST element */ + size_t size = field->size; + /* offset of the QLIST entry in a QLIST element */ + size_t entry_offset = field->start; + int version_id = field->version_id; + void *elm; + + trace_get_qlist(field->name, vmsd->name, vmsd->version_id); + if (version_id > vmsd->version_id) { + error_report("%s %s", vmsd->name, "too new"); + return -EINVAL; + } + if (version_id < vmsd->minimum_version_id) { + error_report("%s %s", vmsd->name, "too old"); + return -EINVAL; + } + + while (qemu_get_byte(f)) { + elm = g_malloc(size); + ret = vmstate_load_state(f, vmsd, elm, version_id); + if (ret) { + error_report("%s: failed to load %s (%d)", field->name, + vmsd->name, ret); + g_free(elm); + return ret; + } + QLIST_RAW_INSERT_HEAD(pv, elm, entry_offset); + } + QLIST_RAW_REVERSE(pv, elm, entry_offset); + trace_get_qlist_end(field->name, vmsd->name); + + return ret; +} + +const VMStateInfo vmstate_info_qlist = { + .name = "qlist", + .get = get_qlist, + .put = put_qlist, +}; diff --git a/stubs/vmstate.c b/stubs/vmstate.c index 6951d9fdc5..cc4fe41dfc 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -4,7 +4,7 @@ const VMStateDescription vmstate_dummy = {}; int vmstate_register_with_alias_id(VMStateIf *obj, - int instance_id, + uint32_t instance_id, const VMStateDescription *vmsd, void *base, int alias_id, int required_for_version, diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 341d190922..26e2e77289 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -356,6 +356,43 @@ static void migrate_set_parameter_int(QTestState *who, const char *parameter, migrate_check_parameter_int(who, parameter, value); } +static char *migrate_get_parameter_str(QTestState *who, + const char *parameter) +{ + QDict *rsp; + char *result; + + rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }"); + result = g_strdup(qdict_get_str(rsp, parameter)); + qobject_unref(rsp); + return result; +} + +static void migrate_check_parameter_str(QTestState *who, const char *parameter, + const char *value) +{ + char *result; + + result = migrate_get_parameter_str(who, parameter); + g_assert_cmpstr(result, ==, value); + g_free(result); +} + +__attribute__((unused)) +static void migrate_set_parameter_str(QTestState *who, const char *parameter, + const char *value) +{ + QDict *rsp; + + rsp = qtest_qmp(who, + "{ 'execute': 'migrate-set-parameters'," + "'arguments': { %s: %s } }", + parameter, value); + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + migrate_check_parameter_str(who, parameter, value); +} + static void migrate_pause(QTestState *who) { QDict *rsp; @@ -1202,6 +1239,61 @@ static void test_migrate_auto_converge(void) test_migrate_end(from, to, true); } +static void test_multifd_tcp(void) +{ + MigrateStart *args = migrate_start_new(); + QTestState *from, *to; + QDict *rsp; + char *uri; + + if (test_migrate_start(&from, &to, "defer", args)) { + return; + } + + /* + * We want to pick a speed slow enough that the test completes + * quickly, but that it doesn't complete precopy even on a slow + * machine, so also set the downtime. + */ + /* 1 ms should make it not converge*/ + migrate_set_parameter_int(from, "downtime-limit", 1); + /* 1GB/s */ + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); + + migrate_set_parameter_int(from, "multifd-channels", 16); + migrate_set_parameter_int(to, "multifd-channels", 16); + + migrate_set_capability(from, "multifd", "true"); + migrate_set_capability(to, "multifd", "true"); + + /* Start incoming migration from the 1st socket */ + rsp = wait_command(to, "{ 'execute': 'migrate-incoming'," + " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + qobject_unref(rsp); + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + uri = migrate_get_socket_address(to, "socket-address"); + + migrate_qmp(from, uri, "{}"); + + wait_for_migration_pass(from); + + /* 300ms it should converge */ + migrate_set_parameter_int(from, "downtime-limit", 300); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + test_migrate_end(from, to, true); + free(uri); +} + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -1266,6 +1358,7 @@ int main(int argc, char **argv) test_validate_uuid_dst_not_set); qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + qtest_add_func("/migration/multifd/tcp", test_multifd_tcp); ret = g_test_run(); diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 8f184f3556..cea363dd69 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -926,6 +926,28 @@ static const VMStateDescription vmstate_domain = { } }; +/* test QLIST Migration */ + +typedef struct TestQListElement { + uint32_t id; + QLIST_ENTRY(TestQListElement) next; +} TestQListElement; + +typedef struct TestQListContainer { + uint32_t id; + QLIST_HEAD(, TestQListElement) list; +} TestQListContainer; + +static const VMStateDescription vmstate_qlist_element = { + .name = "test/queue list", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, TestQListElement), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_iommu = { .name = "iommu", .version_id = 1, @@ -939,6 +961,18 @@ static const VMStateDescription vmstate_iommu = { } }; +static const VMStateDescription vmstate_container = { + .name = "test/container/qlist", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, TestQListContainer), + VMSTATE_QLIST_V(list, TestQListContainer, 1, vmstate_qlist_element, + TestQListElement, next), + VMSTATE_END_OF_LIST() + } +}; + uint8_t first_domain_dump[] = { /* id */ 0x00, 0x0, 0x0, 0x6, @@ -1229,6 +1263,140 @@ static void test_gtree_load_iommu(void) qemu_fclose(fload); } +static uint8_t qlist_dump[] = { + 0x00, 0x00, 0x00, 0x01, /* container id */ + 0x1, /* start of a */ + 0x00, 0x00, 0x00, 0x0a, + 0x1, /* start of b */ + 0x00, 0x00, 0x0b, 0x00, + 0x1, /* start of c */ + 0x00, 0x0c, 0x00, 0x00, + 0x1, /* start of d */ + 0x0d, 0x00, 0x00, 0x00, + 0x0, /* end of list */ + QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ +}; + +static TestQListContainer *alloc_container(void) +{ + TestQListElement *a = g_malloc(sizeof(TestQListElement)); + TestQListElement *b = g_malloc(sizeof(TestQListElement)); + TestQListElement *c = g_malloc(sizeof(TestQListElement)); + TestQListElement *d = g_malloc(sizeof(TestQListElement)); + TestQListContainer *container = g_malloc(sizeof(TestQListContainer)); + + a->id = 0x0a; + b->id = 0x0b00; + c->id = 0xc0000; + d->id = 0xd000000; + container->id = 1; + + QLIST_INIT(&container->list); + QLIST_INSERT_HEAD(&container->list, d, next); + QLIST_INSERT_HEAD(&container->list, c, next); + QLIST_INSERT_HEAD(&container->list, b, next); + QLIST_INSERT_HEAD(&container->list, a, next); + return container; +} + +static void free_container(TestQListContainer *container) +{ + TestQListElement *iter, *tmp; + + QLIST_FOREACH_SAFE(iter, &container->list, next, tmp) { + QLIST_REMOVE(iter, next); + g_free(iter); + } + g_free(container); +} + +static void compare_containers(TestQListContainer *c1, TestQListContainer *c2) +{ + TestQListElement *first_item_c1, *first_item_c2; + + while (!QLIST_EMPTY(&c1->list)) { + first_item_c1 = QLIST_FIRST(&c1->list); + first_item_c2 = QLIST_FIRST(&c2->list); + assert(first_item_c2); + assert(first_item_c1->id == first_item_c2->id); + QLIST_REMOVE(first_item_c1, next); + QLIST_REMOVE(first_item_c2, next); + g_free(first_item_c1); + g_free(first_item_c2); + } + assert(QLIST_EMPTY(&c2->list)); +} + +/* + * Check the prev & next fields are correct by doing list + * manipulations on the container. We will do that for both + * the source and the destination containers + */ +static void manipulate_container(TestQListContainer *c) +{ + TestQListElement *prev = NULL, *iter = QLIST_FIRST(&c->list); + TestQListElement *elem; + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x12; + QLIST_INSERT_AFTER(iter, elem, next); + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x13; + QLIST_INSERT_HEAD(&c->list, elem, next); + + while (iter) { + prev = iter; + iter = QLIST_NEXT(iter, next); + } + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x14; + QLIST_INSERT_BEFORE(prev, elem, next); + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x15; + QLIST_INSERT_AFTER(prev, elem, next); + + QLIST_REMOVE(prev, next); + g_free(prev); +} + +static void test_save_qlist(void) +{ + TestQListContainer *container = alloc_container(); + + save_vmstate(&vmstate_container, container); + compare_vmstate(qlist_dump, sizeof(qlist_dump)); + free_container(container); +} + +static void test_load_qlist(void) +{ + QEMUFile *fsave, *fload; + TestQListContainer *orig_container = alloc_container(); + TestQListContainer *dest_container = g_malloc0(sizeof(TestQListContainer)); + char eof; + + QLIST_INIT(&dest_container->list); + + fsave = open_test_file(true); + qemu_put_buffer(fsave, qlist_dump, sizeof(qlist_dump)); + g_assert(!qemu_file_get_error(fsave)); + qemu_fclose(fsave); + + fload = open_test_file(false); + vmstate_load_state(fload, &vmstate_container, dest_container, 1); + eof = qemu_get_byte(fload); + g_assert(!qemu_file_get_error(fload)); + g_assert_cmpint(eof, ==, QEMU_VM_EOF); + manipulate_container(orig_container); + manipulate_container(dest_container); + compare_containers(orig_container, dest_container); + free_container(orig_container); + free_container(dest_container); +} + typedef struct TmpTestStruct { TestStruct *parent; int64_t diff; @@ -1353,6 +1521,8 @@ int main(int argc, char **argv) g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain); g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu); g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu); + g_test_add_func("/vmstate/qlist/save/saveqlist", test_save_qlist); + g_test_add_func("/vmstate/qlist/load/loadqlist", test_load_qlist); g_test_add_func("/vmstate/tmp_struct", test_tmp_struct); g_test_run(); @@ -1604,9 +1604,6 @@ static bool main_loop_should_exit(void) RunState r; ShutdownCause request; - if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { - return false; - } if (preconfig_exit_requested) { if (runstate_check(RUN_STATE_PRECONFIG)) { runstate_set(RUN_STATE_PRELAUNCH); @@ -1635,8 +1632,13 @@ static bool main_loop_should_exit(void) pause_all_vcpus(); qemu_system_reset(request); resume_all_vcpus(); + /* + * runstate can change in pause_all_vcpus() + * as iothread mutex is unlocked + */ if (!runstate_check(RUN_STATE_RUNNING) && - !runstate_check(RUN_STATE_INMIGRATE)) { + !runstate_check(RUN_STATE_INMIGRATE) && + !runstate_check(RUN_STATE_FINISH_MIGRATE)) { runstate_set(RUN_STATE_PRELAUNCH); } } |