summaryrefslogtreecommitdiffstats
path: root/migration
diff options
context:
space:
mode:
Diffstat (limited to 'migration')
-rw-r--r--migration/Makefile.objs2
-rw-r--r--migration/colo-comm.c76
-rw-r--r--migration/colo-failover.c2
-rw-r--r--migration/colo.c212
-rw-r--r--migration/migration.c46
-rw-r--r--migration/ram.c166
-rw-r--r--migration/ram.h4
-rw-r--r--migration/savevm.c53
-rw-r--r--migration/savevm.h5
-rw-r--r--migration/trace-events3
10 files changed, 466 insertions, 103 deletions
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index c83ec47ba8..a4f3bafd86 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -1,6 +1,6 @@
common-obj-y += migration.o socket.o fd.o exec.o
common-obj-y += tls.o channel.o savevm.o
-common-obj-y += colo-comm.o colo.o colo-failover.o
+common-obj-y += colo.o colo-failover.o
common-obj-y += vmstate.o vmstate-types.o page_cache.o
common-obj-y += qemu-file.o global_state.o
common-obj-y += qemu-file-channel.o
diff --git a/migration/colo-comm.c b/migration/colo-comm.c
deleted file mode 100644
index df26e4dfe7..0000000000
--- a/migration/colo-comm.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
- * (a.k.a. Fault Tolerance or Continuous Replication)
- *
- * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
- * Copyright (c) 2016 FUJITSU LIMITED
- * Copyright (c) 2016 Intel Corporation
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or
- * later. See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "migration.h"
-#include "migration/colo.h"
-#include "migration/vmstate.h"
-#include "trace.h"
-
-typedef struct {
- bool colo_requested;
-} COLOInfo;
-
-static COLOInfo colo_info;
-
-COLOMode get_colo_mode(void)
-{
- if (migration_in_colo_state()) {
- return COLO_MODE_PRIMARY;
- } else if (migration_incoming_in_colo_state()) {
- return COLO_MODE_SECONDARY;
- } else {
- return COLO_MODE_UNKNOWN;
- }
-}
-
-static int colo_info_pre_save(void *opaque)
-{
- COLOInfo *s = opaque;
-
- s->colo_requested = migrate_colo_enabled();
-
- return 0;
-}
-
-static bool colo_info_need(void *opaque)
-{
- return migrate_colo_enabled();
-}
-
-static const VMStateDescription colo_state = {
- .name = "COLOState",
- .version_id = 1,
- .minimum_version_id = 1,
- .pre_save = colo_info_pre_save,
- .needed = colo_info_need,
- .fields = (VMStateField[]) {
- VMSTATE_BOOL(colo_requested, COLOInfo),
- VMSTATE_END_OF_LIST()
- },
-};
-
-void colo_info_init(void)
-{
- vmstate_register(NULL, 0, &colo_state, &colo_info);
-}
-
-bool migration_incoming_enable_colo(void)
-{
- return colo_info.colo_requested;
-}
-
-void migration_incoming_exit_colo(void)
-{
- colo_info.colo_requested = false;
-}
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
index 0ae0c41221..4854a96c92 100644
--- a/migration/colo-failover.c
+++ b/migration/colo-failover.c
@@ -77,7 +77,7 @@ FailoverStatus failover_get_state(void)
void qmp_x_colo_lost_heartbeat(Error **errp)
{
- if (get_colo_mode() == COLO_MODE_UNKNOWN) {
+ if (get_colo_mode() == COLO_MODE_NONE) {
error_setg(errp, QERR_FEATURE_DISABLED, "colo");
return;
}
diff --git a/migration/colo.c b/migration/colo.c
index 88936f5962..956ac236b7 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -25,8 +25,16 @@
#include "qemu/error-report.h"
#include "migration/failover.h"
#include "replication.h"
+#include "net/colo-compare.h"
+#include "net/colo.h"
+#include "block/block.h"
+#include "qapi/qapi-events-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/cpus.h"
+#include "net/filter.h"
static bool vmstate_loading;
+static Notifier packets_compare_notifier;
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
@@ -53,6 +61,7 @@ static void secondary_vm_do_failover(void)
{
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
+ Error *local_err = NULL;
/* Can not do failover during the process of VM's loading VMstate, Or
* it will break the secondary VM.
@@ -70,6 +79,17 @@ static void secondary_vm_do_failover(void)
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ replication_stop_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+
+ /* Notify all filters of all NIC to do checkpoint */
+ colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+
if (!autostart) {
error_report("\"-S\" qemu option will be ignored in secondary side");
/* recover runstate to normal migration finish state */
@@ -107,9 +127,15 @@ static void primary_vm_do_failover(void)
{
MigrationState *s = migrate_get_current();
int old_state;
+ Error *local_err = NULL;
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ /*
+ * kick COLO thread which might wait at
+ * qemu_sem_wait(&s->colo_checkpoint_sem).
+ */
+ colo_checkpoint_notify(migrate_get_current());
/*
* Wake up COLO thread which may blocked in recv() or send(),
@@ -130,10 +156,28 @@ static void primary_vm_do_failover(void)
FailoverStatus_str(old_state));
return;
}
+
+ replication_stop_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ local_err = NULL;
+ }
+
/* Notify COLO thread that failover work is finished */
qemu_sem_post(&s->colo_exit_sem);
}
+COLOMode get_colo_mode(void)
+{
+ if (migration_in_colo_state()) {
+ return COLO_MODE_PRIMARY;
+ } else if (migration_incoming_in_colo_state()) {
+ return COLO_MODE_SECONDARY;
+ } else {
+ return COLO_MODE_NONE;
+ }
+}
+
void colo_do_failover(MigrationState *s)
{
/* Make sure VM stopped while failover happened. */
@@ -207,6 +251,26 @@ void qmp_xen_colo_do_checkpoint(Error **errp)
#endif
}
+COLOStatus *qmp_query_colo_status(Error **errp)
+{
+ COLOStatus *s = g_new0(COLOStatus, 1);
+
+ s->mode = get_colo_mode();
+
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ s->reason = COLO_EXIT_REASON_NONE;
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ s->reason = COLO_EXIT_REASON_REQUEST;
+ break;
+ default:
+ s->reason = COLO_EXIT_REASON_ERROR;
+ }
+
+ return s;
+}
+
static void colo_send_message(QEMUFile *f, COLOMessage msg,
Error **errp)
{
@@ -343,21 +407,42 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
goto out;
}
+ colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
/* Disable block migration */
migrate_set_block_enabled(false, &local_err);
- qemu_savevm_state_header(fb);
- qemu_savevm_state_setup(fb);
qemu_mutex_lock_iothread();
- qemu_savevm_state_complete_precopy(fb, false, false);
- qemu_mutex_unlock_iothread();
-
- qemu_fflush(fb);
+ replication_do_checkpoint_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ /* Note: device state is saved into buffer */
+ ret = qemu_save_device_state(fb);
+
+ qemu_mutex_unlock_iothread();
+ if (ret < 0) {
goto out;
}
/*
+ * Only save VM's live state, which not including device state.
+ * TODO: We may need a timeout mechanism to prevent COLO process
+ * to be blocked here.
+ */
+ qemu_savevm_live_state(s->to_dst_file);
+
+ qemu_fflush(fb);
+
+ /*
* We need the size of the VMstate data in Secondary side,
* With which we can decide how much data should be read.
*/
@@ -400,6 +485,11 @@ out:
return ret;
}
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
+{
+ colo_checkpoint_notify(data);
+}
+
static void colo_process_checkpoint(MigrationState *s)
{
QIOChannelBuffer *bioc;
@@ -416,6 +506,9 @@ static void colo_process_checkpoint(MigrationState *s)
goto out;
}
+ packets_compare_notifier.notify = colo_compare_notify_checkpoint;
+ colo_compare_register_notifier(&packets_compare_notifier);
+
/*
* Wait for Secondary finish loading VM states and enter COLO
* restore.
@@ -430,6 +523,12 @@ static void colo_process_checkpoint(MigrationState *s)
object_unref(OBJECT(bioc));
qemu_mutex_lock_iothread();
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
@@ -445,6 +544,9 @@ static void colo_process_checkpoint(MigrationState *s)
qemu_sem_wait(&s->colo_checkpoint_sem);
+ if (s->state != MIGRATION_STATUS_COLO) {
+ goto out;
+ }
ret = colo_do_checkpoint_transaction(s, bioc, fb);
if (ret < 0) {
goto out;
@@ -461,11 +563,38 @@ out:
qemu_fclose(fb);
}
- timer_del(s->colo_delay_timer);
+ /*
+ * There are only two reasons we can get here, some error happened
+ * or the user triggered failover.
+ */
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+ COLO_EXIT_REASON_ERROR);
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+ COLO_EXIT_REASON_REQUEST);
+ break;
+ default:
+ abort();
+ }
/* Hope this not to be too long to wait here */
qemu_sem_wait(&s->colo_exit_sem);
qemu_sem_destroy(&s->colo_exit_sem);
+
+ /*
+ * It is safe to unregister notifier after failover finished.
+ * Besides, colo_delay_timer and colo_checkpoint_sem can't be
+ * released befor unregister notifier, or there will be use-after-free
+ * error.
+ */
+ colo_compare_unregister_notifier(&packets_compare_notifier);
+ timer_del(s->colo_delay_timer);
+ timer_free(s->colo_delay_timer);
+ qemu_sem_destroy(&s->colo_checkpoint_sem);
+
/*
* Must be called after failover BH is completed,
* Or the failover BH may shutdown the wrong fd that
@@ -533,6 +662,7 @@ void *colo_process_incoming_thread(void *opaque)
uint64_t total_size;
uint64_t value;
Error *local_err = NULL;
+ int ret;
rcu_register_thread();
qemu_sem_init(&mis->colo_incoming_sem, 0);
@@ -559,6 +689,16 @@ void *colo_process_incoming_thread(void *opaque)
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
+ qemu_mutex_lock_iothread();
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ vm_start();
+ trace_colo_vm_state_change("stop", "run");
+ qemu_mutex_unlock_iothread();
+
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
&local_err);
if (local_err) {
@@ -578,6 +718,11 @@ void *colo_process_incoming_thread(void *opaque)
goto out;
}
+ qemu_mutex_lock_iothread();
+ vm_stop_force_state(RUN_STATE_COLO);
+ trace_colo_vm_state_change("run", "stop");
+ qemu_mutex_unlock_iothread();
+
/* FIXME: This is unnecessary for periodic checkpoint mode */
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
&local_err);
@@ -591,6 +736,16 @@ void *colo_process_incoming_thread(void *opaque)
goto out;
}
+ qemu_mutex_lock_iothread();
+ cpu_synchronize_all_pre_loadvm();
+ ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+ qemu_mutex_unlock_iothread();
+
+ if (ret < 0) {
+ error_report("Load VM's live state (ram) error");
+ goto out;
+ }
+
value = colo_receive_message_value(mis->from_src_file,
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
if (local_err) {
@@ -622,15 +777,37 @@ void *colo_process_incoming_thread(void *opaque)
}
qemu_mutex_lock_iothread();
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
vmstate_loading = true;
- if (qemu_loadvm_state(fb) < 0) {
- error_report("COLO: loadvm failed");
+ ret = qemu_load_device_state(fb);
+ if (ret < 0) {
+ error_report("COLO: load device state failed");
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
+ replication_get_error_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+ /* discard colo disk buffer */
+ replication_do_checkpoint_all(&local_err);
+ if (local_err) {
+ qemu_mutex_unlock_iothread();
+ goto out;
+ }
+
+ /* Notify all filters of all NIC to do checkpoint */
+ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
+
+ if (local_err) {
qemu_mutex_unlock_iothread();
goto out;
}
vmstate_loading = false;
+ vm_start();
+ trace_colo_vm_state_change("stop", "run");
qemu_mutex_unlock_iothread();
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
@@ -654,6 +831,19 @@ out:
error_report_err(local_err);
}
+ switch (failover_get_state()) {
+ case FAILOVER_STATUS_NONE:
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+ COLO_EXIT_REASON_ERROR);
+ break;
+ case FAILOVER_STATUS_REQUIRE:
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+ COLO_EXIT_REASON_REQUEST);
+ break;
+ default:
+ abort();
+ }
+
if (fb) {
qemu_fclose(fb);
}
@@ -665,7 +855,7 @@ out:
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}
- migration_incoming_exit_colo();
+ migration_incoming_disable_colo();
rcu_unregister_thread();
return NULL;
diff --git a/migration/migration.c b/migration/migration.c
index d6ae879dc8..7696729340 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -76,10 +76,8 @@
/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
-/* The delay time (in ms) between two COLO checkpoints
- * Note: Please change this default value to 10000 when we support hybrid mode.
- */
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
+/* The delay time (in ms) between two COLO checkpoints */
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
@@ -298,6 +296,22 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
+static bool migration_colo_enabled;
+bool migration_incoming_colo_enabled(void)
+{
+ return migration_colo_enabled;
+}
+
+void migration_incoming_disable_colo(void)
+{
+ migration_colo_enabled = false;
+}
+
+void migration_incoming_enable_colo(void)
+{
+ migration_colo_enabled = true;
+}
+
void qemu_start_incoming_migration(const char *uri, Error **errp)
{
const char *p;
@@ -388,6 +402,7 @@ static void process_incoming_migration_co(void *opaque)
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyState ps;
int ret;
+ Error *local_err = NULL;
assert(mis->from_src_file);
mis->migration_incoming_co = qemu_coroutine_self();
@@ -419,7 +434,21 @@ static void process_incoming_migration_co(void *opaque)
}
/* we get COLO info, and know if we are in COLO mode */
- if (!ret && migration_incoming_enable_colo()) {
+ if (!ret && migration_incoming_colo_enabled()) {
+ /* Make sure all file formats flush their mutable metadata */
+ bdrv_invalidate_cache_all(&local_err);
+ if (local_err) {
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ error_report_err(local_err);
+ exit(EXIT_FAILURE);
+ }
+
+ if (colo_init_ram_cache() < 0) {
+ error_report("Init ram cache failed");
+ exit(EXIT_FAILURE);
+ }
+
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
mis->have_colo_incoming_thread = true;
@@ -427,6 +456,8 @@ static void process_incoming_migration_co(void *opaque)
/* Wait checkpoint incoming thread exit before free resource */
qemu_thread_join(&mis->colo_incoming_thread);
+ /* We hold the global iothread lock, so it is safe here */
+ colo_release_ram_cache();
}
if (ret < 0) {
@@ -3017,6 +3048,11 @@ static void *migration_thread(void *opaque)
qemu_savevm_send_postcopy_advise(s->to_dst_file);
}
+ if (migrate_colo_enabled()) {
+ /* Notify migration destination that we enable COLO */
+ qemu_savevm_send_colo_enable(s->to_dst_file);
+ }
+
qemu_savevm_state_setup(s->to_dst_file);
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
diff --git a/migration/ram.c b/migration/ram.c
index bc38d98cc3..7e7deec4d8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3447,6 +3447,29 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
return block->host + offset;
}
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
+ ram_addr_t offset)
+{
+ if (!offset_in_ramblock(block, offset)) {
+ return NULL;
+ }
+ if (!block->colo_cache) {
+ error_report("%s: colo_cache is NULL in block :%s",
+ __func__, block->idstr);
+ return NULL;
+ }
+
+ /*
+ * During colo checkpoint, we need bitmap of these migrated pages.
+ * It help us to decide which pages in ram cache should be flushed
+ * into VM's RAM later.
+ */
+ if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
+ ram_state->migration_dirty_pages++;
+ }
+ return block->colo_cache + offset;
+}
+
/**
* ram_handle_compressed: handle the zero page case
*
@@ -3651,6 +3674,88 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
qemu_mutex_unlock(&decomp_done_lock);
}
+/*
+ * colo cache: this is for secondary VM, we cache the whole
+ * memory of the secondary VM, it is need to hold the global lock
+ * to call this helper.
+ */
+int colo_init_ram_cache(void)
+{
+ RAMBlock *block;
+
+ rcu_read_lock();
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ block->colo_cache = qemu_anon_ram_alloc(block->used_length,
+ NULL,
+ false);
+ if (!block->colo_cache) {
+ error_report("%s: Can't alloc memory for COLO cache of block %s,"
+ "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
+ block->used_length);
+ goto out_locked;
+ }
+ memcpy(block->colo_cache, block->host, block->used_length);
+ }
+ rcu_read_unlock();
+ /*
+ * Record the dirty pages that sent by PVM, we use this dirty bitmap together
+ * with to decide which page in cache should be flushed into SVM's RAM. Here
+ * we use the same name 'ram_bitmap' as for migration.
+ */
+ if (ram_bytes_total()) {
+ RAMBlock *block;
+
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
+
+ block->bmap = bitmap_new(pages);
+ bitmap_set(block->bmap, 0, pages);
+ }
+ }
+ ram_state = g_new0(RAMState, 1);
+ ram_state->migration_dirty_pages = 0;
+ memory_global_dirty_log_start();
+
+ return 0;
+
+out_locked:
+
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ if (block->colo_cache) {
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
+ block->colo_cache = NULL;
+ }
+ }
+
+ rcu_read_unlock();
+ return -errno;
+}
+
+/* It is need to hold the global lock to call this helper */
+void colo_release_ram_cache(void)
+{
+ RAMBlock *block;
+
+ memory_global_dirty_log_stop();
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ g_free(block->bmap);
+ block->bmap = NULL;
+ }
+
+ rcu_read_lock();
+
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ if (block->colo_cache) {
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
+ block->colo_cache = NULL;
+ }
+ }
+
+ rcu_read_unlock();
+ g_free(ram_state);
+ ram_state = NULL;
+}
+
/**
* ram_load_setup: Setup RAM for migration incoming side
*
@@ -3667,6 +3772,7 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
xbzrle_load_setup();
ramblock_recv_map_init();
+
return 0;
}
@@ -3687,6 +3793,7 @@ static int ram_load_cleanup(void *opaque)
g_free(rb->receivedmap);
rb->receivedmap = NULL;
}
+
return 0;
}
@@ -3869,6 +3976,46 @@ static bool postcopy_is_running(void)
return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}
+/*
+ * Flush content of RAM cache into SVM's memory.
+ * Only flush the pages that be dirtied by PVM or SVM or both.
+ */
+static void colo_flush_ram_cache(void)
+{
+ RAMBlock *block = NULL;
+ void *dst_host;
+ void *src_host;
+ unsigned long offset = 0;
+
+ memory_global_dirty_log_sync();
+ rcu_read_lock();
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
+ }
+ rcu_read_unlock();
+
+ trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
+ rcu_read_lock();
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+
+ while (block) {
+ offset = migration_bitmap_find_dirty(ram_state, block, offset);
+
+ if (offset << TARGET_PAGE_BITS >= block->used_length) {
+ offset = 0;
+ block = QLIST_NEXT_RCU(block, next);
+ } else {
+ migration_bitmap_clear_dirty(ram_state, block, offset);
+ dst_host = block->host + (offset << TARGET_PAGE_BITS);
+ src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+ }
+ }
+
+ rcu_read_unlock();
+ trace_colo_flush_ram_cache_end();
+}
+
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
int flags = 0, ret = 0, invalid_flags = 0;
@@ -3924,13 +4071,24 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
RAMBlock *block = ram_block_from_stream(f, flags);
- host = host_from_ram_block_offset(block, addr);
+ /*
+ * After going into COLO, we should load the Page into colo_cache.
+ */
+ if (migration_incoming_in_colo_state()) {
+ host = colo_cache_from_block_offset(block, addr);
+ } else {
+ host = host_from_ram_block_offset(block, addr);
+ }
if (!host) {
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
ret = -EINVAL;
break;
}
- ramblock_recv_bitmap_set(block, host);
+
+ if (!migration_incoming_in_colo_state()) {
+ ramblock_recv_bitmap_set(block, host);
+ }
+
trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
}
@@ -4034,6 +4192,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret |= wait_for_decompress_done();
rcu_read_unlock();
trace_ram_load_complete(ret, seq_iter);
+
+ if (!ret && migration_incoming_in_colo_state()) {
+ colo_flush_ram_cache();
+ }
return ret;
}
diff --git a/migration/ram.h b/migration/ram.h
index a139066846..83ff1bc11a 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -71,4 +71,8 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
const char *block_name);
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
+/* ram cache */
+int colo_init_ram_cache(void);
+void colo_release_ram_cache(void);
+
#endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 2d10e45582..e4caff9a6a 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -56,6 +56,7 @@
#include "io/channel-file.h"
#include "sysemu/replay.h"
#include "qjson.h"
+#include "migration/colo.h"
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
@@ -82,6 +83,7 @@ enum qemu_vm_cmd {
were previously sent during
precopy but are dirty. */
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
+ MIG_CMD_ENABLE_COLO, /* Enable COLO */
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
MIG_CMD_MAX
@@ -841,6 +843,12 @@ static void qemu_savevm_command_send(QEMUFile *f,
qemu_fflush(f);
}
+void qemu_savevm_send_colo_enable(QEMUFile *f)
+{
+ trace_savevm_send_colo_enable();
+ qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
+}
+
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
uint32_t buf;
@@ -1370,13 +1378,21 @@ done:
return ret;
}
-static int qemu_save_device_state(QEMUFile *f)
+void qemu_savevm_live_state(QEMUFile *f)
{
- SaveStateEntry *se;
+ /* save QEMU_VM_SECTION_END section */
+ qemu_savevm_state_complete_precopy(f, true, false);
+ qemu_put_byte(f, QEMU_VM_EOF);
+}
- qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
- qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+int qemu_save_device_state(QEMUFile *f)
+{
+ SaveStateEntry *se;
+ if (!migration_in_colo_state()) {
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+ }
cpu_synchronize_all_states();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1432,8 +1448,6 @@ enum LoadVMExitCodes {
LOADVM_QUIT = 1,
};
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
-
/* ------ incoming postcopy messages ------ */
/* 'advise' arrives before any transfers just to tell us that a postcopy
* *might* happen - it might be skipped if precopy transferred everything
@@ -1922,6 +1936,12 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
return 0;
}
+static int loadvm_process_enable_colo(MigrationIncomingState *mis)
+{
+ migration_incoming_enable_colo();
+ return colo_init_ram_cache();
+}
+
/*
* Process an incoming 'QEMU_VM_COMMAND'
* 0 just a normal return
@@ -2001,6 +2021,9 @@ static int loadvm_process_command(QEMUFile *f)
case MIG_CMD_RECV_BITMAP:
return loadvm_handle_recv_bitmap(mis, len);
+
+ case MIG_CMD_ENABLE_COLO:
+ return loadvm_process_enable_colo(mis);
}
return 0;
@@ -2230,7 +2253,7 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
return true;
}
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
uint8_t section_type;
int ret = 0;
@@ -2401,6 +2424,22 @@ int qemu_loadvm_state(QEMUFile *f)
return ret;
}
+int qemu_load_device_state(QEMUFile *f)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ int ret;
+
+ /* Load QEMU_VM_SECTION_FULL section */
+ ret = qemu_loadvm_state_main(f, mis);
+ if (ret < 0) {
+ error_report("Failed to load device state: %d", ret);
+ return ret;
+ }
+
+ cpu_synchronize_all_post_init();
+ return 0;
+}
+
int save_snapshot(const char *name, Error **errp)
{
BlockDriverState *bs, *bs1;
diff --git a/migration/savevm.h b/migration/savevm.h
index a5e65b8ae3..51a4b9caa8 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -55,8 +55,13 @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
uint16_t len,
uint64_t *start_list,
uint64_t *length_list);
+void qemu_savevm_send_colo_enable(QEMUFile *f);
+void qemu_savevm_live_state(QEMUFile *f);
+int qemu_save_device_state(QEMUFile *f);
int qemu_loadvm_state(QEMUFile *f);
void qemu_loadvm_state_cleanup(void);
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
+int qemu_load_device_state(QEMUFile *f);
#endif
diff --git a/migration/trace-events b/migration/trace-events
index 9430f3cbe0..bd2d0cd25a 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -37,6 +37,7 @@ savevm_send_ping(uint32_t val) "0x%x"
savevm_send_postcopy_listen(void) ""
savevm_send_postcopy_run(void) ""
savevm_send_postcopy_resume(void) ""
+savevm_send_colo_enable(void) ""
savevm_send_recv_bitmap(char *name) "%s"
savevm_state_setup(void) ""
savevm_state_resume_prepare(void) ""
@@ -101,6 +102,8 @@ ram_dirty_bitmap_sync_start(void) ""
ram_dirty_bitmap_sync_wait(void) ""
ram_dirty_bitmap_sync_complete(void) ""
ram_state_resume_prepare(uint64_t v) "%" PRId64
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
+colo_flush_ram_cache_end(void) ""
# migration/migration.c
await_return_path_close_on_source_close(void) ""