Diffstat (limited to 'migration')
-rw-r--r--   migration/channel.c         9
-rw-r--r--   migration/dirtyrate.c     227
-rw-r--r--   migration/dirtyrate.h       7
-rw-r--r--   migration/migration.c     152
-rw-r--r--   migration/migration.h      44
-rw-r--r--   migration/multifd-zlib.c   38
-rw-r--r--   migration/multifd.c         6
-rw-r--r--   migration/multifd.h        66
-rw-r--r--   migration/postcopy-ram.c  186
-rw-r--r--   migration/postcopy-ram.h   11
-rw-r--r--   migration/qemu-file.c      31
-rw-r--r--   migration/qemu-file.h       1
-rw-r--r--   migration/ram.c           331
-rw-r--r--   migration/ram.h             6
-rw-r--r--   migration/savevm.c         46
-rw-r--r--   migration/socket.c         22
-rw-r--r--   migration/socket.h          1
-rw-r--r--   migration/tls.c             9
-rw-r--r--   migration/tls.h             4
-rw-r--r--   migration/trace-events     15
20 files changed, 1026 insertions, 186 deletions
diff --git a/migration/channel.c b/migration/channel.c
index 90087d8986..1b0815039f 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -38,9 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
- if (migrate_use_tls() &&
- !object_dynamic_cast(OBJECT(ioc),
- TYPE_QIO_CHANNEL_TLS)) {
+ if (migrate_channel_requires_tls_upgrade(ioc)) {
migration_tls_channel_process_incoming(s, ioc, &local_err);
} else {
migration_ioc_register_yank(ioc);
@@ -70,10 +68,7 @@ void migration_channel_connect(MigrationState *s,
ioc, object_get_typename(OBJECT(ioc)), hostname, error);
if (!error) {
- if (s->parameters.tls_creds &&
- *s->parameters.tls_creds &&
- !object_dynamic_cast(OBJECT(ioc),
- TYPE_QIO_CHANNEL_TLS)) {
+ if (migrate_channel_requires_tls_upgrade(ioc)) {
migration_tls_channel_connect(s, ioc, hostname, &error);
if (!error) {
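
Both hunks above now funnel through a single predicate. A minimal sketch of what migrate_channel_requires_tls_upgrade() plausibly looks like, reconstructed from the two open-coded checks it replaces (the real helper lives in migration/tls.c and is only assumed here, not copied from it):

    bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc)
    {
        /* Sketch only: TLS must be requested for this migration ... */
        if (!migrate_use_tls()) {
            return false;
        }
        /* ... and the channel must not already be a TLS channel. */
        return !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS);
    }
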
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index aace12a787..795fab5c37 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat;
static DirtyRateMeasureMode dirtyrate_mode =
DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
+static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
{
int64_t current_time;
@@ -60,6 +60,132 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
return msec;
}
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+ CPUState *cpu, bool start)
+{
+ if (start) {
+ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
+ } else {
+ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
+ }
+}
+
+static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
+ int64_t calc_time_ms)
+{
+ uint64_t memory_size_MB;
+ uint64_t increased_dirty_pages =
+ dirty_pages.end_pages - dirty_pages.start_pages;
+
+ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+
+ return memory_size_MB * 1000 / calc_time_ms;
+}
+
+void global_dirty_log_change(unsigned int flag, bool start)
+{
+ qemu_mutex_lock_iothread();
+ if (start) {
+ memory_global_dirty_log_start(flag);
+ } else {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * global_dirty_log_sync
+ * 1. sync dirty log from kvm
+ * 2. stop dirty tracking if needed.
+ */
+static void global_dirty_log_sync(unsigned int flag, bool one_shot)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_sync();
+ if (one_shot) {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
+{
+ CPUState *cpu;
+ DirtyPageRecord *records;
+ int nvcpu = 0;
+
+ CPU_FOREACH(cpu) {
+ nvcpu++;
+ }
+
+ stat->nvcpu = nvcpu;
+ stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu);
+
+ records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu);
+
+ return records;
+}
+
+static void vcpu_dirty_stat_collect(VcpuStat *stat,
+ DirtyPageRecord *records,
+ bool start)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ record_dirtypages(records, cpu, start);
+ }
+}
+
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
+ VcpuStat *stat,
+ unsigned int flag,
+ bool one_shot)
+{
+ DirtyPageRecord *records;
+ int64_t init_time_ms;
+ int64_t duration;
+ int64_t dirtyrate;
+ int i = 0;
+ unsigned int gen_id;
+
+retry:
+ init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ cpu_list_lock();
+ gen_id = cpu_list_generation_id_get();
+ records = vcpu_dirty_stat_alloc(stat);
+ vcpu_dirty_stat_collect(stat, records, true);
+ cpu_list_unlock();
+
+ duration = dirty_stat_wait(calc_time_ms, init_time_ms);
+
+ global_dirty_log_sync(flag, one_shot);
+
+ cpu_list_lock();
+ if (gen_id != cpu_list_generation_id_get()) {
+ g_free(records);
+ g_free(stat->rates);
+ cpu_list_unlock();
+ goto retry;
+ }
+ vcpu_dirty_stat_collect(stat, records, false);
+ cpu_list_unlock();
+
+ for (i = 0; i < stat->nvcpu; i++) {
+ dirtyrate = do_calculate_dirtyrate(records[i], duration);
+
+ stat->rates[i].id = i;
+ stat->rates[i].dirty_rate = dirtyrate;
+
+ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+ }
+
+ g_free(records);
+
+ return duration;
+}
+
static bool is_sample_period_valid(int64_t sec)
{
if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
@@ -396,44 +522,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
return true;
}
-static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
- CPUState *cpu, bool start)
-{
- if (start) {
- dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
- } else {
- dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
- }
-}
-
-static void dirtyrate_global_dirty_log_start(void)
-{
- qemu_mutex_lock_iothread();
- memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
- qemu_mutex_unlock_iothread();
-}
-
-static void dirtyrate_global_dirty_log_stop(void)
-{
- qemu_mutex_lock_iothread();
- memory_global_dirty_log_sync();
- memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
- qemu_mutex_unlock_iothread();
-}
-
-static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
-{
- uint64_t memory_size_MB;
- int64_t time_s;
- uint64_t increased_dirty_pages =
- dirty_pages.end_pages - dirty_pages.start_pages;
-
- memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
- time_s = DirtyStat.calc_time;
-
- return memory_size_MB / time_s;
-}
-
static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
bool start)
{
@@ -444,11 +532,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
}
}
-static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
-{
- DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages);
-}
-
static inline void dirtyrate_manual_reset_protect(void)
{
RAMBlock *block = NULL;
@@ -492,71 +575,49 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
DirtyStat.start_time = start_time / 1000;
msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, start_time);
+ msec = dirty_stat_wait(msec, start_time);
DirtyStat.calc_time = msec / 1000;
/*
- * dirtyrate_global_dirty_log_stop do two things.
+ * do two things.
* 1. fetch dirty bitmap from kvm
* 2. stop dirty tracking
*/
- dirtyrate_global_dirty_log_stop();
+ global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
record_dirtypages_bitmap(&dirty_pages, false);
- do_calculate_dirtyrate_bitmap(dirty_pages);
+ DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec);
}
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
- CPUState *cpu;
- int64_t msec = 0;
- int64_t start_time;
+ int64_t duration;
uint64_t dirtyrate = 0;
uint64_t dirtyrate_sum = 0;
- DirtyPageRecord *dirty_pages;
- int nvcpu = 0;
int i = 0;
- CPU_FOREACH(cpu) {
- nvcpu++;
- }
-
- dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
-
- DirtyStat.dirty_ring.nvcpu = nvcpu;
- DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
-
- dirtyrate_global_dirty_log_start();
-
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, true);
- }
-
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- DirtyStat.start_time = start_time / 1000;
+ /* start log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
- msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, start_time);
- DirtyStat.calc_time = msec / 1000;
+ DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
- dirtyrate_global_dirty_log_stop();
+ /* calculate vcpu dirtyrate */
+ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
+ &DirtyStat.dirty_ring,
+ GLOBAL_DIRTY_DIRTY_RATE,
+ true);
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, false);
- }
+ DirtyStat.calc_time = duration / 1000;
+ /* calculate vm dirtyrate */
for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
- dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
- trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
-
- DirtyStat.dirty_ring.rates[i].id = i;
+ dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
dirtyrate_sum += dirtyrate;
}
DirtyStat.dirty_rate = dirtyrate_sum;
- free(dirty_pages);
}
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
@@ -574,7 +635,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
rcu_read_unlock();
msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, initial_time);
+ msec = dirty_stat_wait(msec, initial_time);
DirtyStat.start_time = initial_time / 1000;
DirtyStat.calc_time = msec / 1000;
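
The rate produced by do_calculate_dirtyrate() above is dirtied memory per second in MB/s: the page delta is converted to bytes, shifted down to MiB, then scaled by 1000 / calc_time_ms. A standalone sketch with a worked number (the 4 KiB page size and the helper name are illustrative assumptions, not QEMU symbols):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE_ASSUMED 4096      /* assumption: 4 KiB target pages */

    /* Mirrors do_calculate_dirtyrate(): dirtied MiB scaled to a per-second rate. */
    static int64_t dirty_rate_mb_s(uint64_t start_pages, uint64_t end_pages,
                                   int64_t calc_time_ms)
    {
        uint64_t mib = ((end_pages - start_pages) * PAGE_SIZE_ASSUMED) >> 20;
        return mib * 1000 / calc_time_ms;
    }

    int main(void)
    {
        /* 131072 dirty pages * 4 KiB = 512 MiB over 1000 ms -> 512 MB/s */
        printf("%lld MB/s\n", (long long)dirty_rate_mb_s(0, 131072, 1000));
        return 0;
    }
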
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index 69d4c5b865..594a5c0bb6 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -13,6 +13,8 @@
#ifndef QEMU_MIGRATION_DIRTYRATE_H
#define QEMU_MIGRATION_DIRTYRATE_H
+#include "sysemu/dirtyrate.h"
+
/*
* Sample 512 pages per GB as default.
*/
@@ -65,11 +67,6 @@ typedef struct SampleVMStat {
uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
} SampleVMStat;
-typedef struct VcpuStat {
- int nvcpu; /* number of vcpu */
- DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
-} VcpuStat;
-
/*
* Store calculation statistics for each measure.
*/
diff --git a/migration/migration.c b/migration/migration.c
index 78f5057373..e03f698a3c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -48,6 +48,7 @@
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
+#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
@@ -215,9 +216,11 @@ void migration_object_init(void)
current_incoming->postcopy_remote_fds =
g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
qemu_mutex_init(&current_incoming->rp_mutex);
+ qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
qemu_event_init(&current_incoming->main_thread_load_event, false);
qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
+ qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
qemu_mutex_init(&current_incoming->page_request_mutex);
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
@@ -321,6 +324,12 @@ void migration_incoming_state_destroy(void)
mis->page_requested = NULL;
}
+ if (mis->postcopy_qemufile_dst) {
+ migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
+ qemu_fclose(mis->postcopy_qemufile_dst);
+ mis->postcopy_qemufile_dst = NULL;
+ }
+
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
@@ -691,9 +700,9 @@ static bool postcopy_try_recover(void)
/*
* Here, we only wake up the main loading thread (while the
- * fault thread will still be waiting), so that we can receive
+ * rest threads will still be waiting), so that we can receive
* commands from source now, and answer it if needed. The
- * fault thread will be woken up afterwards until we are sure
+ * rest threads will be woken up afterwards until we are sure
* that source is ready to reply to page requests.
*/
qemu_sem_post(&mis->postcopy_pause_sem_dst);
@@ -714,15 +723,21 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp)
migration_incoming_process();
}
+static bool migration_needs_multiple_sockets(void)
+{
+ return migrate_use_multifd() || migrate_postcopy_preempt();
+}
+
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
bool start_migration;
+ QEMUFile *f;
if (!mis->from_src_file) {
/* The first connection (multifd may have multiple) */
- QEMUFile *f = qemu_file_new_input(ioc);
+ f = qemu_file_new_input(ioc);
if (!migration_incoming_setup(f, errp)) {
return;
@@ -730,13 +745,19 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
/*
* Common migration only needs one channel, so we can start
- * right now. Multifd needs more than one channel, we wait.
+ * right now. Some features need more than one channel, we wait.
*/
- start_migration = !migrate_use_multifd();
+ start_migration = !migration_needs_multiple_sockets();
} else {
/* Multiple connections */
- assert(migrate_use_multifd());
- start_migration = multifd_recv_new_channel(ioc, &local_err);
+ assert(migration_needs_multiple_sockets());
+ if (migrate_use_multifd()) {
+ start_migration = multifd_recv_new_channel(ioc, &local_err);
+ } else {
+ assert(migrate_postcopy_preempt());
+ f = qemu_file_new_input(ioc);
+ start_migration = postcopy_preempt_new_channel(mis, f);
+ }
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -761,11 +782,20 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
bool migration_has_all_channels(void)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- bool all_channels;
- all_channels = multifd_recv_all_channels_created();
+ if (!mis->from_src_file) {
+ return false;
+ }
+
+ if (migrate_use_multifd()) {
+ return multifd_recv_all_channels_created();
+ }
+
+ if (migrate_postcopy_preempt()) {
+ return mis->postcopy_qemufile_dst != NULL;
+ }
- return all_channels && mis->from_src_file != NULL;
+ return true;
}
/*
@@ -1027,6 +1057,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
info->ram->normal_bytes = ram_counters.normal * page_size;
info->ram->mbps = s->mbps;
info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+ info->ram->dirty_sync_missed_zero_copy =
+ ram_counters.dirty_sync_missed_zero_copy;
info->ram->postcopy_requests = ram_counters.postcopy_requests;
info->ram->page_size = page_size;
info->ram->multifd_bytes = ram_counters.multifd_bytes;
@@ -1274,7 +1306,9 @@ static bool migrate_caps_check(bool *cap_list,
#ifdef CONFIG_LINUX
if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
(!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
- migrate_use_compression() ||
+ cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
+ cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
+ migrate_multifd_compression() ||
migrate_use_tls())) {
error_setg(errp,
"Zero copy only available for non-compressed non-TLS multifd migration");
@@ -1297,6 +1331,13 @@ static bool migrate_caps_check(bool *cap_list,
return false;
}
+ if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
+ if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+ error_setg(errp, "Postcopy preempt requires postcopy-ram");
+ return false;
+ }
+ }
+
return true;
}
@@ -1511,6 +1552,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
return false;
}
+
+#ifdef CONFIG_LINUX
+ if (migrate_use_zero_copy_send() &&
+ ((params->has_multifd_compression && params->multifd_compression) ||
+ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) {
+ error_setg(errp,
+ "Zero copy only available for non-compressed non-TLS multifd migration");
+ return false;
+ }
+#endif
+
return true;
}
@@ -1867,6 +1919,12 @@ static void migrate_fd_cleanup(MigrationState *s)
qemu_fclose(tmp);
}
+ if (s->postcopy_qemufile_src) {
+ migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
+ qemu_fclose(s->postcopy_qemufile_src);
+ s->postcopy_qemufile_src = NULL;
+ }
+
assert(!migration_is_active(s));
if (s->state == MIGRATION_STATUS_CANCELLING) {
@@ -2663,6 +2721,15 @@ bool migrate_background_snapshot(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
}
+bool migrate_postcopy_preempt(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
+}
+
/* migration thread support */
/*
* Something bad happened to the RP stream, mark an error
@@ -3002,6 +3069,12 @@ static int postcopy_start(MigrationState *ms)
int64_t bandwidth = migrate_max_postcopy_bandwidth();
bool restart_block = false;
int cur_state = MIGRATION_STATUS_ACTIVE;
+
+ if (postcopy_preempt_wait_channel(ms)) {
+ migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+ return -1;
+ }
+
if (!migrate_pause_before_switchover()) {
migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -3141,6 +3214,8 @@ static int postcopy_start(MigrationState *ms)
MIGRATION_STATUS_FAILED);
}
+ trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
+
return ret;
fail_closefb:
@@ -3253,6 +3328,11 @@ static void migration_completion(MigrationState *s)
qemu_savevm_state_complete_postcopy(s->to_dst_file);
qemu_mutex_unlock_iothread();
+ /* Shutdown the postcopy fast path thread */
+ if (migrate_postcopy_preempt()) {
+ postcopy_preempt_shutdown_file(s);
+ }
+
trace_migration_completion_postcopy_end_after_complete();
} else {
goto fail;
@@ -3447,6 +3527,18 @@ static MigThrError postcopy_pause(MigrationState *s)
qemu_file_shutdown(file);
qemu_fclose(file);
+ /*
+ * Do the same to postcopy fast path socket too if there is. No
+ * locking needed because no racer as long as we do this before setting
+ * status to paused.
+ */
+ if (s->postcopy_qemufile_src) {
+ migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
+ qemu_file_shutdown(s->postcopy_qemufile_src);
+ qemu_fclose(s->postcopy_qemufile_src);
+ s->postcopy_qemufile_src = NULL;
+ }
+
migrate_set_state(&s->state, s->state,
MIGRATION_STATUS_POSTCOPY_PAUSED);
@@ -3464,6 +3556,14 @@ static MigThrError postcopy_pause(MigrationState *s)
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
/* Woken up by a recover procedure. Give it a shot */
+ if (postcopy_preempt_wait_channel(s)) {
+ /*
+ * Preempt enabled, and new channel create failed; loop
+ * back to wait for another recovery.
+ */
+ continue;
+ }
+
/*
* Firstly, let's wake up the return path now, with a new
* return path channel.
@@ -3502,8 +3602,13 @@ static MigThrError migration_detect_error(MigrationState *s)
return MIG_THR_ERR_FATAL;
}
- /* Try to detect any file errors */
- ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
+ /*
+ * Try to detect any file errors. Note that postcopy_qemufile_src will
+ * be NULL when postcopy preempt is not enabled.
+ */
+ ret = qemu_file_get_error_obj_any(s->to_dst_file,
+ s->postcopy_qemufile_src,
+ &local_error);
if (!ret) {
/* Everything is fine */
assert(!local_error);
@@ -4141,6 +4246,15 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
}
}
+ /* This needs to be done before resuming a postcopy */
+ if (postcopy_preempt_setup(s, &local_err)) {
+ error_report_err(local_err);
+ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+ MIGRATION_STATUS_FAILED);
+ migrate_fd_cleanup(s);
+ return;
+ }
+
if (resume) {
/* Wakeup the main migration thread to do the recovery */
migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4265,6 +4379,11 @@ static Property migration_properties[] = {
DEFINE_PROP_SIZE("announce-step", MigrationState,
parameters.announce_step,
DEFAULT_MIGRATE_ANNOUNCE_STEP),
+ DEFINE_PROP_BOOL("x-postcopy-preempt-break-huge", MigrationState,
+ postcopy_preempt_break_huge, true),
+ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
+ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
+ DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
/* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -4274,6 +4393,8 @@ static Property migration_properties[] = {
DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
+ DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
+ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
@@ -4300,18 +4421,16 @@ static void migration_class_init(ObjectClass *klass, void *data)
static void migration_instance_finalize(Object *obj)
{
MigrationState *ms = MIGRATION_OBJ(obj);
- MigrationParameters *params = &ms->parameters;
qemu_mutex_destroy(&ms->error_mutex);
qemu_mutex_destroy(&ms->qemu_file_lock);
- g_free(params->tls_hostname);
- g_free(params->tls_creds);
qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
qemu_sem_destroy(&ms->pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
qemu_sem_destroy(&ms->rp_state.rp_sem);
+ qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
error_free(ms->error);
}
@@ -4358,6 +4477,7 @@ static void migration_instance_init(Object *obj)
qemu_sem_init(&ms->rp_state.rp_sem, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
qemu_sem_init(&ms->wait_unplug_sem, 0);
+ qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
qemu_mutex_init(&ms->qemu_file_lock);
}
diff --git a/migration/migration.h b/migration/migration.h
index 485d58b95f..cdad8aceaa 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -23,6 +23,7 @@
#include "io/channel-buffer.h"
#include "net/announce.h"
#include "qom/object.h"
+#include "postcopy-ram.h"
struct PostcopyBlocktimeContext;
@@ -67,7 +68,7 @@ typedef struct {
struct MigrationIncomingState {
QEMUFile *from_src_file;
/* Previously received RAM's RAMBlock pointer */
- RAMBlock *last_recv_block;
+ RAMBlock *last_recv_block[RAM_CHANNEL_MAX];
/* A hook to allow cleanup at the end of incoming migration */
void *transport_data;
void (*transport_cleanup)(void *data);
@@ -112,6 +113,23 @@ struct MigrationIncomingState {
* enabled.
*/
unsigned int postcopy_channels;
+ /* QEMUFile for postcopy only; it'll be handled by a separate thread */
+ QEMUFile *postcopy_qemufile_dst;
+ /* Postcopy priority thread is used to receive postcopy requested pages */
+ QemuThread postcopy_prio_thread;
+ bool postcopy_prio_thread_created;
+ /*
+ * Used to sync between the ram load main thread and the fast ram load
+ * thread. It protects postcopy_qemufile_dst, which is the postcopy
+ * fast channel.
+ *
+ * The ram fast load thread will take it mostly for the whole lifecycle
+ * because it needs to continuously read data from the channel, and
+ * it'll only release this mutex if postcopy is interrupted, so that
+ * the ram load main thread will take this mutex over and properly
+ * release the broken channel.
+ */
+ QemuMutex postcopy_prio_thread_mutex;
/*
* An array of temp host huge pages to be used, one for each postcopy
* channel.
@@ -141,6 +159,13 @@ struct MigrationIncomingState {
/* notify PAUSED postcopy incoming migrations to try to continue */
QemuSemaphore postcopy_pause_sem_dst;
QemuSemaphore postcopy_pause_sem_fault;
+ /*
+ * This semaphore is used to allow the ram fast load thread (only when
+ * postcopy preempt is enabled) to sleep when a network interruption is
+ * detected. When the recovery is done, the main load
+ * thread will kick the fast ram load thread using this semaphore.
+ */
+ QemuSemaphore postcopy_pause_sem_fast_load;
/* List of listening socket addresses */
SocketAddressList *socket_address_list;
@@ -192,6 +217,15 @@ struct MigrationState {
QEMUBH *cleanup_bh;
/* Protected by qemu_file_lock */
QEMUFile *to_dst_file;
+ /* Postcopy specific transfer channel */
+ QEMUFile *postcopy_qemufile_src;
+ /*
+ * It is posted when the preempt channel is established. Note: this is
+ * used for both the start or recover of a postcopy migration. We'll
+ * post to this sem every time a new preempt channel is created in the
+ * main thread, and we keep post() and wait() in pair.
+ */
+ QemuSemaphore postcopy_qemufile_src_sem;
QIOChannelBuffer *bioc;
/*
* Protects to_dst_file/from_dst_file pointers. We need to make sure we
@@ -306,6 +340,13 @@ struct MigrationState {
bool send_configuration;
/* Whether we send section footer during migration */
bool send_section_footer;
+ /*
+ * Whether we allow breaking a host huge page send when postcopy preempt is
+ * enabled. When disabled, we won't interrupt precopy in the middle of
+ * sending a host huge page, which is the old behavior of vanilla postcopy.
+ * NOTE: this parameter is ignored if postcopy preempt is not enabled.
+ */
+ bool postcopy_preempt_break_huge;
/* Needed by postcopy-pause state */
QemuSemaphore postcopy_pause_sem;
@@ -400,6 +441,7 @@ int migrate_decompress_threads(void);
bool migrate_use_events(void);
bool migrate_postcopy_blocktime(void);
bool migrate_background_snapshot(void);
+bool migrate_postcopy_preempt(void);
/* Sending on the return path - generic and then for each message type */
void migrate_send_rp_shut(MigrationIncomingState *mis,
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 3a7ae44485..18213a9513 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -27,6 +27,8 @@ struct zlib_data {
uint8_t *zbuff;
/* size of compressed buffer */
uint32_t zbuff_len;
+ /* uncompressed buffer of size qemu_target_page_size() */
+ uint8_t *buf;
};
/* Multifd zlib compression */
@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
{
struct zlib_data *z = g_new0(struct zlib_data, 1);
z_stream *zs = &z->zs;
+ const char *err_msg;
zs->zalloc = Z_NULL;
zs->zfree = Z_NULL;
zs->opaque = Z_NULL;
if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
- g_free(z);
- error_setg(errp, "multifd %u: deflate init failed", p->id);
- return -1;
+ err_msg = "deflate init failed";
+ goto err_free_z;
}
/* This is the maximum size of the compressed buffer */
z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
z->zbuff = g_try_malloc(z->zbuff_len);
if (!z->zbuff) {
- deflateEnd(&z->zs);
- g_free(z);
- error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
- return -1;
+ err_msg = "out of memory for zbuff";
+ goto err_deflate_end;
+ }
+ z->buf = g_try_malloc(qemu_target_page_size());
+ if (!z->buf) {
+ err_msg = "out of memory for buf";
+ goto err_free_zbuff;
}
p->data = z;
return 0;
+
+err_free_zbuff:
+ g_free(z->zbuff);
+err_deflate_end:
+ deflateEnd(&z->zs);
+err_free_z:
+ g_free(z);
+ error_setg(errp, "multifd %u: %s", p->id, err_msg);
+ return -1;
}
/**
@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
deflateEnd(&z->zs);
g_free(z->zbuff);
z->zbuff = NULL;
+ g_free(z->buf);
+ z->buf = NULL;
g_free(p->data);
p->data = NULL;
}
@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
flush = Z_SYNC_FLUSH;
}
+ /*
+ * Since the VM might be running, the page may be changing concurrently
+ * with compression. zlib does not guarantee that this is safe,
+ * therefore copy the page before calling deflate().
+ */
+ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
zs->avail_in = page_size;
- zs->next_in = p->pages->block->host + p->normal[i];
+ zs->next_in = z->buf;
zs->avail_out = available;
zs->next_out = z->zbuff + out_size;
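
The zlib hunk above copies each guest page into z->buf before deflate() because the page may still be changing under the running VM. A dependency-light sketch of the same "compress a private snapshot" pattern (names are illustrative, not the multifd code; assumes a deflateInit()-ed z_stream and caller-provided buffers):

    #include <string.h>
    #include <zlib.h>

    /* Never let zlib read a live, concurrently-written page: copy it to a
     * scratch buffer first and compress the stable copy. */
    static int compress_page_snapshot(z_stream *zs, const unsigned char *live_page,
                                      unsigned char *scratch, size_t page_size,
                                      unsigned char *out, size_t out_len)
    {
        memcpy(scratch, live_page, page_size);   /* stable private copy */
        zs->next_in = scratch;
        zs->avail_in = page_size;
        zs->next_out = out;
        zs->avail_out = out_len;
        return deflate(zs, Z_SYNC_FLUSH);        /* same flush mode as above */
    }
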
diff --git a/migration/multifd.c b/migration/multifd.c
index 684c014c86..586ddc9d65 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -624,6 +624,8 @@ int multifd_send_sync_main(QEMUFile *f)
if (ret < 0) {
error_report_err(err);
return -1;
+ } else if (ret == 1) {
+ dirty_sync_missed_zero_copy();
}
}
}
@@ -831,9 +833,7 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
migrate_get_current()->hostname, error);
if (!error) {
- if (migrate_use_tls() &&
- !object_dynamic_cast(OBJECT(ioc),
- TYPE_QIO_CHANNEL_TLS)) {
+ if (migrate_channel_requires_tls_upgrade(ioc)) {
multifd_tls_channel_connect(p, ioc, &error);
if (!error) {
/*
diff --git a/migration/multifd.h b/migration/multifd.h
index 4d8d89e5e5..519f498643 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -65,7 +65,9 @@ typedef struct {
} MultiFDPages_t;
typedef struct {
- /* this fields are not changed once the thread is created */
+ /* Fields are only written at creation/deletion time */
+ /* No lock required for them, they are read only */
+
/* channel number */
uint8_t id;
/* channel thread name */
@@ -74,39 +76,47 @@ typedef struct {
QemuThread thread;
/* communication channel */
QIOChannel *c;
+ /* is the yank function registered */
+ bool registered_yank;
+ /* packet allocated len */
+ uint32_t packet_len;
+ /* multifd flags for sending ram */
+ int write_flags;
+
/* sem where to wait for more work */
QemuSemaphore sem;
+ /* syncs main thread and channels */
+ QemuSemaphore sem_sync;
+
/* this mutex protects the following parameters */
QemuMutex mutex;
/* is this channel thread running */
bool running;
/* should this thread finish */
bool quit;
- /* is the yank function registered */
- bool registered_yank;
+ /* multifd flags for each packet */
+ uint32_t flags;
+ /* global number of generated multifd packets */
+ uint64_t packet_num;
/* thread has work to do */
int pending_job;
- /* array of pages to sent */
+ /* array of pages to be sent.
+ * The owner of 'pages' depends on the 'pending_job' value:
+ * pending_job == 0 -> migration_thread can use it.
+ * pending_job != 0 -> multifd_channel can use it.
+ */
MultiFDPages_t *pages;
- /* packet allocated len */
- uint32_t packet_len;
+
+ /* thread local variables. No locking required */
+
/* pointer to the packet */
MultiFDPacket_t *packet;
- /* multifd flags for sending ram */
- int write_flags;
- /* multifd flags for each packet */
- uint32_t flags;
/* size of the next packet that contains pages */
uint32_t next_packet_size;
- /* global number of generated multifd packets */
- uint64_t packet_num;
- /* thread local variables */
/* packets sent through this channel */
uint64_t num_packets;
/* non zero pages sent through this channel */
uint64_t total_normal_pages;
- /* syncs main thread and channels */
- QemuSemaphore sem_sync;
/* buffers to send */
struct iovec *iov;
/* number of iovs used */
@@ -120,7 +130,9 @@ typedef struct {
} MultiFDSendParams;
typedef struct {
- /* this fields are not changed once the thread is created */
+ /* Fields are only written at creation/deletion time */
+ /* No lock required for them, they are read only */
+
/* channel number */
uint8_t id;
/* channel thread name */
@@ -129,31 +141,35 @@ typedef struct {
QemuThread thread;
/* communication channel */
QIOChannel *c;
+ /* packet allocated len */
+ uint32_t packet_len;
+
+ /* syncs main thread and channels */
+ QemuSemaphore sem_sync;
+
/* this mutex protects the following parameters */
QemuMutex mutex;
/* is this channel thread running */
bool running;
/* should this thread finish */
bool quit;
- /* ramblock host address */
- uint8_t *host;
- /* packet allocated len */
- uint32_t packet_len;
- /* pointer to the packet */
- MultiFDPacket_t *packet;
/* multifd flags for each packet */
uint32_t flags;
/* global number of generated multifd packets */
uint64_t packet_num;
- /* thread local variables */
+
+ /* thread local variables. No locking required */
+
+ /* pointer to the packet */
+ MultiFDPacket_t *packet;
/* size of the next packet that contains pages */
uint32_t next_packet_size;
/* packets sent through this channel */
uint64_t num_packets;
+ /* ramblock host address */
+ uint8_t *host;
/* non zero pages recv through this channel */
uint64_t total_normal_pages;
- /* syncs main thread and channels */
- QemuSemaphore sem_sync;
/* buffers to recv */
struct iovec *iov;
/* Pages that are not zero */
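
The reordering above documents an ownership protocol, not just fields: 'pages' belongs to the migration thread while pending_job == 0 and to the channel thread otherwise, with 'mutex' guarding the flag and 'sem' waking the channel. A hedged sketch of the hand-off those comments imply (an illustration of the protocol, not the actual multifd_send_pages()/multifd_send_thread() bodies):

    /* Producer (migration thread): publish work, then wake the channel. */
    static void hand_off_pages(MultiFDSendParams *p, MultiFDPages_t *pages)
    {
        qemu_mutex_lock(&p->mutex);
        p->pages = pages;          /* channel thread owns 'pages' from here on */
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);    /* wake the channel thread */
    }

    /* Consumer (channel thread): take the work, send it, hand 'pages' back. */
    static void channel_consume_one(MultiFDSendParams *p)
    {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);
        /* ... build a packet from p->pages and write it to p->c ... */
        p->pending_job--;          /* migration thread may reuse 'pages' now */
        qemu_mutex_unlock(&p->mutex);
    }
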
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a66dd536d9..b9a37ef255 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -33,6 +33,10 @@
#include "trace.h"
#include "hw/boards.h"
#include "exec/ramblock.h"
+#include "socket.h"
+#include "qemu-file.h"
+#include "yank_functions.h"
+#include "tls.h"
/* Arbitrary limit on size of each discard command,
* keeps them around ~200 bytes
@@ -567,6 +571,11 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
trace_postcopy_ram_incoming_cleanup_entry();
+ if (mis->postcopy_prio_thread_created) {
+ qemu_thread_join(&mis->postcopy_prio_thread);
+ mis->postcopy_prio_thread_created = false;
+ }
+
if (mis->have_fault_thread) {
Error *local_err = NULL;
@@ -1102,8 +1111,13 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis)
int err, i, channels;
void *temp_page;
- /* TODO: will be boosted when enable postcopy preemption */
- mis->postcopy_channels = 1;
+ if (migrate_postcopy_preempt()) {
+ /* If preemption enabled, need extra channel for urgent requests */
+ mis->postcopy_channels = RAM_CHANNEL_MAX;
+ } else {
+ /* Both precopy/postcopy on the same channel */
+ mis->postcopy_channels = 1;
+ }
channels = mis->postcopy_channels;
mis->postcopy_tmp_pages = g_malloc0_n(sizeof(PostcopyTmpPage), channels);
@@ -1170,7 +1184,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
return -1;
}
- postcopy_thread_create(mis, &mis->fault_thread, "postcopy/fault",
+ postcopy_thread_create(mis, &mis->fault_thread, "fault-default",
postcopy_ram_fault_thread, QEMU_THREAD_JOINABLE);
mis->have_fault_thread = true;
@@ -1185,6 +1199,16 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
return -1;
}
+ if (migrate_postcopy_preempt()) {
+ /*
+ * This thread needs to be created after the temp pages because
+ * it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
+ */
+ postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast",
+ postcopy_preempt_thread, QEMU_THREAD_JOINABLE);
+ mis->postcopy_prio_thread_created = true;
+ }
+
trace_postcopy_ram_enable_notify();
return 0;
@@ -1514,3 +1538,159 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
}
}
}
+
+bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+{
+ /*
+ * The new loading channel has its own threads, so it needs to be
+ * blocked too. It's by default true, just be explicit.
+ */
+ qemu_file_set_blocking(file, true);
+ mis->postcopy_qemufile_dst = file;
+ trace_postcopy_preempt_new_channel();
+
+ /* Start the migration immediately */
+ return true;
+}
+
+/*
+ * Setup the postcopy preempt channel with the IOC. If ERROR is specified,
+ * setup the error instead. This helper will free the ERROR if specified.
+ */
+static void
+postcopy_preempt_send_channel_done(MigrationState *s,
+ QIOChannel *ioc, Error *local_err)
+{
+ if (local_err) {
+ migrate_set_error(s, local_err);
+ error_free(local_err);
+ } else {
+ migration_ioc_register_yank(ioc);
+ s->postcopy_qemufile_src = qemu_file_new_output(ioc);
+ trace_postcopy_preempt_new_channel();
+ }
+
+ /*
+ * Kick the waiter in all cases. The waiter should check upon
+ * postcopy_qemufile_src to know whether it failed or not.
+ */
+ qemu_sem_post(&s->postcopy_qemufile_src_sem);
+}
+
+static void
+postcopy_preempt_tls_handshake(QIOTask *task, gpointer opaque)
+{
+ g_autoptr(QIOChannel) ioc = QIO_CHANNEL(qio_task_get_source(task));
+ MigrationState *s = opaque;
+ Error *local_err = NULL;
+
+ qio_task_propagate_error(task, &local_err);
+ postcopy_preempt_send_channel_done(s, ioc, local_err);
+}
+
+static void
+postcopy_preempt_send_channel_new(QIOTask *task, gpointer opaque)
+{
+ g_autoptr(QIOChannel) ioc = QIO_CHANNEL(qio_task_get_source(task));
+ MigrationState *s = opaque;
+ QIOChannelTLS *tioc;
+ Error *local_err = NULL;
+
+ if (qio_task_propagate_error(task, &local_err)) {
+ goto out;
+ }
+
+ if (migrate_channel_requires_tls_upgrade(ioc)) {
+ tioc = migration_tls_client_create(s, ioc, s->hostname, &local_err);
+ if (!tioc) {
+ goto out;
+ }
+ trace_postcopy_preempt_tls_handshake();
+ qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-preempt");
+ qio_channel_tls_handshake(tioc, postcopy_preempt_tls_handshake,
+ s, NULL, NULL);
+ /* Setup the channel until TLS handshake finished */
+ return;
+ }
+
+out:
+ /* This handles both good and error cases */
+ postcopy_preempt_send_channel_done(s, ioc, local_err);
+}
+
+/* Returns 0 if channel established, -1 for error. */
+int postcopy_preempt_wait_channel(MigrationState *s)
+{
+ /* If preempt not enabled, no need to wait */
+ if (!migrate_postcopy_preempt()) {
+ return 0;
+ }
+
+ /*
+ * We need the postcopy preempt channel to be established before
+ * starting doing anything.
+ */
+ qemu_sem_wait(&s->postcopy_qemufile_src_sem);
+
+ return s->postcopy_qemufile_src ? 0 : -1;
+}
+
+int postcopy_preempt_setup(MigrationState *s, Error **errp)
+{
+ if (!migrate_postcopy_preempt()) {
+ return 0;
+ }
+
+ if (!migrate_multi_channels_is_allowed()) {
+ error_setg(errp, "Postcopy preempt is not supported as current "
+ "migration stream does not support multi-channels.");
+ return -1;
+ }
+
+ /* Kick an async task to connect */
+ socket_send_channel_create(postcopy_preempt_send_channel_new, s);
+
+ return 0;
+}
+
+static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
+{
+ trace_postcopy_pause_fast_load();
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+ qemu_sem_wait(&mis->postcopy_pause_sem_fast_load);
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ trace_postcopy_pause_fast_load_continued();
+}
+
+void *postcopy_preempt_thread(void *opaque)
+{
+ MigrationIncomingState *mis = opaque;
+ int ret;
+
+ trace_postcopy_preempt_thread_entry();
+
+ rcu_register_thread();
+
+ qemu_sem_post(&mis->thread_sync_sem);
+
+ /* Sending RAM_SAVE_FLAG_EOS to terminate this thread */
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ while (1) {
+ ret = ram_load_postcopy(mis->postcopy_qemufile_dst,
+ RAM_CHANNEL_POSTCOPY);
+ /* If error happened, go into recovery routine */
+ if (ret) {
+ postcopy_pause_ram_fast_load(mis);
+ } else {
+ /* We're done */
+ break;
+ }
+ }
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+
+ rcu_unregister_thread();
+
+ trace_postcopy_preempt_thread_exit();
+
+ return NULL;
+}
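
The source side of the preempt channel follows a simple contract spelled out above: postcopy_preempt_send_channel_done() posts postcopy_qemufile_src_sem in every outcome, and postcopy_preempt_wait_channel() decides success by whether the QEMUFile pointer got set. A dependency-free model of that pairing (plain POSIX primitives, not QEMU code):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>

    static sem_t channel_sem;
    static void *channel;                 /* stands in for postcopy_qemufile_src */

    static void *async_connect(void *arg)
    {
        channel = arg;                    /* NULL here would mean "failed" */
        sem_post(&channel_sem);           /* kick the waiter in all cases */
        return NULL;
    }

    static int wait_channel(void)
    {
        sem_wait(&channel_sem);
        return channel ? 0 : -1;          /* 0 = established, -1 = error */
    }

    int main(void)
    {
        pthread_t t;
        static int dummy;
        sem_init(&channel_sem, 0, 0);
        pthread_create(&t, NULL, async_connect, &dummy);
        printf("wait_channel() = %d\n", wait_channel());
        pthread_join(t, NULL);
        return 0;
    }
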
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 07684c0e1d..6147bf7d1d 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -183,4 +183,15 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr,
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
uint64_t client_addr, uint64_t offset);
+/* Hard-code channels for now for postcopy preemption */
+enum PostcopyChannels {
+ RAM_CHANNEL_PRECOPY = 0,
+ RAM_CHANNEL_POSTCOPY = 1,
+ RAM_CHANNEL_MAX,
+};
+
+bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+int postcopy_preempt_setup(MigrationState *s, Error **errp);
+int postcopy_preempt_wait_channel(MigrationState *s);
+
#endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 1e80d496b7..4f400c2e52 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -161,6 +161,33 @@ int qemu_file_get_error_obj(QEMUFile *f, Error **errp)
}
/*
+ * Get last error for either stream f1 or f2 with optional Error*.
+ * The error returned (non-zero) can be either from f1 or f2.
+ *
+ * If any of the qemufile* is NULL, then skip the check on that file.
+ *
+ * When there is no error on either qemufile, zero is returned.
+ */
+int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp)
+{
+ int ret = 0;
+
+ if (f1) {
+ ret = qemu_file_get_error_obj(f1, errp);
+ /* If there's already error detected, return */
+ if (ret) {
+ return ret;
+ }
+ }
+
+ if (f2) {
+ ret = qemu_file_get_error_obj(f2, errp);
+ }
+
+ return ret;
+}
+
+/*
* Set the last error for stream f with optional Error*
*/
void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err)
@@ -384,10 +411,8 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
f->total_transferred += len;
} else if (len == 0) {
qemu_file_set_error_obj(f, -EIO, local_error);
- } else if (len != -EAGAIN) {
- qemu_file_set_error_obj(f, len, local_error);
} else {
- error_free(local_error);
+ qemu_file_set_error_obj(f, len, local_error);
}
return len;
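
qemu_file_get_error_obj_any() above returns the first non-zero error across the two streams and skips NULL files, which is what makes the call safe when the preempt channel does not exist. A tiny dependency-free model of that semantics (types and names are stand-ins, not QEMU's):

    #include <stdio.h>

    struct file { int last_error; };              /* stand-in for QEMUFile */

    static int get_error(struct file *f) { return f ? f->last_error : 0; }

    /* First non-zero error wins; NULL files are skipped. */
    static int get_error_any(struct file *f1, struct file *f2)
    {
        int ret = get_error(f1);
        return ret ? ret : get_error(f2);
    }

    int main(void)
    {
        struct file ok = { 0 }, bad = { -5 };     /* -5 standing in for -EIO */
        printf("%d\n", get_error_any(&ok, NULL)); /* 0: no preempt channel   */
        printf("%d\n", get_error_any(&ok, &bad)); /* -5: error on 2nd stream */
        return 0;
    }
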
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 96e72d8bd8..fa13d04d78 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -141,6 +141,7 @@ void qemu_file_acct_rate_limit(QEMUFile *f, int64_t len);
void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
int64_t qemu_file_get_rate_limit(QEMUFile *f);
int qemu_file_get_error_obj(QEMUFile *f, Error **errp);
+int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp);
void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err);
void qemu_file_set_error(QEMUFile *f, int ret);
int qemu_file_shutdown(QEMUFile *f);
diff --git a/migration/ram.c b/migration/ram.c
index 01f9cc1d72..b94669ba5d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -296,6 +296,20 @@ struct RAMSrcPageRequest {
QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
+typedef struct {
+ /*
+ * Cached ramblock/offset values if preempted. They're only meaningful if
+ * preempted==true below.
+ */
+ RAMBlock *ram_block;
+ unsigned long ram_page;
+ /*
+ * Whether a postcopy preemption just happened. Will be reset after
+ * precopy recovered to background migration.
+ */
+ bool preempted;
+} PostcopyPreemptState;
+
/* State of RAM for migration */
struct RAMState {
/* QEMUFile used for this migration */
@@ -350,6 +364,14 @@ struct RAMState {
/* Queue of outstanding page requests from the destination */
QemuMutex src_page_req_mutex;
QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
+
+ /* Postcopy preemption information */
+ PostcopyPreemptState postcopy_preempt_state;
+ /*
+ * Current channel we're using on src VM. Only valid if postcopy-preempt
+ * is enabled.
+ */
+ unsigned int postcopy_channel;
};
typedef struct RAMState RAMState;
@@ -357,6 +379,11 @@ static RAMState *ram_state;
static NotifierWithReturnList precopy_notifier_list;
+static void postcopy_preempt_reset(RAMState *rs)
+{
+ memset(&rs->postcopy_preempt_state, 0, sizeof(PostcopyPreemptState));
+}
+
/* Whether postcopy has queued requests? */
static bool postcopy_has_request(RAMState *rs)
{
@@ -407,6 +434,11 @@ static void ram_transferred_add(uint64_t bytes)
ram_counters.transferred += bytes;
}
+void dirty_sync_missed_zero_copy(void)
+{
+ ram_counters.dirty_sync_missed_zero_copy++;
+}
+
/* used by the search for pages to send */
struct PageSearchStatus {
/* Current block being searched */
@@ -415,8 +447,28 @@ struct PageSearchStatus {
unsigned long page;
/* Set once we wrap around */
bool complete_round;
- /* Whether current page is explicitly requested by postcopy */
+ /*
+ * [POSTCOPY-ONLY] Whether current page is explicitly requested by
+ * postcopy. When set, the request is "urgent" because the dest QEMU
+ * threads are waiting for us.
+ */
bool postcopy_requested;
+ /*
+ * [POSTCOPY-ONLY] The target channel to use to send current page.
+ *
+ * Note: This may _not_ match with the value in postcopy_requested
+ * above. Let's imagine the case where the postcopy request is exactly
+ * the page that we're sending in progress during precopy. In this case
+ * we'll have postcopy_requested set to true but the target channel
+ * will be the precopy channel (so that we don't split brain on that
+ * specific page since the precopy channel already contains part of
+ * that page data).
+ *
+ * Besides that specific use case, postcopy_target_channel should
+ * always be equal to postcopy_requested, because by default we send
+ * postcopy pages via postcopy preempt channel.
+ */
+ bool postcopy_target_channel;
};
typedef struct PageSearchStatus PageSearchStatus;
@@ -468,6 +520,9 @@ static QemuCond decomp_done_cond;
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
ram_addr_t offset, uint8_t *source_buf);
+static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
+ bool postcopy_requested);
+
static void *do_data_compress(void *opaque)
{
CompressParam *param = opaque;
@@ -1489,8 +1544,12 @@ retry:
*/
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
- /* This is not a postcopy requested page */
+ /*
+ * This is not a postcopy requested page, mark it "not urgent", and use
+ * precopy channel to send it.
+ */
pss->postcopy_requested = false;
+ pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;
pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
if (pss->complete_round && pss->block == rs->last_seen_block &&
@@ -1947,6 +2006,55 @@ void ram_write_tracking_stop(void)
}
#endif /* defined(__linux__) */
+/*
+ * Check whether two addr/offset of the ramblock fall onto the same host huge
+ * page. Returns true if so, false otherwise.
+ */
+static bool offset_on_same_huge_page(RAMBlock *rb, uint64_t addr1,
+ uint64_t addr2)
+{
+ size_t page_size = qemu_ram_pagesize(rb);
+
+ addr1 = ROUND_DOWN(addr1, page_size);
+ addr2 = ROUND_DOWN(addr2, page_size);
+
+ return addr1 == addr2;
+}
+
+/*
+ * Does a previously preempted precopy huge page contain the currently
+ * requested page? Returns true if so, false otherwise.
+ *
+ * This should really happen very rarely, because it means when we were sending
+ * during background migration for postcopy we're sending exactly the page that
+ * some vcpu got faulted on on dest node. When it happens, we probably don't
+ * need to do much but drop the request, because we know right after we restore
+ * the precopy stream it'll be serviced. It'll slightly affect the order of
+ * postcopy requests to be serviced (e.g. it'll be the same as we move current
+ * request to the end of the queue) but it shouldn't be a big deal. The most
+ * important thing is we can _never_ try to send a partially-sent huge page on the
+ * POSTCOPY channel again, otherwise that huge page will get "split brain" on
+ * two channels (PRECOPY, POSTCOPY).
+ */
+static bool postcopy_preempted_contains(RAMState *rs, RAMBlock *block,
+ ram_addr_t offset)
+{
+ PostcopyPreemptState *state = &rs->postcopy_preempt_state;
+
+ /* No preemption at all? */
+ if (!state->preempted) {
+ return false;
+ }
+
+ /* Not even the same ramblock? */
+ if (state->ram_block != block) {
+ return false;
+ }
+
+ return offset_on_same_huge_page(block, offset,
+ state->ram_page << TARGET_PAGE_BITS);
+}
+
/**
* get_queued_page: unqueue a page from the postcopy requests
*
@@ -1964,7 +2072,20 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
block = unqueue_page(rs, &offset);
- if (!block) {
+ if (block) {
+ /* See comment above postcopy_preempted_contains() */
+ if (postcopy_preempted_contains(rs, block, offset)) {
+ trace_postcopy_preempt_hit(block->idstr, offset);
+ /*
+ * If what we preempted previously was exactly what we're
+ * requesting right now, restore the preempted precopy
+ * immediately, boosting its priority as it's requested by
+ * postcopy.
+ */
+ postcopy_preempt_restore(rs, pss, true);
+ return true;
+ }
+ } else {
/*
* Poll write faults too if background snapshot is enabled; that's
* when we have vcpus got blocked by the write protected pages.
@@ -1986,7 +2107,9 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
* really rare.
*/
pss->complete_round = false;
+ /* Mark it an urgent request, meanwhile using POSTCOPY channel */
pss->postcopy_requested = true;
+ pss->postcopy_target_channel = RAM_CHANNEL_POSTCOPY;
}
return !!block;
@@ -2180,6 +2303,129 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
return ram_save_page(rs, pss);
}
+static bool postcopy_needs_preempt(RAMState *rs, PageSearchStatus *pss)
+{
+ MigrationState *ms = migrate_get_current();
+
+ /* Not enabled eager preempt? Then never do that. */
+ if (!migrate_postcopy_preempt()) {
+ return false;
+ }
+
+ /* If the user explicitly disabled breaking of huge page, skip */
+ if (!ms->postcopy_preempt_break_huge) {
+ return false;
+ }
+
+ /* If the ramblock we're sending is a small page? Never bother. */
+ if (qemu_ram_pagesize(pss->block) == TARGET_PAGE_SIZE) {
+ return false;
+ }
+
+ /* Not in postcopy at all? */
+ if (!migration_in_postcopy()) {
+ return false;
+ }
+
+ /*
+ * If we're already handling a postcopy request, don't preempt as this page
+ * has got the same high priority.
+ */
+ if (pss->postcopy_requested) {
+ return false;
+ }
+
+ /* If there's postcopy requests, then check it up! */
+ return postcopy_has_request(rs);
+}
+
+/* Returns true if we preempted precopy, false otherwise */
+static void postcopy_do_preempt(RAMState *rs, PageSearchStatus *pss)
+{
+ PostcopyPreemptState *p_state = &rs->postcopy_preempt_state;
+
+ trace_postcopy_preempt_triggered(pss->block->idstr, pss->page);
+
+ /*
+ * Time to preempt precopy. Cache current PSS into preempt state, so that
+ * after handling the postcopy pages we can recover to it. We need to do
+ * so because the dest VM will have partial of the precopy huge page kept
+ * over in its tmp huge page caches; better move on with it when we can.
+ */
+ p_state->ram_block = pss->block;
+ p_state->ram_page = pss->page;
+ p_state->preempted = true;
+}
+
+/* Whether we're preempted by a postcopy request during sending a huge page */
+static bool postcopy_preempt_triggered(RAMState *rs)
+{
+ return rs->postcopy_preempt_state.preempted;
+}
+
+static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
+ bool postcopy_requested)
+{
+ PostcopyPreemptState *state = &rs->postcopy_preempt_state;
+
+ assert(state->preempted);
+
+ pss->block = state->ram_block;
+ pss->page = state->ram_page;
+
+ /* Whether this is a postcopy request? */
+ pss->postcopy_requested = postcopy_requested;
+ /*
+ * When restoring a preempted page, the old data resides in PRECOPY
+ * slow channel, even if postcopy_requested is set. So always use
+ * PRECOPY channel here.
+ */
+ pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;
+
+ trace_postcopy_preempt_restored(pss->block->idstr, pss->page);
+
+ /* Reset preempt state, most importantly, set preempted==false */
+ postcopy_preempt_reset(rs);
+}
+
+static void postcopy_preempt_choose_channel(RAMState *rs, PageSearchStatus *pss)
+{
+ MigrationState *s = migrate_get_current();
+ unsigned int channel = pss->postcopy_target_channel;
+ QEMUFile *next;
+
+ if (channel != rs->postcopy_channel) {
+ if (channel == RAM_CHANNEL_PRECOPY) {
+ next = s->to_dst_file;
+ } else {
+ next = s->postcopy_qemufile_src;
+ }
+ /* Update and cache the current channel */
+ rs->f = next;
+ rs->postcopy_channel = channel;
+
+ /*
+ * If channel switched, reset last_sent_block since the old sent block
+ * may not be on the same channel.
+ */
+ rs->last_sent_block = NULL;
+
+ trace_postcopy_preempt_switch_channel(channel);
+ }
+
+ trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
+}
+
+/* We need to make sure rs->f always points to the default channel elsewhere */
+static void postcopy_preempt_reset_channel(RAMState *rs)
+{
+ if (migrate_postcopy_preempt() && migration_in_postcopy()) {
+ rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
+ rs->f = migrate_get_current()->to_dst_file;
+ trace_postcopy_preempt_reset_channel();
+ }
+}
+
/**
* ram_save_host_page: save a whole host page
*
@@ -2211,7 +2457,16 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
return 0;
}
+ if (migrate_postcopy_preempt() && migration_in_postcopy()) {
+ postcopy_preempt_choose_channel(rs, pss);
+ }
+
do {
+ if (postcopy_needs_preempt(rs, pss)) {
+ postcopy_do_preempt(rs, pss);
+ break;
+ }
+
/* Check the pages is dirty and if it is send it */
if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
tmppages = ram_save_target_page(rs, pss);
@@ -2235,6 +2490,19 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
/* The offset we leave with is the min boundary of host page and block */
pss->page = MIN(pss->page, hostpage_boundary);
+ /*
+ * When in postcopy preempt mode, flush the data as soon as possible for
+ * postcopy requests, because we've already sent a whole huge page, so the
+ * dst node should already have enough resources to atomically fill in
+ * the current missing page.
+ *
+ * More importantly, when using separate postcopy channel, we must do
+ * explicit flush or it won't flush until the buffer is full.
+ */
+ if (migrate_postcopy_preempt() && pss->postcopy_requested) {
+ qemu_fflush(rs->f);
+ }
+
res = ram_save_release_protection(rs, pss, start_page);
return (res < 0 ? res : pages);
}
@@ -2276,8 +2544,17 @@ static int ram_find_and_save_block(RAMState *rs)
found = get_queued_page(rs, &pss);
if (!found) {
- /* priority queue empty, so just search for something dirty */
- found = find_dirty_block(rs, &pss, &again);
+ /*
+ * Recover previous precopy ramblock/offset if postcopy has
+ * preempted precopy. Otherwise find the next dirty bit.
+ */
+ if (postcopy_preempt_triggered(rs)) {
+ postcopy_preempt_restore(rs, &pss, false);
+ found = true;
+ } else {
+ /* priority queue empty, so just search for something dirty */
+ found = find_dirty_block(rs, &pss, &again);
+ }
}
if (found) {
@@ -2405,6 +2682,8 @@ static void ram_state_reset(RAMState *rs)
rs->last_page = 0;
rs->last_version = ram_list.version;
rs->xbzrle_enabled = false;
+ postcopy_preempt_reset(rs);
+ rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -3048,6 +3327,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
qemu_mutex_unlock(&rs->bitmap_mutex);
+ postcopy_preempt_reset_channel(rs);
+
/*
* Must occur before EOS (or any QEMUFile operation)
* because of RDMA protocol.
@@ -3125,6 +3406,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
return ret;
}
+ postcopy_preempt_reset_channel(rs);
+
ret = multifd_send_sync_main(rs->f);
if (ret < 0) {
return ret;
@@ -3209,11 +3492,13 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
* @mis: the migration incoming state pointer
* @f: QEMUFile where to read the data from
* @flags: Page flags (mostly to see if it's a continuation of previous block)
+ * @channel: the channel we're using
*/
static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
- QEMUFile *f, int flags)
+ QEMUFile *f, int flags,
+ int channel)
{
- RAMBlock *block = mis->last_recv_block;
+ RAMBlock *block = mis->last_recv_block[channel];
char id[256];
uint8_t len;
@@ -3240,7 +3525,7 @@ static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
return NULL;
}
- mis->last_recv_block = block;
+ mis->last_recv_block[channel] = block;
return block;
}
@@ -3659,15 +3944,15 @@ int ram_postcopy_incoming_init(MigrationIncomingState *mis)
* rcu_read_lock is taken prior to this being called.
*
* @f: QEMUFile where to send the data
+ * @channel: the channel to use for loading
*/
-int ram_load_postcopy(QEMUFile *f)
+int ram_load_postcopy(QEMUFile *f, int channel)
{
int flags = 0, ret = 0;
bool place_needed = false;
bool matches_target_page_size = false;
MigrationIncomingState *mis = migration_incoming_get_current();
- /* Currently we only use channel 0. TODO: use all the channels */
- PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[0];
+ PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[channel];
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr;
@@ -3691,10 +3976,10 @@ int ram_load_postcopy(QEMUFile *f)
flags = addr & ~TARGET_PAGE_MASK;
addr &= TARGET_PAGE_MASK;
- trace_ram_load_postcopy_loop((uint64_t)addr, flags);
+ trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
RAM_SAVE_FLAG_COMPRESS_PAGE)) {
- block = ram_block_from_stream(mis, f, flags);
+ block = ram_block_from_stream(mis, f, flags, channel);
if (!block) {
ret = -EINVAL;
break;
@@ -3732,10 +4017,10 @@ int ram_load_postcopy(QEMUFile *f)
} else if (tmp_page->host_addr !=
host_page_from_ram_block_offset(block, addr)) {
/* not the 1st TP within the HP */
- error_report("Non-same host page detected. "
+ error_report("Non-same host page detected on channel %d: "
"Target host page %p, received host page %p "
"(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
- tmp_page->host_addr,
+ channel, tmp_page->host_addr,
host_page_from_ram_block_offset(block, addr),
block->idstr, addr, tmp_page->target_pages);
ret = -EINVAL;
@@ -3945,7 +4230,8 @@ static int ram_load_precopy(QEMUFile *f)
if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
- RAMBlock *block = ram_block_from_stream(mis, f, flags);
+ RAMBlock *block = ram_block_from_stream(mis, f, flags,
+ RAM_CHANNEL_PRECOPY);
host = host_from_ram_block_offset(block, addr);
/*
@@ -4122,7 +4408,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
*/
WITH_RCU_READ_LOCK_GUARD() {
if (postcopy_running) {
- ret = ram_load_postcopy(f);
+ /*
+             * Note! Here RAM_CHANNEL_PRECOPY is the precopy channel of
+             * postcopy migration; a separate RAM_CHANNEL_POSTCOPY channel
+             * services fast page faults.
+ */
+ ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY);
} else {
ret = ram_load_precopy(f);
}
@@ -4284,6 +4575,12 @@ static int ram_resume_prepare(MigrationState *s, void *opaque)
return 0;
}
+void postcopy_preempt_shutdown_file(MigrationState *s)
+{
+ qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
+ qemu_fflush(s->postcopy_qemufile_src);
+}
+
static SaveVMHandlers savevm_ram_handlers = {
.save_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
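The ram.c changes above thread a channel index through the incoming path: the destination keeps one cached "last received block" per channel, so the precopy stream and the postcopy preempt stream cannot clobber each other's shortcut. The following is a minimal, self-contained sketch of that caching pattern only; the struct and helper names are illustrative stand-ins, not the QEMU definitions.

#include <string.h>

enum { RAM_CHANNEL_PRECOPY, RAM_CHANNEL_POSTCOPY, RAM_CHANNEL_MAX };

typedef struct Block {
    const char *idstr;
    struct Block *next;
} Block;

typedef struct {
    Block *blocks;                              /* all known blocks */
    Block *last_recv_block[RAM_CHANNEL_MAX];    /* one cache slot per channel */
} Incoming;

/*
 * Resolve the block a page belongs to.  When the stream flags say "same
 * block as last time", reuse the cache slot of *this* channel only;
 * otherwise look the block up by name and refresh that channel's slot.
 */
static Block *block_from_stream(Incoming *in, int channel,
                                int same_as_last, const char *idstr)
{
    Block *b;

    if (same_as_last) {
        return in->last_recv_block[channel];
    }
    for (b = in->blocks; b; b = b->next) {
        if (strcmp(b->idstr, idstr) == 0) {
            in->last_recv_block[channel] = b;
            return b;
        }
    }
    return NULL;    /* unknown block: caller treats this as an error */
}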
diff --git a/migration/ram.h b/migration/ram.h
index ded0a3a086..c7af65ac74 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -61,7 +61,7 @@ void ram_postcopy_send_discard_bitmap(MigrationState *ms);
/* For incoming postcopy discard */
int ram_discard_range(const char *block_name, uint64_t start, size_t length);
int ram_postcopy_incoming_init(MigrationIncomingState *mis);
-int ram_load_postcopy(QEMUFile *f);
+int ram_load_postcopy(QEMUFile *f, int channel);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
@@ -73,6 +73,8 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
const char *block_name);
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
+void postcopy_preempt_shutdown_file(MigrationState *s);
+void *postcopy_preempt_thread(void *opaque);
/* ram cache */
int colo_init_ram_cache(void);
@@ -87,4 +89,6 @@ void ram_write_tracking_prepare(void);
int ram_write_tracking_start(void);
void ram_write_tracking_stop(void);
+void dirty_sync_missed_zero_copy(void);
+
#endif
diff --git a/migration/savevm.c b/migration/savevm.c
index e8a1b96fcd..48e85c052c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2117,6 +2117,13 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
*/
qemu_sem_post(&mis->postcopy_pause_sem_fault);
+ if (migrate_postcopy_preempt()) {
+        /* The channel should already be set up again; make sure of it */
+ assert(mis->postcopy_qemufile_dst);
+ /* Kick the fast ram load thread too */
+ qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
+ }
+
return 0;
}
@@ -2540,16 +2547,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
{
int i;
- /*
- * If network is interrupted, any temp page we received will be useless
- * because we didn't mark them as "received" in receivedmap. After a
- * proper recovery later (which will sync src dirty bitmap with receivedmap
- * on dest) these cached small pages will be resent again.
- */
- for (i = 0; i < mis->postcopy_channels; i++) {
- postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
- }
-
trace_postcopy_pause_incoming();
assert(migrate_postcopy_ram());
@@ -2572,12 +2569,37 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
mis->to_src_file = NULL;
qemu_mutex_unlock(&mis->rp_mutex);
+ /*
+     * NOTE: this must happen before resetting the PostcopyTmpPages below,
+     * otherwise it is racy to reset those fields while the fast load thread
+     * may still be accessing them in parallel.
+ */
+ if (mis->postcopy_qemufile_dst) {
+ qemu_file_shutdown(mis->postcopy_qemufile_dst);
+ /* Take the mutex to make sure the fast ram load thread halted */
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
+ qemu_fclose(mis->postcopy_qemufile_dst);
+ mis->postcopy_qemufile_dst = NULL;
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+ }
+
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
MIGRATION_STATUS_POSTCOPY_PAUSED);
/* Notify the fault thread for the invalidated file handle */
postcopy_fault_thread_notify(mis);
+ /*
+     * If the network is interrupted, any temp pages we received will be
+     * useless because we didn't mark them as "received" in receivedmap.
+     * After a proper recovery later (which will sync the src dirty bitmap
+     * with receivedmap on dest) these cached small pages will be resent.
+ */
+ for (i = 0; i < mis->postcopy_channels; i++) {
+ postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
+ }
+
error_report("Detected IO failure for postcopy. "
"Migration paused.");
@@ -2599,8 +2621,8 @@ retry:
while (true) {
section_type = qemu_get_byte(f);
- if (qemu_file_get_error(f)) {
- ret = qemu_file_get_error(f);
+ ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
+ if (ret) {
break;
}
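The reordering in postcopy_pause_incoming() above follows a quiesce-then-reset rule: break the preempt channel, take the fast load thread's mutex to be sure that thread has halted, and only then reset the temp pages it may have been filling. A minimal pthreads sketch of the same ordering, with purely illustrative names rather than the QEMU ones:

#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t prio_thread_mutex; /* held by the fast load thread while it works */
    bool channel_broken;               /* makes that thread's next read fail */
    int shared_tmp_state;              /* state both sides touch */
} Incoming;

static void pause_incoming(Incoming *in)
{
    /* 1. Invalidate the channel so the fast load thread stops on its own. */
    in->channel_broken = true;

    /* 2. Acquire its mutex: once we own it, the thread is parked. */
    pthread_mutex_lock(&in->prio_thread_mutex);
    /* ... the channel object can be torn down safely here ... */
    pthread_mutex_unlock(&in->prio_thread_mutex);

    /* 3. Only now is it race-free to reset state the thread may have used. */
    in->shared_tmp_state = 0;
}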
diff --git a/migration/socket.c b/migration/socket.c
index 4fd5e85f50..e6fdf3c5e1 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -26,7 +26,7 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
#include "trace.h"
-
+#include "postcopy-ram.h"
struct SocketOutgoingArgs {
SocketAddress *saddr;
@@ -39,6 +39,24 @@ void socket_send_channel_create(QIOTaskFunc f, void *data)
f, data, NULL, NULL);
}
+QIOChannel *socket_send_channel_create_sync(Error **errp)
+{
+ QIOChannelSocket *sioc = qio_channel_socket_new();
+
+ if (!outgoing_args.saddr) {
+ object_unref(OBJECT(sioc));
+ error_setg(errp, "Initial sock address not set!");
+ return NULL;
+ }
+
+ if (qio_channel_socket_connect_sync(sioc, outgoing_args.saddr, errp) < 0) {
+ object_unref(OBJECT(sioc));
+ return NULL;
+ }
+
+ return QIO_CHANNEL(sioc);
+}
+
int socket_send_channel_destroy(QIOChannel *send)
{
/* Remove channel */
@@ -166,6 +184,8 @@ socket_start_incoming_migration_internal(SocketAddress *saddr,
if (migrate_use_multifd()) {
num = migrate_multifd_channels();
+ } else if (migrate_postcopy_preempt()) {
+ num = RAM_CHANNEL_MAX;
}
if (qio_net_listener_open_sync(listener, saddr, num, errp) < 0) {
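socket_send_channel_create_sync() above adds a blocking variant of channel creation for the preempt channel: allocate, validate the saved destination address, connect, and drop the reference on every failure path so nothing leaks. A generic sketch of that ownership rule in standard C; the types and helpers are hypothetical stand-ins, not the QIO API:

#include <stdlib.h>

typedef struct { int fd; } Chan;
typedef const char *Err;

static Chan *chan_new(void) { return calloc(1, sizeof(Chan)); }
static void chan_unref(Chan *c) { free(c); }
static int chan_connect_sync(Chan *c, const char *addr)
{
    (void)c;
    return (addr && *addr) ? 0 : -1;   /* stand-in for the real connect */
}

/* Return a connected channel, or NULL with *errp set; never leak on failure. */
static Chan *send_channel_create_sync(const char *saved_addr, Err *errp)
{
    Chan *c = chan_new();

    if (!saved_addr) {
        chan_unref(c);
        *errp = "initial socket address not set";
        return NULL;
    }
    if (chan_connect_sync(c, saved_addr) < 0) {
        chan_unref(c);
        *errp = "connect failed";
        return NULL;
    }
    return c;
}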
diff --git a/migration/socket.h b/migration/socket.h
index 891dbccceb..dc54df4e6c 100644
--- a/migration/socket.h
+++ b/migration/socket.h
@@ -21,6 +21,7 @@
#include "io/task.h"
void socket_send_channel_create(QIOTaskFunc f, void *data);
+QIOChannel *socket_send_channel_create_sync(Error **errp);
int socket_send_channel_destroy(QIOChannel *send);
void socket_start_incoming_migration(const char *str, Error **errp);
diff --git a/migration/tls.c b/migration/tls.c
index 32c384a8b6..73e8c9d3c2 100644
--- a/migration/tls.c
+++ b/migration/tls.c
@@ -166,3 +166,12 @@ void migration_tls_channel_connect(MigrationState *s,
NULL,
NULL);
}
+
+bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc)
+{
+ if (!migrate_use_tls()) {
+ return false;
+ }
+
+ return !object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS);
+}
diff --git a/migration/tls.h b/migration/tls.h
index de4fe2cafd..98e23c9b0e 100644
--- a/migration/tls.h
+++ b/migration/tls.h
@@ -37,4 +37,8 @@ void migration_tls_channel_connect(MigrationState *s,
QIOChannel *ioc,
const char *hostname,
Error **errp);
+
+/* Whether the QIO channel requires a further TLS handshake */
+bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc);
+
#endif
diff --git a/migration/trace-events b/migration/trace-events
index 1aec580e92..a34afe7b85 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -91,7 +91,7 @@ migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned
migration_throttle(void) ""
ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p"
-ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
+ram_load_postcopy_loop(int channel, uint64_t addr, int flags) "chan=%d addr=0x%" PRIx64 " flags=0x%x"
ram_postcopy_send_discard_bitmap(void) ""
ram_save_page(const char *rbname, uint64_t offset, void *host) "%s: offset: 0x%" PRIx64 " host: %p"
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: 0x%zx len: 0x%zx"
@@ -111,6 +111,12 @@ ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRI
ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
+postcopy_preempt_triggered(char *str, unsigned long page) "during sending ramblock %s offset 0x%lx"
+postcopy_preempt_restored(char *str, unsigned long page) "ramblock %s offset 0x%lx"
+postcopy_preempt_hit(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
+postcopy_preempt_send_host_page(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
+postcopy_preempt_switch_channel(int channel) "%d"
+postcopy_preempt_reset_channel(void) ""
# multifd.c
multifd_new_send_channel_async(uint8_t id) "channel %u"
@@ -176,6 +182,7 @@ migration_thread_low_pending(uint64_t pending) "%" PRIu64
migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " max_size %" PRId64
process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
process_incoming_migration_co_postcopy_end_main(void) ""
+postcopy_preempt_enabled(bool value) "%d"
# channel.c
migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
@@ -263,6 +270,8 @@ mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, i
mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
postcopy_pause_fault_thread(void) ""
postcopy_pause_fault_thread_continued(void) ""
+postcopy_pause_fast_load(void) ""
+postcopy_pause_fast_load_continued(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
@@ -278,6 +287,10 @@ postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_off
postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64
postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s"
postcopy_page_req_del(void *addr, int count) "resolved page req %p total %d"
+postcopy_preempt_tls_handshake(void) ""
+postcopy_preempt_new_channel(void) ""
+postcopy_preempt_thread_entry(void) ""
+postcopy_preempt_thread_exit(void) ""
get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u"