From 0ffcece32519e85a2971cafdc421f4fd3107d766 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Mon, 3 Sep 2018 12:38:43 +0800 Subject: colo-compare: implement the process of checkpoint While doing a checkpoint, we need to flush all the unhandled packets. By using the filter notifier mechanism, we can easily notify every compare object to do this processing, which runs inside the compare threads as a coroutine. Signed-off-by: zhanghailiang Signed-off-by: Zhang Chen Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- include/migration/colo.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/migration/colo.h b/include/migration/colo.h index 2fe48ad353..fefb2fcf4c 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -16,6 +16,12 @@ #include "qemu-common.h" #include "qapi/qapi-types-migration.h" +enum colo_event { + COLO_EVENT_NONE, + COLO_EVENT_CHECKPOINT, + COLO_EVENT_FAILOVER, +}; + void colo_info_init(void); void migrate_start_colo_process(MigrationState *s); -- cgit v1.2.3-55-g7522 From aad555c2294b5de22524b7dbacc728d51cc63bcc Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Mon, 3 Sep 2018 12:38:47 +0800 Subject: COLO: Remove colo_state migration struct We need to know whether migration is going into COLO state on the incoming side before starting normal migration. Instead of using the VMStateDescription to send colo_state from the source side to the destination side, we use MIG_CMD_ENABLE_COLO to indicate whether COLO is enabled or not. Signed-off-by: zhanghailiang Signed-off-by: Zhang Chen Signed-off-by: Zhang Chen Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Jason Wang --- include/migration/colo.h | 5 ++-- migration/Makefile.objs | 2 +- migration/colo-comm.c | 76 ------------------------------------------------ migration/colo.c | 13 ++++++++- migration/migration.c | 23 ++++++++++++++- migration/savevm.c | 17 +++++++++++ migration/savevm.h | 1 + migration/trace-events | 1 + vl.c | 2 -- 9 files changed, 57 insertions(+), 83 deletions(-) delete mode 100644 migration/colo-comm.c (limited to 'include') diff --git a/include/migration/colo.h b/include/migration/colo.h index fefb2fcf4c..99ce17aca7 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -28,8 +28,9 @@ void migrate_start_colo_process(MigrationState *s); bool migration_in_colo_state(void); /* loadvm */ -bool migration_incoming_enable_colo(void); -void migration_incoming_exit_colo(void); +void migration_incoming_enable_colo(void); +void migration_incoming_disable_colo(void); +bool migration_incoming_colo_enabled(void); void *colo_process_incoming_thread(void *opaque); bool migration_incoming_in_colo_state(void); diff --git a/migration/Makefile.objs b/migration/Makefile.objs index c83ec47ba8..a4f3bafd86 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -1,6 +1,6 @@ common-obj-y += migration.o socket.o fd.o exec.o common-obj-y += tls.o channel.o savevm.o -common-obj-y += colo-comm.o colo.o colo-failover.o +common-obj-y += colo.o colo-failover.o common-obj-y += vmstate.o vmstate-types.o page_cache.o common-obj-y += qemu-file.o global_state.o common-obj-y += qemu-file-channel.o diff --git a/migration/colo-comm.c b/migration/colo-comm.c deleted file mode 100644 index df26e4dfe7..0000000000 --- a/migration/colo-comm.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) - * (a.k.a. Fault Tolerance or Continuous Replication) - * - * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
- * Copyright (c) 2016 FUJITSU LIMITED - * Copyright (c) 2016 Intel Corporation - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "migration.h" -#include "migration/colo.h" -#include "migration/vmstate.h" -#include "trace.h" - -typedef struct { - bool colo_requested; -} COLOInfo; - -static COLOInfo colo_info; - -COLOMode get_colo_mode(void) -{ - if (migration_in_colo_state()) { - return COLO_MODE_PRIMARY; - } else if (migration_incoming_in_colo_state()) { - return COLO_MODE_SECONDARY; - } else { - return COLO_MODE_UNKNOWN; - } -} - -static int colo_info_pre_save(void *opaque) -{ - COLOInfo *s = opaque; - - s->colo_requested = migrate_colo_enabled(); - - return 0; -} - -static bool colo_info_need(void *opaque) -{ - return migrate_colo_enabled(); -} - -static const VMStateDescription colo_state = { - .name = "COLOState", - .version_id = 1, - .minimum_version_id = 1, - .pre_save = colo_info_pre_save, - .needed = colo_info_need, - .fields = (VMStateField[]) { - VMSTATE_BOOL(colo_requested, COLOInfo), - VMSTATE_END_OF_LIST() - }, -}; - -void colo_info_init(void) -{ - vmstate_register(NULL, 0, &colo_state, &colo_info); -} - -bool migration_incoming_enable_colo(void) -{ - return colo_info.colo_requested; -} - -void migration_incoming_exit_colo(void) -{ - colo_info.colo_requested = false; -} diff --git a/migration/colo.c b/migration/colo.c index af04010061..d3163b51c8 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -152,6 +152,17 @@ static void primary_vm_do_failover(void) qemu_sem_post(&s->colo_exit_sem); } +COLOMode get_colo_mode(void) +{ + if (migration_in_colo_state()) { + return COLO_MODE_PRIMARY; + } else if (migration_incoming_in_colo_state()) { + return COLO_MODE_SECONDARY; + } else { + return COLO_MODE_UNKNOWN; + } +} + void colo_do_failover(MigrationState *s) { /* Make sure VM stopped while failover happened. 
*/ @@ -746,7 +757,7 @@ out: if (mis->to_src_file) { qemu_fclose(mis->to_src_file); } - migration_incoming_exit_colo(); + migration_incoming_disable_colo(); rcu_unregister_thread(); return NULL; diff --git a/migration/migration.c b/migration/migration.c index bf5fcd1009..215e81a190 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -296,6 +296,22 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname, return migrate_send_rp_message(mis, msg_type, msglen, bufc); } +static bool migration_colo_enabled; +bool migration_incoming_colo_enabled(void) +{ + return migration_colo_enabled; +} + +void migration_incoming_disable_colo(void) +{ + migration_colo_enabled = false; +} + +void migration_incoming_enable_colo(void) +{ + migration_colo_enabled = true; +} + void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; @@ -418,7 +434,7 @@ static void process_incoming_migration_co(void *opaque) } /* we get COLO info, and know if we are in COLO mode */ - if (!ret && migration_incoming_enable_colo()) { + if (!ret && migration_incoming_colo_enabled()) { /* Make sure all file formats flush their mutable metadata */ bdrv_invalidate_cache_all(&local_err); if (local_err) { @@ -3025,6 +3041,11 @@ static void *migration_thread(void *opaque) qemu_savevm_send_postcopy_advise(s->to_dst_file); } + if (migrate_colo_enabled()) { + /* Notify migration destination that we enable COLO */ + qemu_savevm_send_colo_enable(s->to_dst_file); + } + qemu_savevm_state_setup(s->to_dst_file); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; diff --git a/migration/savevm.c b/migration/savevm.c index 2d10e45582..09ad962a8f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -56,6 +56,7 @@ #include "io/channel-file.h" #include "sysemu/replay.h" #include "qjson.h" +#include "migration/colo.h" #ifndef ETH_P_RARP #define ETH_P_RARP 0x8035 @@ -82,6 +83,7 @@ enum qemu_vm_cmd { were previously sent during precopy but are dirty. 
*/ MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ + MIG_CMD_ENABLE_COLO, /* Enable COLO */ MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ MIG_CMD_MAX @@ -841,6 +843,12 @@ static void qemu_savevm_command_send(QEMUFile *f, qemu_fflush(f); } +void qemu_savevm_send_colo_enable(QEMUFile *f) +{ + trace_savevm_send_colo_enable(); + qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL); +} + void qemu_savevm_send_ping(QEMUFile *f, uint32_t value) { uint32_t buf; @@ -1922,6 +1930,12 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis, return 0; } +static int loadvm_process_enable_colo(MigrationIncomingState *mis) +{ + migration_incoming_enable_colo(); + return 0; +} + /* * Process an incoming 'QEMU_VM_COMMAND' * 0 just a normal return @@ -2001,6 +2015,9 @@ static int loadvm_process_command(QEMUFile *f) case MIG_CMD_RECV_BITMAP: return loadvm_handle_recv_bitmap(mis, len); + + case MIG_CMD_ENABLE_COLO: + return loadvm_process_enable_colo(mis); } return 0; diff --git a/migration/savevm.h b/migration/savevm.h index a5e65b8ae3..8373c2f6bd 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -55,6 +55,7 @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, uint16_t len, uint64_t *start_list, uint64_t *length_list); +void qemu_savevm_send_colo_enable(QEMUFile *f); int qemu_loadvm_state(QEMUFile *f); void qemu_loadvm_state_cleanup(void); diff --git a/migration/trace-events b/migration/trace-events index 9430f3cbe0..fa0ff3f3bf 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -37,6 +37,7 @@ savevm_send_ping(uint32_t val) "0x%x" savevm_send_postcopy_listen(void) "" savevm_send_postcopy_run(void) "" savevm_send_postcopy_resume(void) "" +savevm_send_colo_enable(void) "" savevm_send_recv_bitmap(char *name) "%s" savevm_state_setup(void) "" savevm_state_resume_prepare(void) "" diff --git a/vl.c b/vl.c index 4e25c78bff..ac3ed17de4 100644 
--- a/vl.c +++ b/vl.c @@ -4365,8 +4365,6 @@ int main(int argc, char **argv, char **envp) #endif } - colo_info_init(); - if (net_init_clients(&err) < 0) { error_report_err(err); exit(1); -- cgit v1.2.3-55-g7522 From 13af18f2228892d19d40ff96672677d168da7e9e Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Mon, 3 Sep 2018 12:38:48 +0800 Subject: COLO: Load dirty pages into SVM's RAM cache firstly We should not load PVM's state directly into SVM, because errors may happen while SVM is receiving data, which would break SVM. We need to ensure that all data has been received before loading the state into SVM. We use extra memory to cache this data (PVM's RAM). The RAM cache on the secondary side is initially the same as SVM/PVM's memory. During a checkpoint, we first cache the dirty pages of PVM into this RAM cache, so this RAM cache is always the same as PVM's memory at every checkpoint; then we flush this cached RAM to SVM after we have received all of PVM's state. Signed-off-by: zhanghailiang Signed-off-by: Li Zhijian Signed-off-by: Zhang Chen Signed-off-by: Zhang Chen Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Jason Wang --- include/exec/ram_addr.h | 1 + migration/migration.c | 7 +++++ migration/ram.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- migration/ram.h | 4 +++ migration/savevm.c | 2 +- 5 files changed, 94 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3abb639056..9ecd911c3e 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -27,6 +27,7 @@ struct RAMBlock { struct rcu_head rcu; struct MemoryRegion *mr; uint8_t *host; + uint8_t *colo_cache; /* For colo, VM's ram cache */ ram_addr_t offset; ram_addr_t used_length; ram_addr_t max_length; diff --git a/migration/migration.c b/migration/migration.c index 215e81a190..7696729340 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -444,6 +444,11 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } + if (colo_init_ram_cache() < 0) { + error_report("Init ram cache failed"); + exit(EXIT_FAILURE); + } + qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); mis->have_colo_incoming_thread = true; @@ -451,6 +456,8 @@ static void process_incoming_migration_co(void *opaque) /* Wait checkpoint incoming thread exit before free resource */ qemu_thread_join(&mis->colo_incoming_thread); + /* We hold the global iothread lock, so it is safe here */ + colo_release_ram_cache(); } if (ret < 0) { diff --git a/migration/ram.c b/migration/ram.c index bc38d98cc3..cd7a446c95 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3447,6 +3447,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block, return block->host + offset; } +static inline void *colo_cache_from_block_offset(RAMBlock *block, + ram_addr_t offset) +{ + if (!offset_in_ramblock(block, offset)) { + return NULL; + } + if (!block->colo_cache) { + error_report("%s: colo_cache is NULL in block :%s", + __func__, block->idstr); + 
return NULL; + } + return block->colo_cache + offset; +} + /** * ram_handle_compressed: handle the zero page case * @@ -3651,6 +3665,58 @@ static void decompress_data_with_multi_threads(QEMUFile *f, qemu_mutex_unlock(&decomp_done_lock); } +/* + * colo cache: this is for secondary VM, we cache the whole + * memory of the secondary VM, it is need to hold the global lock + * to call this helper. + */ +int colo_init_ram_cache(void) +{ + RAMBlock *block; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + block->colo_cache = qemu_anon_ram_alloc(block->used_length, + NULL, + false); + if (!block->colo_cache) { + error_report("%s: Can't alloc memory for COLO cache of block %s," + "size 0x" RAM_ADDR_FMT, __func__, block->idstr, + block->used_length); + goto out_locked; + } + memcpy(block->colo_cache, block->host, block->used_length); + } + rcu_read_unlock(); + return 0; + +out_locked: + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + + rcu_read_unlock(); + return -errno; +} + +/* It is need to hold the global lock to call this helper */ +void colo_release_ram_cache(void) +{ + RAMBlock *block; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + rcu_read_unlock(); +} + /** * ram_load_setup: Setup RAM for migration incoming side * @@ -3667,6 +3733,7 @@ static int ram_load_setup(QEMUFile *f, void *opaque) xbzrle_load_setup(); ramblock_recv_map_init(); + return 0; } @@ -3687,6 +3754,7 @@ static int ram_load_cleanup(void *opaque) g_free(rb->receivedmap); rb->receivedmap = NULL; } + return 0; } @@ -3924,13 +3992,24 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { RAMBlock *block = ram_block_from_stream(f, flags); 
- host = host_from_ram_block_offset(block, addr); + /* + * After going into COLO, we should load the Page into colo_cache. + */ + if (migration_incoming_in_colo_state()) { + host = colo_cache_from_block_offset(block, addr); + } else { + host = host_from_ram_block_offset(block, addr); + } if (!host) { error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); ret = -EINVAL; break; } - ramblock_recv_bitmap_set(block, host); + + if (!migration_incoming_in_colo_state()) { + ramblock_recv_bitmap_set(block, host); + } + trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); } diff --git a/migration/ram.h b/migration/ram.h index a139066846..83ff1bc11a 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -71,4 +71,8 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); +/* ram cache */ +int colo_init_ram_cache(void); +void colo_release_ram_cache(void); + #endif diff --git a/migration/savevm.c b/migration/savevm.c index 09ad962a8f..288b8075be 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1933,7 +1933,7 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis, static int loadvm_process_enable_colo(MigrationIncomingState *mis) { migration_incoming_enable_colo(); - return 0; + return colo_init_ram_cache(); } /* -- cgit v1.2.3-55-g7522 From 5fbba3d6594aab91a26c255776b80d454682d535 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Mon, 3 Sep 2018 12:38:56 +0800 Subject: filter: Add handle_event method for NetFilterClass Filter needs to process the event of checkpoint/failover or other event passed by COLO frame. 
Signed-off-by: zhanghailiang Signed-off-by: Zhang Chen Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- include/net/filter.h | 5 +++++ net/filter.c | 17 +++++++++++++++++ net/net.c | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/net/filter.h b/include/net/filter.h index 435acd6f82..49da666ac0 100644 --- a/include/net/filter.h +++ b/include/net/filter.h @@ -38,6 +38,8 @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc, typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp); +typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp); + typedef struct NetFilterClass { ObjectClass parent_class; @@ -45,6 +47,7 @@ typedef struct NetFilterClass { FilterSetup *setup; FilterCleanup *cleanup; FilterStatusChanged *status_changed; + FilterHandleEvent *handle_event; /* mandatory */ FilterReceiveIOV *receive_iov; } NetFilterClass; @@ -77,4 +80,6 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, int iovcnt, void *opaque); +void colo_notify_filters_event(int event, Error **errp); + #endif /* QEMU_NET_FILTER_H */ diff --git a/net/filter.c b/net/filter.c index 2fd7d7d663..c9f9e5fa08 100644 --- a/net/filter.c +++ b/net/filter.c @@ -17,6 +17,8 @@ #include "net/vhost_net.h" #include "qom/object_interfaces.h" #include "qemu/iov.h" +#include "net/colo.h" +#include "migration/colo.h" static inline bool qemu_can_skip_netfilter(NetFilterState *nf) { @@ -245,11 +247,26 @@ static void netfilter_finalize(Object *obj) g_free(nf->netdev_id); } +static void default_handle_event(NetFilterState *nf, int event, Error **errp) +{ + switch (event) { + case COLO_EVENT_CHECKPOINT: + break; + case COLO_EVENT_FAILOVER: + object_property_set_str(OBJECT(nf), "off", "status", errp); + break; + default: + break; + } +} + static void netfilter_class_init(ObjectClass *oc, void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + NetFilterClass *nfc = NETFILTER_CLASS(oc); ucc->complete = 
netfilter_complete; + nfc->handle_event = default_handle_event; } static const TypeInfo netfilter_info = { diff --git a/net/net.c b/net/net.c index cdcd5cf634..c66847ed76 100644 --- a/net/net.c +++ b/net/net.c @@ -1335,6 +1335,25 @@ void hmp_info_network(Monitor *mon, const QDict *qdict) } } +void colo_notify_filters_event(int event, Error **errp) +{ + NetClientState *nc; + NetFilterState *nf; + NetFilterClass *nfc = NULL; + Error *local_err = NULL; + + QTAILQ_FOREACH(nc, &net_clients, next) { + QTAILQ_FOREACH(nf, &nc->filters, next) { + nfc = NETFILTER_GET_CLASS(OBJECT(nf)); + nfc->handle_event(nf, event, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + } +} + void qmp_set_link(const char *name, bool up, Error **errp) { NetClientState *ncs[MAX_QUEUE_NUM]; -- cgit v1.2.3-55-g7522