From 13af18f2228892d19d40ff96672677d168da7e9e Mon Sep 17 00:00:00 2001
From: Zhang Chen
Date: Mon, 3 Sep 2018 12:38:48 +0800
Subject: COLO: Load dirty pages into SVM's RAM cache firstly

We should not load the PVM's state directly into the SVM, because errors
may occur while the SVM is receiving data, which would break the SVM. We
need to ensure that all data has been received before loading the state
into the SVM, so we use extra memory to cache this data (the PVM's RAM).

The RAM cache on the secondary side is initially the same as the SVM/PVM's
memory. During each checkpoint we first cache the PVM's dirty pages into
this RAM cache, so the RAM cache is always identical to the PVM's memory
at every checkpoint; we then flush this cached RAM into the SVM after we
have received all of the PVM's state.

Signed-off-by: zhanghailiang
Signed-off-by: Li Zhijian
Signed-off-by: Zhang Chen
Signed-off-by: Zhang Chen
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Jason Wang
---
 migration/ram.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

(limited to 'migration/ram.c')

diff --git a/migration/ram.c b/migration/ram.c
index bc38d98cc3..cd7a446c95 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3447,6 +3447,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
     return block->host + offset;
 }
 
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
+                                                 ram_addr_t offset)
+{
+    if (!offset_in_ramblock(block, offset)) {
+        return NULL;
+    }
+    if (!block->colo_cache) {
+        error_report("%s: colo_cache is NULL in block :%s",
+                     __func__, block->idstr);
+        return NULL;
+    }
+    return block->colo_cache + offset;
+}
+
 /**
  * ram_handle_compressed: handle the zero page case
  *
@@ -3651,6 +3665,58 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
     qemu_mutex_unlock(&decomp_done_lock);
 }
 
+/*
+ * colo cache: this is for the secondary VM; we cache the whole
+ * memory of the secondary VM. The global lock must be held when
+ * calling this helper.
+ */
+int colo_init_ram_cache(void)
+{
+    RAMBlock *block;
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        block->colo_cache = qemu_anon_ram_alloc(block->used_length,
+                                                NULL,
+                                                false);
+        if (!block->colo_cache) {
+            error_report("%s: Can't alloc memory for COLO cache of block %s,"
+                         "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
+                         block->used_length);
+            goto out_locked;
+        }
+        memcpy(block->colo_cache, block->host, block->used_length);
+    }
+    rcu_read_unlock();
+    return 0;
+
+out_locked:
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (block->colo_cache) {
+            qemu_anon_ram_free(block->colo_cache, block->used_length);
+            block->colo_cache = NULL;
+        }
+    }
+
+    rcu_read_unlock();
+    return -errno;
+}
+
+/* The global lock must be held to call this helper */
+void colo_release_ram_cache(void)
+{
+    RAMBlock *block;
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (block->colo_cache) {
+            qemu_anon_ram_free(block->colo_cache, block->used_length);
+            block->colo_cache = NULL;
+        }
+    }
+    rcu_read_unlock();
+}
+
 /**
  * ram_load_setup: Setup RAM for migration incoming side
  *
@@ -3667,6 +3733,7 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
 
     xbzrle_load_setup();
     ramblock_recv_map_init();
+
     return 0;
 }
 
@@ -3687,6 +3754,7 @@ static int ram_load_cleanup(void *opaque)
         g_free(rb->receivedmap);
         rb->receivedmap = NULL;
     }
+
     return 0;
 }
 
@@ -3924,13 +3992,24 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
             RAMBlock *block = ram_block_from_stream(f, flags);
 
-            host = host_from_ram_block_offset(block, addr);
+            /*
+             * After going into COLO, we should load the page into colo_cache.
+             */
+            if (migration_incoming_in_colo_state()) {
+                host = colo_cache_from_block_offset(block, addr);
+            } else {
+                host = host_from_ram_block_offset(block, addr);
+            }
             if (!host) {
                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                 ret = -EINVAL;
                 break;
            }
-            ramblock_recv_bitmap_set(block, host);
+
+            if (!migration_incoming_in_colo_state()) {
+                ramblock_recv_bitmap_set(block, host);
+            }
+
             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
         }
 
-- 
cgit v1.2.3-55-g7522
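
For readers unfamiliar with the approach, the cache-then-flush pattern this
patch introduces can be sketched outside of QEMU roughly as follows. This is a
minimal illustration in plain C, not QEMU code: the struct and the function
names (ram_cache_init, ram_cache_load_page, ram_cache_flush) are invented for
the example, and a single flat RAM block stands in for QEMU's per-RAMBlock
handling.

    #include <stdlib.h>
    #include <string.h>

    /* Illustrative only: one contiguous RAM block and one shadow cache. */
    struct ram_block_sketch {
        unsigned char *host;    /* the SVM's live memory             */
        unsigned char *cache;   /* the shadow copy (the "ram cache") */
        size_t length;
    };

    /* Allocate the cache and make it identical to the live memory. */
    static int ram_cache_init(struct ram_block_sketch *b)
    {
        b->cache = malloc(b->length);
        if (!b->cache) {
            return -1;
        }
        memcpy(b->cache, b->host, b->length);
        return 0;
    }

    /* Incoming checkpoint pages land in the cache, not in live memory. */
    static void ram_cache_load_page(struct ram_block_sketch *b, size_t offset,
                                    const void *data, size_t page_size)
    {
        memcpy(b->cache + offset, data, page_size);
    }

    /* Only once the whole checkpoint has arrived is the cache copied back. */
    static void ram_cache_flush(struct ram_block_sketch *b)
    {
        memcpy(b->host, b->cache, b->length);
    }

The patches that follow refine the last step: instead of copying the whole
cache back, only the pages recorded as dirty are flushed.
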
From 7d9acafa2cc094d03f46abc522786a1696983639 Mon Sep 17 00:00:00 2001
From: Zhang Chen
Date: Mon, 3 Sep 2018 12:38:49 +0800
Subject: ram/COLO: Record the dirty pages that SVM received

We record the addresses of the dirty pages that are received; this will
help us flush the pages cached for the SVM later.

The trick here is that we record the dirty pages by re-using the migration
dirty bitmap. In a later patch we will start dirty logging for the SVM,
just as migration does; this way we can record the dirty pages caused by
both the PVM and the SVM, and we only flush those dirty pages from the RAM
cache when doing a checkpoint.

Signed-off-by: zhanghailiang
Signed-off-by: Zhang Chen
Signed-off-by: Zhang Chen
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Jason Wang
---
 migration/ram.c | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

(limited to 'migration/ram.c')

diff --git a/migration/ram.c b/migration/ram.c
index cd7a446c95..404c8f0853 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3458,6 +3458,15 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
                      __func__, block->idstr);
         return NULL;
     }
+
+    /*
+     * During a colo checkpoint, we need a bitmap of these migrated pages.
+     * It helps us decide which pages in the ram cache should be flushed
+     * into the VM's RAM later.
+     */
+    if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
+        ram_state->migration_dirty_pages++;
+    }
     return block->colo_cache + offset;
 }
 
@@ -3675,7 +3684,7 @@ int colo_init_ram_cache(void)
     RAMBlock *block;
 
     rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                 NULL,
                                                 false);
@@ -3688,10 +3697,29 @@ int colo_init_ram_cache(void)
         memcpy(block->colo_cache, block->host, block->used_length);
     }
     rcu_read_unlock();
+    /*
+     * Record the dirty pages that are sent by the PVM; we use this dirty
+     * bitmap to decide which pages in the cache should be flushed into the
+     * SVM's RAM. Here we use the same name 'ram_bitmap' as for migration.
+     */
+    if (ram_bytes_total()) {
+        RAMBlock *block;
+
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
+            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
+
+            block->bmap = bitmap_new(pages);
+            bitmap_set(block->bmap, 0, pages);
+        }
+    }
+    ram_state = g_new0(RAMState, 1);
+    ram_state->migration_dirty_pages = 0;
+
     return 0;
 
 out_locked:
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         if (block->colo_cache) {
             qemu_anon_ram_free(block->colo_cache, block->used_length);
             block->colo_cache = NULL;
@@ -3707,14 +3735,23 @@ void colo_release_ram_cache(void)
 {
     RAMBlock *block;
 
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        g_free(block->bmap);
+        block->bmap = NULL;
+    }
+
     rcu_read_lock();
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
         if (block->colo_cache) {
             qemu_anon_ram_free(block->colo_cache, block->used_length);
             block->colo_cache = NULL;
         }
     }
+
     rcu_read_unlock();
+    g_free(ram_state);
+    ram_state = NULL;
 }
 
 /**
-- 
cgit v1.2.3-55-g7522
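
The bitmap re-use described above boils down to a first-write-wins
test-and-set per received page. Below is a simplified sketch;
sketch_test_and_set_bit and record_received_page are hypothetical stand-ins
for the bitmap helper and for the logic this patch adds to
colo_cache_from_block_offset(), not QEMU APIs.

    #include <limits.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define SKETCH_PAGE_BITS 12    /* assume 4 KiB pages for the example */
    #define SKETCH_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* Simplified test_and_set_bit: set the bit, return its previous value. */
    static bool sketch_test_and_set_bit(size_t nr, unsigned long *map)
    {
        unsigned long mask = 1UL << (nr % SKETCH_BITS_PER_LONG);
        unsigned long old = map[nr / SKETCH_BITS_PER_LONG];

        map[nr / SKETCH_BITS_PER_LONG] = old | mask;
        return old & mask;
    }

    /*
     * Record that the page at 'offset' was received from the PVM.  The dirty
     * page counter is only bumped the first time the bit flips, mirroring how
     * the patch increments ram_state->migration_dirty_pages.
     */
    static void record_received_page(unsigned long *bmap, size_t offset,
                                     unsigned long *dirty_pages)
    {
        if (!sketch_test_and_set_bit(offset >> SKETCH_PAGE_BITS, bmap)) {
            (*dirty_pages)++;
        }
    }
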
From e6f4aa188cf1849b2a4949e62fb04ea44ca0d083 Mon Sep 17 00:00:00 2001
From: Zhang Chen
Date: Mon, 3 Sep 2018 12:38:50 +0800
Subject: COLO: Flush memory data from ram cache

While the VM is running, the PVM may dirty some pages; we transfer those
dirty pages to the SVM and store them into the SVM's RAM cache at the next
checkpoint. So the content of the SVM's RAM cache is always the same as
the PVM's memory after a checkpoint.

Instead of flushing the entire content of the RAM cache into the SVM's
memory, we do this in a more efficient way: only flush the pages that were
dirtied by the PVM since the last checkpoint. This is enough to keep the
SVM's memory identical to the PVM's.

Besides, we must make sure the RAM cache is flushed before the device
state is loaded.

Signed-off-by: zhanghailiang
Signed-off-by: Li Zhijian
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Jason Wang
---
 migration/ram.c        | 37 +++++++++++++++++++++++++++++++++++++
 migration/trace-events |  2 ++
 2 files changed, 39 insertions(+)

(limited to 'migration/ram.c')

diff --git a/migration/ram.c b/migration/ram.c
index 404c8f0853..477853d777 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3974,6 +3974,39 @@ static bool postcopy_is_running(void)
     return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
 }
 
+/*
+ * Flush content of the RAM cache into the SVM's memory.
+ * Only flush the pages that were dirtied by the PVM, the SVM, or both.
+ */
+static void colo_flush_ram_cache(void)
+{
+    RAMBlock *block = NULL;
+    void *dst_host;
+    void *src_host;
+    unsigned long offset = 0;
+
+    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
+    rcu_read_lock();
+    block = QLIST_FIRST_RCU(&ram_list.blocks);
+
+    while (block) {
+        offset = migration_bitmap_find_dirty(ram_state, block, offset);
+
+        if (offset << TARGET_PAGE_BITS >= block->used_length) {
+            offset = 0;
+            block = QLIST_NEXT_RCU(block, next);
+        } else {
+            migration_bitmap_clear_dirty(ram_state, block, offset);
+            dst_host = block->host + (offset << TARGET_PAGE_BITS);
+            src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
+            memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
+        }
+    }
+
+    rcu_read_unlock();
+    trace_colo_flush_ram_cache_end();
+}
+
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     int flags = 0, ret = 0, invalid_flags = 0;
@@ -4150,6 +4183,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
     ret |= wait_for_decompress_done();
     rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
+
+    if (!ret && migration_incoming_in_colo_state()) {
+        colo_flush_ram_cache();
+    }
     return ret;
 }
 
diff --git a/migration/trace-events b/migration/trace-events
index fa0ff3f3bf..bd2d0cd25a 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -102,6 +102,8 @@ ram_dirty_bitmap_sync_start(void) ""
 ram_dirty_bitmap_sync_wait(void) ""
 ram_dirty_bitmap_sync_complete(void) ""
 ram_state_resume_prepare(uint64_t v) "%" PRId64
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
+colo_flush_ram_cache_end(void) ""
 
 # migration/migration.c
 await_return_path_close_on_source_close(void) ""
-- 
cgit v1.2.3-55-g7522
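
The flush loop added here visits only the pages whose bits are set, clears
each bit, and copies that single page from the cache into live memory. A
simplified, single-block sketch of the same idea follows; the helper names
are illustrative and are not QEMU's migration_bitmap_* API.

    #include <limits.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    #define SKETCH_PAGE_BITS 12
    #define SKETCH_PAGE_SIZE (1UL << SKETCH_PAGE_BITS)
    #define SKETCH_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static bool sketch_test_bit(size_t nr, const unsigned long *map)
    {
        return map[nr / SKETCH_BITS_PER_LONG] & (1UL << (nr % SKETCH_BITS_PER_LONG));
    }

    static void sketch_clear_bit(size_t nr, unsigned long *map)
    {
        map[nr / SKETCH_BITS_PER_LONG] &= ~(1UL << (nr % SKETCH_BITS_PER_LONG));
    }

    /*
     * Copy only the pages marked dirty in 'bmap' from the cache into live
     * memory, clearing each bit as it is handled; the same idea as
     * colo_flush_ram_cache(), without RCU or the walk over multiple blocks.
     */
    static void sketch_flush_dirty(unsigned char *host, const unsigned char *cache,
                                   size_t length, unsigned long *bmap)
    {
        size_t npages = length >> SKETCH_PAGE_BITS;
        size_t page;

        for (page = 0; page < npages; page++) {
            if (sketch_test_bit(page, bmap)) {
                sketch_clear_bit(page, bmap);
                memcpy(host + (page << SKETCH_PAGE_BITS),
                       cache + (page << SKETCH_PAGE_BITS),
                       SKETCH_PAGE_SIZE);
            }
        }
    }
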
From d1955d22197615eb65ec7b7ddc1242e5103f5b50 Mon Sep 17 00:00:00 2001
From: zhanghailiang
Date: Mon, 3 Sep 2018 12:38:55 +0800
Subject: COLO: flush host dirty ram from cache

There is no need to flush all of the VM's RAM from the cache; only flush
the pages that have been dirtied since the last checkpoint.

Signed-off-by: Li Zhijian
Signed-off-by: Zhang Chen
Signed-off-by: Zhang Chen
Signed-off-by: zhanghailiang
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Jason Wang
---
 migration/ram.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'migration/ram.c')

diff --git a/migration/ram.c b/migration/ram.c
index 477853d777..7e7deec4d8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3714,6 +3714,7 @@ int colo_init_ram_cache(void)
     }
     ram_state = g_new0(RAMState, 1);
     ram_state->migration_dirty_pages = 0;
+    memory_global_dirty_log_start();
 
     return 0;
 
@@ -3735,6 +3736,7 @@ void colo_release_ram_cache(void)
 {
     RAMBlock *block;
 
+    memory_global_dirty_log_stop();
     RAMBLOCK_FOREACH_MIGRATABLE(block) {
         g_free(block->bmap);
         block->bmap = NULL;
@@ -3985,6 +3987,13 @@ static void colo_flush_ram_cache(void)
     void *src_host;
     unsigned long offset = 0;
 
+    memory_global_dirty_log_sync();
+    rcu_read_lock();
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
+    }
+    rcu_read_unlock();
+
     trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
     rcu_read_lock();
     block = QLIST_FIRST_RCU(&ram_list.blocks);
-- 
cgit v1.2.3-55-g7522
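
Conceptually, the dirty-log sync added before the flush folds a second source
of dirty pages (those written by the SVM itself) into the same bitmap, so that
they too are overwritten from the cache. A rough sketch of that merge step is
shown below, with a hypothetical name and a plain bitmap standing in for the
dirty log; it is not how QEMU's memory_global_dirty_log_sync() is implemented.

    #include <limits.h>
    #include <stddef.h>

    #define SKETCH_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /*
     * Before flushing, fold the pages the SVM itself dirtied (host_dirty)
     * into the bitmap of pages received from the PVM, so that both kinds of
     * dirty pages are overwritten from the cache on the next flush.
     */
    static void sketch_merge_host_dirty(unsigned long *bmap,
                                        const unsigned long *host_dirty,
                                        size_t npages)
    {
        size_t nwords = (npages + SKETCH_BITS_PER_LONG - 1) / SKETCH_BITS_PER_LONG;
        size_t i;

        for (i = 0; i < nwords; i++) {
            bmap[i] |= host_dirty[i];
        }
    }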