Diffstat (limited to 'migration/ram.c')
-rw-r--r--   migration/ram.c   182
1 files changed, 165 insertions, 17 deletions
diff --git a/migration/ram.c b/migration/ram.c
index bb908822d5..680a5158aa 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -811,7 +811,7 @@ static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
     assert(shift >= 6);
 
     size = 1ULL << (TARGET_PAGE_BITS + shift);
-    start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
+    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
     trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
     memory_region_clear_dirty_bitmap(rb->mr, start, size);
 }
@@ -858,6 +858,81 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
     return ret;
 }
 
+static void dirty_bitmap_clear_section(MemoryRegionSection *section,
+                                       void *opaque)
+{
+    const hwaddr offset = section->offset_within_region;
+    const hwaddr size = int128_get64(section->size);
+    const unsigned long start = offset >> TARGET_PAGE_BITS;
+    const unsigned long npages = size >> TARGET_PAGE_BITS;
+    RAMBlock *rb = section->mr->ram_block;
+    uint64_t *cleared_bits = opaque;
+
+    /*
+     * We don't grab ram_state->bitmap_mutex because we expect to run
+     * only when starting migration or during postcopy recovery where
+     * we don't have concurrent access.
+     */
+    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
+        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
+    }
+    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
+    bitmap_clear(rb->bmap, start, npages);
+}
+
+/*
+ * Exclude all dirty pages from migration that fall into a discarded range as
+ * managed by a RamDiscardManager responsible for the mapped memory region of
+ * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
+ *
+ * Discarded pages ("logically unplugged") have undefined content and must
+ * not get migrated, because even reading these pages for migration might
+ * result in undesired behavior.
+ *
+ * Returns the number of cleared bits in the RAMBlock dirty bitmap.
+ *
+ * Note: The result is only stable while migrating (precopy/postcopy).
+ */
+static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
+{
+    uint64_t cleared_bits = 0;
+
+    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
+        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        MemoryRegionSection section = {
+            .mr = rb->mr,
+            .offset_within_region = 0,
+            .size = int128_make64(qemu_ram_get_used_length(rb)),
+        };
+
+        ram_discard_manager_replay_discarded(rdm, &section,
+                                             dirty_bitmap_clear_section,
+                                             &cleared_bits);
+    }
+    return cleared_bits;
+}
+
+/*
+ * Check if a host-page aligned page falls into a discarded range as managed by
+ * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
+ *
+ * Note: The result is only stable while migrating (precopy/postcopy).
+ */
+bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
+{
+    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
+        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        MemoryRegionSection section = {
+            .mr = rb->mr,
+            .offset_within_region = start,
+            .size = int128_make64(qemu_ram_pagesize(rb)),
+        };
+
+        return !ram_discard_manager_is_populated(rdm, &section);
+    }
+    return false;
+}
+
 /* Called with RCU critical section */
 static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
 {
@@ -1564,25 +1639,68 @@ out:
     return ret;
 }
 
+static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
+                                       ram_addr_t size)
+{
+    /*
+     * We read one byte of each page; this will preallocate page tables if
+     * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
+     * where no page was populated yet. This might require adaption when
+     * supporting other mappings, like shmem.
+     */
+    for (; offset < size; offset += block->page_size) {
+        char tmp = *((char *)block->host + offset);
+
+        /* Don't optimize the read out */
+        asm volatile("" : "+r" (tmp));
+    }
+}
+
+static inline int populate_read_section(MemoryRegionSection *section,
+                                        void *opaque)
+{
+    const hwaddr size = int128_get64(section->size);
+    hwaddr offset = section->offset_within_region;
+    RAMBlock *block = section->mr->ram_block;
+
+    populate_read_range(block, offset, size);
+    return 0;
+}
+
 /*
- * ram_block_populate_pages: populate memory in the RAM block by reading
- *   an integer from the beginning of each page.
+ * ram_block_populate_read: preallocate page tables and populate pages in the
+ *   RAM block by reading a byte of each page.
  *
  * Since it's solely used for userfault_fd WP feature, here we just
  *   hardcode page size to qemu_real_host_page_size.
  *
  * @block: RAM block to populate
  */
-static void ram_block_populate_pages(RAMBlock *block)
+static void ram_block_populate_read(RAMBlock *rb)
 {
-    char *ptr = (char *) block->host;
-
-    for (ram_addr_t offset = 0; offset < block->used_length;
-            offset += qemu_real_host_page_size) {
-        char tmp = *(ptr + offset);
-
-        /* Don't optimize the read out */
-        asm volatile("" : "+r" (tmp));
+    /*
+     * Skip populating all pages that fall into a discarded range as managed by
+     * a RamDiscardManager responsible for the mapped memory region of the
+     * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
+     * must not get populated automatically. We don't have to track
+     * modifications via userfaultfd WP reliably, because these pages will
+     * not be part of the migration stream either way -- see
+     * ramblock_dirty_bitmap_exclude_discarded_pages().
+     *
+     * Note: The result is only stable while migrating (precopy/postcopy).
+     */
+    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
+        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        MemoryRegionSection section = {
+            .mr = rb->mr,
+            .offset_within_region = 0,
+            .size = rb->mr->size,
+        };
+
+        ram_discard_manager_replay_populated(rdm, &section,
+                                             populate_read_section, NULL);
+    } else {
+        populate_read_range(rb, 0, rb->used_length);
     }
 }
 
@@ -1609,7 +1727,7 @@ void ram_write_tracking_prepare(void)
          * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
          * pages with pte_none() entries in page table.
          */
-        ram_block_populate_pages(block);
+        ram_block_populate_read(block);
     }
 }
 
@@ -2216,7 +2334,14 @@ static void ram_save_cleanup(void *opaque)
         /* caller have hold iothread lock or is in a bh, so there is
          * no writing race against the migration bitmap
          */
-        memory_global_dirty_log_stop();
+        if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
+            /*
+             * do not stop dirty log without starting it, since
+             * memory_global_dirty_log_stop will assert that
+             * memory_global_dirty_log_start/stop used in pairs
+             */
+            memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
+        }
     }
 
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
@@ -2668,6 +2793,19 @@ static void ram_list_init_bitmaps(void)
     }
 }
 
+static void migration_bitmap_clear_discarded_pages(RAMState *rs)
+{
+    unsigned long pages;
+    RAMBlock *rb;
+
+    RCU_READ_LOCK_GUARD();
+
+    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
+        pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
+        rs->migration_dirty_pages -= pages;
+    }
+}
+
 static void ram_init_bitmaps(RAMState *rs)
 {
     /* For memory_global_dirty_log_start below. */
@@ -2678,12 +2816,18 @@ static void ram_init_bitmaps(RAMState *rs)
         ram_list_init_bitmaps();
         /* We don't use dirty log with background snapshots */
         if (!migrate_background_snapshot()) {
-            memory_global_dirty_log_start();
+            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
             migration_bitmap_sync_precopy(rs);
         }
     }
     qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
+
+    /*
+     * After an eventual first bitmap sync, fixup the initial bitmap
+     * containing all 1s to exclude any discarded pages from migration.
+     */
+    migration_bitmap_clear_discarded_pages(rs);
 }
 
 static int ram_init_all(RAMState **rsp)
@@ -3434,7 +3578,7 @@ void colo_incoming_start_dirty_log(void)
             /* Discard this dirty bitmap record */
             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
         }
-        memory_global_dirty_log_start();
+        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
     }
     ram_state->migration_dirty_pages = 0;
     qemu_mutex_unlock_ramlist();
@@ -3446,7 +3590,7 @@ void colo_release_ram_cache(void)
 {
     RAMBlock *block;
 
-    memory_global_dirty_log_stop();
+    memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
         g_free(block->bmap);
         block->bmap = NULL;
@@ -4112,6 +4256,10 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
      */
     bitmap_complement(block->bmap, block->bmap, nbits);
 
+    /* Clear dirty bits of discarded ranges that we don't want to migrate. */
+    ramblock_dirty_bitmap_clear_discarded_pages(block);
+
+    /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
     trace_ram_dirty_bitmap_reload_complete(block->idstr);
 
     /*
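
The new dirty_bitmap_clear_section() / ramblock_dirty_bitmap_clear_discarded_pages() pair boils down to simple bookkeeping: for every range the RamDiscardManager reports as discarded, count how many bits are still set in the RAMBlock's dirty bitmap, clear them, and subtract that count from migration_dirty_pages so the remaining-work estimate stays accurate. A minimal self-contained sketch of that bookkeeping (plain C with a toy single-word bitmap; the names and sizes are illustrative only, no QEMU APIs involved):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NPAGES 64                     /* toy RAM block: 64 target pages */

static uint64_t dirty_bitmap = ~0ULL; /* first sync marks every page dirty */
static uint64_t dirty_pages = NPAGES; /* like RAMState::migration_dirty_pages */

/* Clear bits [start, start + npages) and return how many were actually set. */
static uint64_t clear_discarded_range(unsigned int start, unsigned int npages)
{
    uint64_t cleared = 0;

    for (unsigned int i = start; i < start + npages; i++) {
        if (dirty_bitmap & (1ULL << i)) {
            dirty_bitmap &= ~(1ULL << i);
            cleared++;
        }
    }
    return cleared;
}

int main(void)
{
    /* Pretend pages 16..31 were discarded ("logically unplugged"). */
    dirty_pages -= clear_discarded_range(16, 16);

    printf("pages left to migrate: %" PRIu64 "\n", dirty_pages);
    return 0;
}

In the patch itself, the clearing additionally forwards to migration_clear_memory_region_dirty_bitmap_range() so the memory-region clear bitmap stays in sync, except during postcopy recovery or background snapshots, where only the plain bitmap update above is performed.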
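populate_read_range() relies on a general Linux property: reading a single byte from each page is enough to allocate page tables and, for MAP_PRIVATE anonymous memory, map the shared zeropage, which userfaultfd write protection needs before it can be armed. A standalone illustration of the same trick outside QEMU (Linux-only sketch; the buffer size and names are arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    size_t len = 16 * page;
    char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (buf == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }

    /*
     * Touch one byte per page. The volatile access keeps the compiler from
     * dropping the read, mirroring the asm("" : "+r" (tmp)) barrier in
     * populate_read_range(); afterwards every page has a populated PTE.
     */
    for (size_t off = 0; off < len; off += page) {
        volatile char tmp = *(buf + off);
        (void)tmp;
    }

    munmap(buf, len);
    return EXIT_SUCCESS;
}

In ram_block_populate_read() this walk is restricted to populated ranges by replaying them through ram_discard_manager_replay_populated(), so discarded parts of a RamDiscardManager-managed RAMBlock are never touched.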