diff options
author | Stefan Hajnoczi | 2022-11-21 15:26:18 +0100 |
---|---|---|
committer | Stefan Hajnoczi | 2022-11-21 15:26:18 +0100 |
commit | af29446f32df4f369d4ee32d721fc3c989095731 (patch) | |
tree | 48d402001ba9b7f0f7a8439359a2683bbb7536ff | |
parent | Merge tag 'chr-pull-request' of https://gitlab.com/marcandre.lureau/qemu into... (diff) | |
parent | migration: Block migration comment or code is wrong (diff) | |
download | qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.gz qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.xz qemu-af29446f32df4f369d4ee32d721fc3c989095731.zip |
Merge tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu into staging
Migration PULL request (take 3)
Hi
Drop everything that is not a bug fix:
- fixes by peter
- fix comment on block creation (me)
- fix return values from qio_channel_block()
Please, apply.
(take 1)
It includes:
- Leonardo fix for zero_copy flush
- Fiona fix for return value of readv/writev
- Peter Xu cleanups
- Peter Xu preempt patches
- Patches ready from zero page (me)
- AVX2 support (ling)
- fix for slow networking and reordering of first packets (manish)
Please, apply.
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmN7dhUACgkQ9IfvGFhy
# 1yN0GhAAmpBGFomPXqOhixXcZdCOpFvLVKU13O+okp2NgY9W5Qlicf6ANo0cbvUh
# VVLCnXToySbP+7TLLqZjT4mVgM6EUIk1xqUXXICJ1mXIznvMnMtnseMNX033E2RL
# mhIVx+2AsoClWR9AdQVrzvjwR/gmzEa915w1HnHVfLFSPWmIfd9iWvOEenf5SYY5
# R7yAq0tWohOAtPiyrFAchcyTidW7pB2ZqD85ZEuGQ6EBpPxHM2NZ46NuK52j02k3
# eKGrKBFAh4QTRf5+QT0ASAGUqxPYM3iT/WOw3FZkZDQoedcReeECgDh1gfdd27iH
# Rebn+UHThgofBAspFVrJs9rSVlOnDdDp7yY1YDC6s6285Dci9JyWe0raIyvfdBK7
# h+AtBFLZVkIR0LXu4NlVe4IHnO5t/XVsLPwZ+7SQ9fc3gezAn4kAiEf+m8umTgho
# n3Jo+2dl52QoMOW2OsX9199g0lorQAby6bJVG4xbq82ijE9N1NHuLe44w9OGZTKg
# 697cNPDaoSRrvAdCPPh5KaZXsxpfLPxoMlZWxCTsNvs/jCzGs7AnvbU0QHlB+skU
# R2Ae42QBq6ZSogtN8tNZFPH82Z6xTOJNILtmMgEQGAjLf3yOd8T5gZLsYNujTOyJ
# ZsahXU0yRTkGmCkzCyr//mGu4KEPWtDOq27QqQPFfayvhr16ECw=
# =dosb
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 21 Nov 2022 07:59:01 EST
# gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg: aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723
* tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu:
migration: Block migration comment or code is wrong
migration: Disable multifd explicitly with compression
migration: Use non-atomic ops for clear log bitmap
migration: Disallow postcopy preempt to be used with compress
migration: Fix race on qemu_file_shutdown()
migration: Fix possible infinite loop of ram save process
migration/multifd/zero-copy: Create helper function for flushing
migration/channel-block: fix return value for qio_channel_block_{readv,writev}
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | include/exec/ram_addr.h | 11 | ||||
-rw-r--r-- | include/exec/ramblock.h | 3 | ||||
-rw-r--r-- | include/qemu/bitmap.h | 1 | ||||
-rw-r--r-- | migration/block.c | 4 | ||||
-rw-r--r-- | migration/channel-block.c | 6 | ||||
-rw-r--r-- | migration/migration.c | 18 | ||||
-rw-r--r-- | migration/multifd.c | 30 | ||||
-rw-r--r-- | migration/qemu-file.c | 27 | ||||
-rw-r--r-- | migration/ram.c | 27 | ||||
-rw-r--r-- | util/bitmap.c | 45 |
10 files changed, 139 insertions, 33 deletions
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 1500680458..f4fb6a2111 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -42,7 +42,8 @@ static inline long clear_bmap_size(uint64_t pages, uint8_t shift) } /** - * clear_bmap_set: set clear bitmap for the page range + * clear_bmap_set: set clear bitmap for the page range. Must be with + * bitmap_mutex held. * * @rb: the ramblock to operate on * @start: the start page number @@ -55,12 +56,12 @@ static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, { uint8_t shift = rb->clear_bmap_shift; - bitmap_set_atomic(rb->clear_bmap, start >> shift, - clear_bmap_size(npages, shift)); + bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift)); } /** - * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set + * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set. + * Must be with bitmap_mutex held. * * @rb: the ramblock to operate on * @page: the page number to check @@ -71,7 +72,7 @@ static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) { uint8_t shift = rb->clear_bmap_shift; - return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1); + return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1); } static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 6cbedf9e0c..adc03df59c 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -53,6 +53,9 @@ struct RAMBlock { * and split clearing of dirty bitmap on the remote node (e.g., * KVM). The bitmap will be set only when doing global sync. * + * It is only used during src side of ram migration, and it is + * protected by the global ram_state.bitmap_mutex. + * * NOTE: this bitmap is different comparing to the other bitmaps * in that one bit can represent multiple guest pages (which is * decided by the `clear_bmap_shift' variable below). On diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 82a1d2f41f..3ccb00865f 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -253,6 +253,7 @@ void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr); +bool bitmap_test_and_clear(unsigned long *map, long start, long nr); void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src, long nr); unsigned long bitmap_find_next_zero_area(unsigned long *map, diff --git a/migration/block.c b/migration/block.c index 3577c815a9..4347da1526 100644 --- a/migration/block.c +++ b/migration/block.c @@ -880,8 +880,8 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, blk_mig_unlock(); /* Report at least one block pending during bulk phase */ - if (pending <= max_size && !block_mig_state.bulk_completed) { - pending = max_size + BLK_MIG_BLOCK_SIZE; + if (!pending && !block_mig_state.bulk_completed) { + pending = BLK_MIG_BLOCK_SIZE; } trace_migration_block_save_pending(pending); diff --git a/migration/channel-block.c b/migration/channel-block.c index c55c8c93ce..f4ab53acdb 100644 --- a/migration/channel-block.c +++ b/migration/channel-block.c @@ -62,7 +62,8 @@ qio_channel_block_readv(QIOChannel *ioc, qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov); ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset); if (ret < 0) { - return ret; + error_setg_errno(errp, -ret, "bdrv_readv_vmstate failed"); + return -1; } bioc->offset += qiov.size; @@ -86,7 +87,8 @@ qio_channel_block_writev(QIOChannel *ioc, qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov); ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset); if (ret < 0) { - return ret; + error_setg_errno(errp, -ret, "bdrv_writev_vmstate failed"); + return -1; } bioc->offset += qiov.size; diff --git a/migration/migration.c b/migration/migration.c index 739bb683f3..f485eea5fb 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1337,6 +1337,24 @@ static bool migrate_caps_check(bool *cap_list, error_setg(errp, "Postcopy preempt requires postcopy-ram"); return false; } + + /* + * Preempt mode requires urgent pages to be sent in separate + * channel, OTOH compression logic will disorder all pages into + * different compression channels, which is not compatible with the + * preempt assumptions on channel assignments. + */ + if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Postcopy preempt not compatible with compress"); + return false; + } + } + + if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { + if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Multifd is not compatible with compress"); + return false; + } } return true; diff --git a/migration/multifd.c b/migration/multifd.c index 586ddc9d65..509bbbe3bf 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -566,6 +566,23 @@ void multifd_save_cleanup(void) multifd_send_state = NULL; } +static int multifd_zero_copy_flush(QIOChannel *c) +{ + int ret; + Error *err = NULL; + + ret = qio_channel_flush(c, &err); + if (ret < 0) { + error_report_err(err); + return -1; + } + if (ret == 1) { + dirty_sync_missed_zero_copy(); + } + + return ret; +} + int multifd_send_sync_main(QEMUFile *f) { int i; @@ -616,17 +633,8 @@ int multifd_send_sync_main(QEMUFile *f) qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); - if (flush_zero_copy && p->c) { - int ret; - Error *err = NULL; - - ret = qio_channel_flush(p->c, &err); - if (ret < 0) { - error_report_err(err); - return -1; - } else if (ret == 1) { - dirty_sync_missed_zero_copy(); - } + if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) { + return -1; } } for (i = 0; i < migrate_multifd_channels(); i++) { diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 4f400c2e52..2d5f74ffc2 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -79,6 +79,30 @@ int qemu_file_shutdown(QEMUFile *f) int ret = 0; f->shutdown = true; + + /* + * We must set qemufile error before the real shutdown(), otherwise + * there can be a race window where we thought IO all went though + * (because last_error==NULL) but actually IO has already stopped. + * + * If without correct ordering, the race can happen like this: + * + * page receiver other thread + * ------------- ------------ + * qemu_get_buffer() + * do shutdown() + * returns 0 (buffer all zero) + * (we didn't check this retcode) + * try to detect IO error + * last_error==NULL, IO okay + * install ALL-ZERO page + * set last_error + * --> guest crash! + */ + if (!f->last_error) { + qemu_file_set_error(f, -EIO); + } + if (!qio_channel_has_feature(f->ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)) { return -ENOSYS; @@ -88,9 +112,6 @@ int qemu_file_shutdown(QEMUFile *f) ret = -EIO; } - if (!f->last_error) { - qemu_file_set_error(f, -EIO); - } return ret; } diff --git a/migration/ram.c b/migration/ram.c index dc1de9ddbc..1338e47665 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2305,13 +2305,12 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) } /* - * Do not use multifd for: - * 1. Compression as the first page in the new block should be posted out - * before sending the compressed page - * 2. In postcopy as one whole host page should be placed + * Do not use multifd in postcopy as one whole host page should be + * placed. Meanwhile postcopy requires atomic update of pages, so even + * if host page size == guest page size the dest guest during run may + * still see partially copied pages which is data corruption. */ - if (!save_page_use_compression(rs) && migrate_use_multifd() - && !migration_in_postcopy()) { + if (migrate_use_multifd() && !migration_in_postcopy()) { return ram_save_multifd_page(rs, block, offset); } @@ -2546,14 +2545,22 @@ static int ram_find_and_save_block(RAMState *rs) return pages; } + /* + * Always keep last_seen_block/last_page valid during this procedure, + * because find_dirty_block() relies on these values (e.g., we compare + * last_seen_block with pss.block to see whether we searched all the + * ramblocks) to detect the completion of migration. Having NULL value + * of last_seen_block can conditionally cause below loop to run forever. + */ + if (!rs->last_seen_block) { + rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks); + rs->last_page = 0; + } + pss.block = rs->last_seen_block; pss.page = rs->last_page; pss.complete_round = false; - if (!pss.block) { - pss.block = QLIST_FIRST_RCU(&ram_list.blocks); - } - do { again = true; found = get_queued_page(rs, &pss); diff --git a/util/bitmap.c b/util/bitmap.c index f81d8057a7..8d12e90a5a 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -240,6 +240,51 @@ void bitmap_clear(unsigned long *map, long start, long nr) } } +bool bitmap_test_and_clear(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + bool dirty = false; + + assert(start >= 0 && nr >= 0); + + /* First word */ + if (nr - bits_to_clear > 0) { + if ((*p) & mask_to_clear) { + dirty = true; + } + *p &= ~mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + p++; + } + + /* Full words */ + if (bits_to_clear == BITS_PER_LONG) { + while (nr >= BITS_PER_LONG) { + if (*p) { + dirty = true; + *p = 0; + } + nr -= BITS_PER_LONG; + p++; + } + } + + /* Last word */ + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + if ((*p) & mask_to_clear) { + dirty = true; + } + *p &= ~mask_to_clear; + } + + return dirty; +} + bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) { unsigned long *p = map + BIT_WORD(start); |