 include/exec/ram_addr.h   | 11
 include/exec/ramblock.h   |  3
 include/qemu/bitmap.h     |  1
 migration/block.c         |  4
 migration/channel-block.c |  6
 migration/migration.c     | 18
 migration/multifd.c       | 30
 migration/qemu-file.c     | 27
 migration/ram.c           | 27
 util/bitmap.c             | 45
 10 files changed, 139 insertions(+), 33 deletions(-)
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 1500680458..f4fb6a2111 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -42,7 +42,8 @@ static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
}
/**
- * clear_bmap_set: set clear bitmap for the page range
+ * clear_bmap_set: set clear bitmap for the page range.  Must be called
+ * with bitmap_mutex held.
*
* @rb: the ramblock to operate on
* @start: the start page number
@@ -55,12 +56,12 @@ static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
{
uint8_t shift = rb->clear_bmap_shift;
- bitmap_set_atomic(rb->clear_bmap, start >> shift,
- clear_bmap_size(npages, shift));
+ bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}
/**
- * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
+ * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
+ * Must be called with bitmap_mutex held.
*
* @rb: the ramblock to operate on
* @page: the page number to check
@@ -71,7 +72,7 @@ static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
uint8_t shift = rb->clear_bmap_shift;
- return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
+ return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 6cbedf9e0c..adc03df59c 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -53,6 +53,9 @@ struct RAMBlock {
* and split clearing of dirty bitmap on the remote node (e.g.,
* KVM). The bitmap will be set only when doing global sync.
*
+ * It is only used on the source side of RAM migration, and it is
+ * protected by the global ram_state.bitmap_mutex.
+ *
* NOTE: this bitmap is different comparing to the other bitmaps
* in that one bit can represent multiple guest pages (which is
* decided by the `clear_bmap_shift' variable below). On
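The locking contract the two comments above describe looks roughly like the
sketch below on the caller side. This is only an illustration (simplified from
migration_bitmap_clear_dirty() in migration/ram.c); the variable names and the
surrounding code are assumptions, not part of this patch:

    /* Caller must hold ram_state.bitmap_mutex around clear_bmap accesses
     * now that the atomic helpers are gone (illustrative sketch only). */
    qemu_mutex_lock(&rs->bitmap_mutex);
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        /* Lazily clear the remote (e.g. KVM) dirty bitmap for this chunk;
         * start/size stand for the chunk-aligned address and length. */
        memory_region_clear_dirty_bitmap(rb->mr, start, size);
    }
    ret = test_and_clear_bit(page, rb->bmap);
    qemu_mutex_unlock(&rs->bitmap_mutex);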
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 82a1d2f41f..3ccb00865f 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -253,6 +253,7 @@ void bitmap_set(unsigned long *map, long i, long len);
void bitmap_set_atomic(unsigned long *map, long i, long len);
void bitmap_clear(unsigned long *map, long start, long nr);
bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr);
void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
long nr);
unsigned long bitmap_find_next_zero_area(unsigned long *map,
diff --git a/migration/block.c b/migration/block.c
index 3577c815a9..4347da1526 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -880,8 +880,8 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
blk_mig_unlock();
/* Report at least one block pending during bulk phase */
- if (pending <= max_size && !block_mig_state.bulk_completed) {
- pending = max_size + BLK_MIG_BLOCK_SIZE;
+ if (!pending && !block_mig_state.bulk_completed) {
+ pending = BLK_MIG_BLOCK_SIZE;
}
trace_migration_block_save_pending(pending);
diff --git a/migration/channel-block.c b/migration/channel-block.c
index c55c8c93ce..f4ab53acdb 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -62,7 +62,8 @@ qio_channel_block_readv(QIOChannel *ioc,
qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset);
if (ret < 0) {
- return ret;
+ error_setg_errno(errp, -ret, "bdrv_readv_vmstate failed");
+ return -1;
}
bioc->offset += qiov.size;
@@ -86,7 +87,8 @@ qio_channel_block_writev(QIOChannel *ioc,
qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset);
if (ret < 0) {
- return ret;
+ error_setg_errno(errp, -ret, "bdrv_writev_vmstate failed");
+ return -1;
}
bioc->offset += qiov.size;
diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..f485eea5fb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1337,6 +1337,24 @@ static bool migrate_caps_check(bool *cap_list,
error_setg(errp, "Postcopy preempt requires postcopy-ram");
return false;
}
+
+ /*
+ * Preempt mode requires urgent pages to be sent on a separate
+ * channel, while the compression logic scatters pages across
+ * different compression channels, which is not compatible with
+ * the preempt assumptions on channel assignment.
+ */
+ if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
+ error_setg(errp, "Postcopy preempt not compatible with compress");
+ return false;
+ }
+ }
+
+ if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
+ if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
+ error_setg(errp, "Multifd is not compatible with compress");
+ return false;
+ }
}
return true;
diff --git a/migration/multifd.c b/migration/multifd.c
index 586ddc9d65..509bbbe3bf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -566,6 +566,23 @@ void multifd_save_cleanup(void)
multifd_send_state = NULL;
}
+static int multifd_zero_copy_flush(QIOChannel *c)
+{
+ int ret;
+ Error *err = NULL;
+
+ ret = qio_channel_flush(c, &err);
+ if (ret < 0) {
+ error_report_err(err);
+ return -1;
+ }
+ if (ret == 1) {
+ dirty_sync_missed_zero_copy();
+ }
+
+ return ret;
+}
+
int multifd_send_sync_main(QEMUFile *f)
{
int i;
@@ -616,17 +633,8 @@ int multifd_send_sync_main(QEMUFile *f)
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
- if (flush_zero_copy && p->c) {
- int ret;
- Error *err = NULL;
-
- ret = qio_channel_flush(p->c, &err);
- if (ret < 0) {
- error_report_err(err);
- return -1;
- } else if (ret == 1) {
- dirty_sync_missed_zero_copy();
- }
+ if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
+ return -1;
}
}
for (i = 0; i < migrate_multifd_channels(); i++) {
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 4f400c2e52..2d5f74ffc2 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -79,6 +79,30 @@ int qemu_file_shutdown(QEMUFile *f)
int ret = 0;
f->shutdown = true;
+
+ /*
+ * We must set the qemufile error before the real shutdown(), otherwise
+ * there can be a race window where we think all IO went through
+ * (because last_error==NULL) but actually IO has already stopped.
+ *
+ * Without the correct ordering, the race can happen like this:
+ *
+ * page receiver other thread
+ * ------------- ------------
+ * qemu_get_buffer()
+ * do shutdown()
+ * returns 0 (buffer all zero)
+ * (we didn't check this retcode)
+ * try to detect IO error
+ * last_error==NULL, IO okay
+ * install ALL-ZERO page
+ * set last_error
+ * --> guest crash!
+ */
+ if (!f->last_error) {
+ qemu_file_set_error(f, -EIO);
+ }
+
if (!qio_channel_has_feature(f->ioc,
QIO_CHANNEL_FEATURE_SHUTDOWN)) {
return -ENOSYS;
@@ -88,9 +112,6 @@ int qemu_file_shutdown(QEMUFile *f)
ret = -EIO;
}
- if (!f->last_error) {
- qemu_file_set_error(f, -EIO);
- }
return ret;
}
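The ordering argument above can be made concrete with a small receiver-side
sketch. It is illustrative only and simplified from the RAM load path; the
exact call site is an assumption, not part of this patch:

    /* With the error latched before shutdown(), a truncated read is caught
     * here instead of silently installing an all-zero page. */
    qemu_get_buffer(f, host_page, TARGET_PAGE_SIZE);
    if (qemu_file_get_error(f)) {
        return -EIO;
    }
    /* Only install the page once the read is known to have succeeded. */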
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9ddbc..1338e47665 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2305,13 +2305,12 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
}
/*
- * Do not use multifd for:
- * 1. Compression as the first page in the new block should be posted out
- * before sending the compressed page
- * 2. In postcopy as one whole host page should be placed
+ * Do not use multifd in postcopy, as a whole host page must be placed
+ * at once.  Postcopy also requires atomic updates of pages, so even
+ * if host page size == guest page size the running destination guest
+ * may still see partially copied pages, which is data corruption.
*/
- if (!save_page_use_compression(rs) && migrate_use_multifd()
- && !migration_in_postcopy()) {
+ if (migrate_use_multifd() && !migration_in_postcopy()) {
return ram_save_multifd_page(rs, block, offset);
}
@@ -2546,14 +2545,22 @@ static int ram_find_and_save_block(RAMState *rs)
return pages;
}
+ /*
+ * Always keep last_seen_block/last_page valid during this procedure,
+ * because find_dirty_block() relies on these values (e.g., we compare
+ * last_seen_block with pss.block to see whether we searched all the
+ * ramblocks) to detect the completion of migration.  A NULL
+ * last_seen_block can cause the loop below to run forever under some
+ * conditions.
+ */
+ if (!rs->last_seen_block) {
+ rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
+ rs->last_page = 0;
+ }
+
pss.block = rs->last_seen_block;
pss.page = rs->last_page;
pss.complete_round = false;
- if (!pss.block) {
- pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
- }
-
do {
again = true;
found = get_queued_page(rs, &pss);
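For reference, the check in find_dirty_block() that the comment above relies on
has roughly the following shape (paraphrased, not part of this patch):

    /* Paraphrased from find_dirty_block(): once the search wraps back to
     * where this round started, give up for this iteration.  A NULL
     * last_seen_block could never match here, hence the initialization
     * added above. */
    if (pss->block == rs->last_seen_block && pss->page >= rs->last_page) {
        *again = false;
        return false;
    }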
diff --git a/util/bitmap.c b/util/bitmap.c
index f81d8057a7..8d12e90a5a 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -240,6 +240,51 @@ void bitmap_clear(unsigned long *map, long start, long nr)
}
}
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const long size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+ bool dirty = false;
+
+ assert(start >= 0 && nr >= 0);
+
+ /* First word */
+ if (nr - bits_to_clear > 0) {
+ if ((*p) & mask_to_clear) {
+ dirty = true;
+ }
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ p++;
+ }
+
+ /* Full words */
+ if (bits_to_clear == BITS_PER_LONG) {
+ while (nr >= BITS_PER_LONG) {
+ if (*p) {
+ dirty = true;
+ *p = 0;
+ }
+ nr -= BITS_PER_LONG;
+ p++;
+ }
+ }
+
+ /* Last word */
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ if ((*p) & mask_to_clear) {
+ dirty = true;
+ }
+ *p &= ~mask_to_clear;
+ }
+
+ return dirty;
+}
+
bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
{
unsigned long *p = map + BIT_WORD(start);
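The intended semantics of the new non-atomic helper can be exercised with a
small snippet like the one below (hypothetical example, not part of the patch;
it assumes "qemu/bitmap.h" is included and a 128-bit map):

    unsigned long map[BITS_TO_LONGS(128)] = { 0 };

    bitmap_set(map, 3, 70);                        /* set bits 3..72 */
    assert(bitmap_test_and_clear(map, 0, 64));     /* bits 3..63 were set */
    assert(!bitmap_test_and_clear(map, 0, 64));    /* already clear now */
    assert(bitmap_test_and_clear(map, 64, 64));    /* bits 64..72 still set */

Unlike bitmap_test_and_clear_atomic(), the new variant relies on the caller's
locking (here the migration bitmap_mutex) instead of atomic accesses.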