Merge tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration PULL request (take 3) Hi Drop everything that is not a bug fix: - fixes by peter - fix comment on block creation (me) - fix return values from qio_channel_block() Please, apply. (take 1) It includes: - Leonardo fix for zero_copy flush - Fiona fix for return value of readv/writev - Peter Xu cleanups - Peter Xu preempt patches - Patches ready from zero page (me) - AVX2 support (ling) - fix for slow networking and reordering of first packets (manish) Please, apply. # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmN7dhUACgkQ9IfvGFhy # 1yN0GhAAmpBGFomPXqOhixXcZdCOpFvLVKU13O+okp2NgY9W5Qlicf6ANo0cbvUh # VVLCnXToySbP+7TLLqZjT4mVgM6EUIk1xqUXXICJ1mXIznvMnMtnseMNX033E2RL # mhIVx+2AsoClWR9AdQVrzvjwR/gmzEa915w1HnHVfLFSPWmIfd9iWvOEenf5SYY5 # R7yAq0tWohOAtPiyrFAchcyTidW7pB2ZqD85ZEuGQ6EBpPxHM2NZ46NuK52j02k3 # eKGrKBFAh4QTRf5+QT0ASAGUqxPYM3iT/WOw3FZkZDQoedcReeECgDh1gfdd27iH # Rebn+UHThgofBAspFVrJs9rSVlOnDdDp7yY1YDC6s6285Dci9JyWe0raIyvfdBK7 # h+AtBFLZVkIR0LXu4NlVe4IHnO5t/XVsLPwZ+7SQ9fc3gezAn4kAiEf+m8umTgho # n3Jo+2dl52QoMOW2OsX9199g0lorQAby6bJVG4xbq82ijE9N1NHuLe44w9OGZTKg # 697cNPDaoSRrvAdCPPh5KaZXsxpfLPxoMlZWxCTsNvs/jCzGs7AnvbU0QHlB+skU # R2Ae42QBq6ZSogtN8tNZFPH82Z6xTOJNILtmMgEQGAjLf3yOd8T5gZLsYNujTOyJ # ZsahXU0yRTkGmCkzCyr//mGu4KEPWtDOq27QqQPFfayvhr16ECw= # =dosb # -----END PGP SIGNATURE----- # gpg: Signature made Mon 21 Nov 2022 07:59:01 EST # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu: migration: Block migration comment or code is wrong migration: Disable multifd explicitly with compression migration: Use non-atomic ops for clear log bitmap migration: Disallow postcopy preempt to be used with compress migration: Fix race on qemu_file_shutdown() migration: Fix possible infinite loop of ram save process migration/multifd/zero-copy: Create helper function for flushing migration/channel-block: fix return value for qio_channel_block_{readv,writev} Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
author: Stefan Hajnoczi 2022-11-21 15:26:18 +0100
committer: Stefan Hajnoczi 2022-11-21 15:26:18 +0100
commit: af29446f32df4f369d4ee32d721fc3c989095731 (patch)
tree: 48d402001ba9b7f0f7a8439359a2683bbb7536ff
parent: Merge tag 'chr-pull-request' of https://gitlab.com/marcandre.lureau/qemu into... (diff)
parent: migration: Block migration comment or code is wrong (diff)
download: qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.gz
qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.xz
qemu-af29446f32df4f369d4ee32d721fc3c989095731.zip
10 files changed, 139 insertions, 33 deletions
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 1500680458..f4fb6a2111 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -42,7 +42,8 @@ static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
 }
 
 /**
- * clear_bmap_set: set clear bitmap for the page range
+ * clear_bmap_set: set clear bitmap for the page range.  Must be with
+ * bitmap_mutex held.
  *
  * @rb: the ramblock to operate on
  * @start: the start page number
@@ -55,12 +56,12 @@ static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
 {
     uint8_t shift = rb->clear_bmap_shift;
 
-    bitmap_set_atomic(rb->clear_bmap, start >> shift,
-                      clear_bmap_size(npages, shift));
+    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
 }
 
 /**
- * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
+ * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
+ * Must be with bitmap_mutex held.
  *
  * @rb: the ramblock to operate on
  * @page: the page number to check
@@ -71,7 +72,7 @@ static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
 {
     uint8_t shift = rb->clear_bmap_shift;
 
-    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
+    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
 }
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 6cbedf9e0c..adc03df59c 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -53,6 +53,9 @@ struct RAMBlock {
      * and split clearing of dirty bitmap on the remote node (e.g.,
      * KVM).  The bitmap will be set only when doing global sync.
      *
+     * It is only used during src side of ram migration, and it is
+     * protected by the global ram_state.bitmap_mutex.
+     *
      * NOTE: this bitmap is different comparing to the other bitmaps
      * in that one bit can represent multiple guest pages (which is
      * decided by the `clear_bmap_shift' variable below).  On
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 82a1d2f41f..3ccb00865f 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -253,6 +253,7 @@ void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
 bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr);
 void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
                                   long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
diff --git a/migration/block.c b/migration/block.c
index 3577c815a9..4347da1526 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -880,8 +880,8 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
     blk_mig_unlock();
 
     /* Report at least one block pending during bulk phase */
-    if (pending <= max_size && !block_mig_state.bulk_completed) {
-        pending = max_size + BLK_MIG_BLOCK_SIZE;
+    if (!pending && !block_mig_state.bulk_completed) {
+        pending = BLK_MIG_BLOCK_SIZE;
     }
 
     trace_migration_block_save_pending(pending);
diff --git a/migration/channel-block.c b/migration/channel-block.c
index c55c8c93ce..f4ab53acdb 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -62,7 +62,8 @@ qio_channel_block_readv(QIOChannel *ioc,
     qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
     ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset);
     if (ret < 0) {
-        return ret;
+        error_setg_errno(errp, -ret, "bdrv_readv_vmstate failed");
+        return -1;
     }
 
     bioc->offset += qiov.size;
@@ -86,7 +87,8 @@ qio_channel_block_writev(QIOChannel *ioc,
     qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
     ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset);
     if (ret < 0) {
-        return ret;
+        error_setg_errno(errp, -ret, "bdrv_writev_vmstate failed");
+        return -1;
     }
 
     bioc->offset += qiov.size;
diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..f485eea5fb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1337,6 +1337,24 @@ static bool migrate_caps_check(bool *cap_list,
             error_setg(errp, "Postcopy preempt requires postcopy-ram");
             return false;
         }
+
+        /*
+         * Preempt mode requires urgent pages to be sent in separate
+         * channel, OTOH compression logic will disorder all pages into
+         * different compression channels, which is not compatible with the
+         * preempt assumptions on channel assignments.
+         */
+        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
+            error_setg(errp, "Postcopy preempt not compatible with compress");
+            return false;
+        }
+    }
+
+    if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
+        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
+            error_setg(errp, "Multifd is not compatible with compress");
+            return false;
+        }
     }
 
     return true;
diff --git a/migration/multifd.c b/migration/multifd.c
index 586ddc9d65..509bbbe3bf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -566,6 +566,23 @@ void multifd_save_cleanup(void)
     multifd_send_state = NULL;
 }
 
+static int multifd_zero_copy_flush(QIOChannel *c)
+{
+    int ret;
+    Error *err = NULL;
+
+    ret = qio_channel_flush(c, &err);
+    if (ret < 0) {
+        error_report_err(err);
+        return -1;
+    }
+    if (ret == 1) {
+        dirty_sync_missed_zero_copy();
+    }
+
+    return ret;
+}
+
 int multifd_send_sync_main(QEMUFile *f)
 {
     int i;
@@ -616,17 +633,8 @@ int multifd_send_sync_main(QEMUFile *f)
         qemu_mutex_unlock(&p->mutex);
         qemu_sem_post(&p->sem);
 
-        if (flush_zero_copy && p->c) {
-            int ret;
-            Error *err = NULL;
-
-            ret = qio_channel_flush(p->c, &err);
-            if (ret < 0) {
-                error_report_err(err);
-                return -1;
-            } else if (ret == 1) {
-                dirty_sync_missed_zero_copy();
-            }
+        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
+            return -1;
         }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 4f400c2e52..2d5f74ffc2 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -79,6 +79,30 @@ int qemu_file_shutdown(QEMUFile *f)
     int ret = 0;
 
     f->shutdown = true;
+
+    /*
+     * We must set qemufile error before the real shutdown(), otherwise
+     * there can be a race window where we thought IO all went though
+     * (because last_error==NULL) but actually IO has already stopped.
+     *
+     * If without correct ordering, the race can happen like this:
+     *
+     *      page receiver                     other thread
+     *      -------------                     ------------
+     *      qemu_get_buffer()
+     *                                        do shutdown()
+     *        returns 0 (buffer all zero)
+     *        (we didn't check this retcode)
+     *      try to detect IO error
+     *        last_error==NULL, IO okay
+     *      install ALL-ZERO page
+     *                                        set last_error
+     *      --> guest crash!
+     */
+    if (!f->last_error) {
+        qemu_file_set_error(f, -EIO);
+    }
+
     if (!qio_channel_has_feature(f->ioc,
                                  QIO_CHANNEL_FEATURE_SHUTDOWN)) {
         return -ENOSYS;
@@ -88,9 +112,6 @@ int qemu_file_shutdown(QEMUFile *f)
         ret = -EIO;
     }
 
-    if (!f->last_error) {
-        qemu_file_set_error(f, -EIO);
-    }
     return ret;
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9ddbc..1338e47665 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2305,13 +2305,12 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
     }
 
     /*
-     * Do not use multifd for:
-     * 1. Compression as the first page in the new block should be posted out
-     *    before sending the compressed page
-     * 2. In postcopy as one whole host page should be placed
+     * Do not use multifd in postcopy as one whole host page should be
+     * placed.  Meanwhile postcopy requires atomic update of pages, so even
+     * if host page size == guest page size the dest guest during run may
+     * still see partially copied pages which is data corruption.
      */
-    if (!save_page_use_compression(rs) && migrate_use_multifd()
-        && !migration_in_postcopy()) {
+    if (migrate_use_multifd() && !migration_in_postcopy()) {
         return ram_save_multifd_page(rs, block, offset);
     }
 
@@ -2546,14 +2545,22 @@ static int ram_find_and_save_block(RAMState *rs)
         return pages;
     }
 
+    /*
+     * Always keep last_seen_block/last_page valid during this procedure,
+     * because find_dirty_block() relies on these values (e.g., we compare
+     * last_seen_block with pss.block to see whether we searched all the
+     * ramblocks) to detect the completion of migration.  Having NULL value
+     * of last_seen_block can conditionally cause below loop to run forever.
+     */
+    if (!rs->last_seen_block) {
+        rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
+        rs->last_page = 0;
+    }
+
     pss.block = rs->last_seen_block;
     pss.page = rs->last_page;
     pss.complete_round = false;
 
-    if (!pss.block) {
-        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
-    }
-
     do {
         again = true;
         found = get_queued_page(rs, &pss);
diff --git a/util/bitmap.c b/util/bitmap.c
index f81d8057a7..8d12e90a5a 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -240,6 +240,51 @@ void bitmap_clear(unsigned long *map, long start, long nr)
     }
 }
 
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+    bool dirty = false;
+
+    assert(start >= 0 && nr >= 0);
+
+    /* First word */
+    if (nr - bits_to_clear > 0) {
+        if ((*p) & mask_to_clear) {
+            dirty = true;
+        }
+        *p &= ~mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_clear == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            if (*p) {
+                dirty = true;
+                *p = 0;
+            }
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        if ((*p) & mask_to_clear) {
+            dirty = true;
+        }
+        *p &= ~mask_to_clear;
+    }
+
+    return dirty;
+}
+
 bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
 {
     unsigned long *p = map + BIT_WORD(start);
author	Stefan Hajnoczi	2022-11-21 15:26:18 +0100
committer	Stefan Hajnoczi	2022-11-21 15:26:18 +0100
commit	af29446f32df4f369d4ee32d721fc3c989095731 (patch)
tree	48d402001ba9b7f0f7a8439359a2683bbb7536ff
parent	Merge tag 'chr-pull-request' of https://gitlab.com/marcandre.lureau/qemu into... (diff)
parent	migration: Block migration comment or code is wrong (diff)
download	qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.gz qemu-af29446f32df4f369d4ee32d721fc3c989095731.tar.xz qemu-af29446f32df4f369d4ee32d721fc3c989095731.zip