Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

Block pull request # gpg: Signature made Fri 29 Aug 2014 17:25:58 BST using RSA key ID 81AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" * remotes/stefanha/tags/block-pull-request: (35 commits) quorum: Fix leak of opts in quorum_open blkverify: Fix leak of opts in blkverify_open nfs: Fix leak of opts in nfs_file_open curl: Don't deref NULL pointer in call to aio_poll. curl: Allow a cookie or cookies to be sent with http/https requests. virtio-blk: allow drive_del with dataplane block: acquire AioContext in do_drive_del() linux-aio: avoid deadlock in nested aio_poll() calls qemu-iotests: add multiwrite test cases block: fix overlapping multiwrite requests nbd: Follow the BDS' AIO context block: Add AIO context notifiers nbd: Drop nbd_can_read() sheepdog: fix a core dump while do auto-reconnecting aio-win32: add support for sockets qemu-coroutine-io: fix for Win32 AioContext: introduce aio_prepare aio-win32: add aio_set_dispatching optimization test-aio: test timers on Windows too AioContext: export and use aio_dispatch ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell 2014-08-29 19:40:04 +0200
committer: Peter Maydell 2014-08-29 19:40:04 +0200
commit: 988f46361465db0d4fce50e71fa0ff8f9d20483e (patch)
tree: fa637b2f366d47581d9fc6dc01a0ef722e906d9a
parent: Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140829'... (diff)
parent: quorum: Fix leak of opts in quorum_open (diff)
download: qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.tar.gz
qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.tar.xz
qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.zip
28 files changed, 979 insertions, 297 deletions
diff --git a/aio-posix.c b/aio-posix.c
index 2eada2e049..d3ac06e238 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -100,6 +100,11 @@ void aio_set_event_notifier(AioContext *ctx,
                        (IOHandler *)io_read, NULL, notifier);
 }
 
+bool aio_prepare(AioContext *ctx)
+{
+    return false;
+}
+
 bool aio_pending(AioContext *ctx)
 {
     AioHandler *node;
@@ -119,12 +124,21 @@ bool aio_pending(AioContext *ctx)
     return false;
 }
 
-static bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
     AioHandler *node;
     bool progress = false;
 
     /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        progress = true;
+    }
+
+    /*
      * We have to walk very carefully in case aio_set_fd_handler is
      * called while we're walking.
      */
@@ -184,22 +198,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This happens
-     * in two cases:
-     *
-     * 1) when aio_poll is called with blocking == false
-     *
-     * 2) when we are called after poll().  If we are called before
-     *    poll(), bottom halves will not be re-evaluated and we need
-     *    aio_notify() if blocking == true.
-     *
-     * The first aio_dispatch() only does something when AioContext is
-     * running as a GSource, and in that case aio_poll is used only
-     * with blocking == false, so this optimization is already quite
-     * effective.  However, the code is ugly and should be restructured
-     * to have a single aio_dispatch() call.  To do this, we need to
-     * reorganize aio_poll into a prepare/poll/dispatch model like
-     * glib's.
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns.
      *
      * If we're in a nested event loop, ctx->dispatching might be true.
      * In that case we can restore it just before returning, but we
@@ -207,26 +208,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
      */
     aio_set_dispatching(ctx, !blocking);
 
-    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
-    }
-
-    /* Re-evaluate condition (1) above.  */
-    aio_set_dispatching(ctx, !blocking);
-    if (aio_dispatch(ctx)) {
-        progress = true;
-    }
-
-    if (progress && !blocking) {
-        goto out;
-    }
-
     ctx->walking_handlers++;
 
     g_array_set_size(ctx->pollfds, 0);
@@ -249,7 +230,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* wait until next event */
     ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
                          ctx->pollfds->len,
-                         blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
+                         blocking ? aio_compute_timeout(ctx) : 0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
@@ -268,7 +249,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
-out:
     aio_set_dispatching(ctx, was_dispatching);
     return progress;
 }
diff --git a/aio-win32.c b/aio-win32.c
index c12f61e97d..61e3d2ddfe 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -22,12 +22,80 @@
 
 struct AioHandler {
     EventNotifier *e;
+    IOHandler *io_read;
+    IOHandler *io_write;
     EventNotifierHandler *io_notify;
     GPollFD pfd;
     int deleted;
+    void *opaque;
     QLIST_ENTRY(AioHandler) node;
 };
 
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        void *opaque)
+{
+    /* fd is a SOCKET in our case */
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.fd == fd && !node->deleted) {
+            break;
+        }
+    }
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write) {
+        if (node) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        HANDLE event;
+
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->pfd.fd = fd;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+        }
+
+        node->pfd.events = 0;
+        if (node->io_read) {
+            node->pfd.events |= G_IO_IN;
+        }
+        if (node->io_write) {
+            node->pfd.events |= G_IO_OUT;
+        }
+
+        node->e = &ctx->notifier;
+
+        /* Update handler with latest information */
+        node->opaque = opaque;
+        node->io_read = io_read;
+        node->io_write = io_write;
+
+        event = event_notifier_get_handle(&ctx->notifier);
+        WSAEventSelect(node->pfd.fd, event,
+                       FD_READ | FD_ACCEPT | FD_CLOSE |
+                       FD_CONNECT | FD_WRITE | FD_OOB);
+    }
+
+    aio_notify(ctx);
+}
+
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
                             EventNotifierHandler *io_notify)
@@ -76,55 +144,82 @@ void aio_set_event_notifier(AioContext *ctx,
     aio_notify(ctx);
 }
 
-bool aio_pending(AioContext *ctx)
+bool aio_prepare(AioContext *ctx)
 {
+    static struct timeval tv0;
     AioHandler *node;
+    bool have_select_revents = false;
+    fd_set rfds, wfds;
 
+    /* fill fd sets */
+    FD_ZERO(&rfds);
+    FD_ZERO(&wfds);
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.revents && node->io_notify) {
-            return true;
+        if (node->io_read) {
+            FD_SET ((SOCKET)node->pfd.fd, &rfds);
+        }
+        if (node->io_write) {
+            FD_SET ((SOCKET)node->pfd.fd, &wfds);
         }
     }
 
-    return false;
+    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            node->pfd.revents = 0;
+            if (FD_ISSET(node->pfd.fd, &rfds)) {
+                node->pfd.revents |= G_IO_IN;
+                have_select_revents = true;
+            }
+
+            if (FD_ISSET(node->pfd.fd, &wfds)) {
+                node->pfd.revents |= G_IO_OUT;
+                have_select_revents = true;
+            }
+        }
+    }
+
+    return have_select_revents;
 }
 
-bool aio_poll(AioContext *ctx, bool blocking)
+bool aio_pending(AioContext *ctx)
 {
     AioHandler *node;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    bool progress;
-    int count;
-    int timeout;
 
-    progress = false;
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.revents && node->io_notify) {
+            return true;
+        }
 
-    /*
-     * If there are callbacks left that have been queued, we need to call then.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
+        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
+            return true;
+        }
+        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
+            return true;
+        }
     }
 
-    /* Run timers */
-    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    return false;
+}
+
+static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
+{
+    AioHandler *node;
+    bool progress = false;
 
     /*
-     * Then dispatch any pending callbacks from the GSource.
-     *
      * We have to walk very carefully in case aio_set_fd_handler is
      * called while we're walking.
      */
     node = QLIST_FIRST(&ctx->aio_handlers);
     while (node) {
         AioHandler *tmp;
+        int revents = node->pfd.revents;
 
         ctx->walking_handlers++;
 
-        if (node->pfd.revents && node->io_notify) {
+        if (!node->deleted &&
+            (revents || event_notifier_get_handle(node->e) == event) &&
+            node->io_notify) {
             node->pfd.revents = 0;
             node->io_notify(node->e);
 
@@ -134,6 +229,28 @@ bool aio_poll(AioContext *ctx, bool blocking)
             }
         }
 
+        if (!node->deleted &&
+            (node->io_read || node->io_write)) {
+            node->pfd.revents = 0;
+            if ((revents & G_IO_IN) && node->io_read) {
+                node->io_read(node->opaque);
+                progress = true;
+            }
+            if ((revents & G_IO_OUT) && node->io_write) {
+                node->io_write(node->opaque);
+                progress = true;
+            }
+
+            /* if the next select() will return an event, we have progressed */
+            if (event == event_notifier_get_handle(&ctx->notifier)) {
+                WSANETWORKEVENTS ev;
+                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
+                if (ev.lNetworkEvents) {
+                    progress = true;
+                }
+            }
+        }
+
         tmp = node;
         node = QLIST_NEXT(node, node);
 
@@ -145,10 +262,47 @@ bool aio_poll(AioContext *ctx, bool blocking)
         }
     }
 
-    if (progress && !blocking) {
-        return true;
+    return progress;
+}
+
+bool aio_dispatch(AioContext *ctx)
+{
+    bool progress;
+
+    progress = aio_bh_poll(ctx);
+    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    return progress;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    AioHandler *node;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool was_dispatching, progress, have_select_revents, first;
+    int count;
+    int timeout;
+
+    if (aio_prepare(ctx)) {
+        blocking = false;
+        have_select_revents = true;
     }
 
+    was_dispatching = ctx->dispatching;
+    progress = false;
+
+    /* aio_notify can avoid the expensive event_notifier_set if
+     * everything (file descriptors, bottom halves, timers) will
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns.
+     *
+     * If we're in a nested event loop, ctx->dispatching might be true.
+     * In that case we can restore it just before returning, but we
+     * have to clear it now.
+     */
+    aio_set_dispatching(ctx, !blocking);
+
     ctx->walking_handlers++;
 
     /* fill fd sets */
@@ -160,64 +314,42 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     ctx->walking_handlers--;
+    first = true;
 
     /* wait until next event */
     while (count > 0) {
+        HANDLE event;
         int ret;
 
-        timeout = blocking ?
-            qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
+        timeout = blocking
+            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        aio_set_dispatching(ctx, true);
+
+        if (first && aio_bh_poll(ctx)) {
+            progress = true;
+        }
+        first = false;
 
         /* if we have any signaled events, dispatch event */
-        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
+        event = NULL;
+        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
+            event = events[ret - WAIT_OBJECT_0];
+        } else if (!have_select_revents) {
             break;
         }
 
+        have_select_revents = false;
         blocking = false;
 
-        /* we have to walk very carefully in case
-         * aio_set_fd_handler is called while we're walking */
-        node = QLIST_FIRST(&ctx->aio_handlers);
-        while (node) {
-            AioHandler *tmp;
-
-            ctx->walking_handlers++;
-
-            if (!node->deleted &&
-                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
-                node->io_notify) {
-                node->io_notify(node->e);
-
-                /* aio_notify() does not count as progress */
-                if (node->e != &ctx->notifier) {
-                    progress = true;
-                }
-            }
-
-            tmp = node;
-            node = QLIST_NEXT(node, node);
-
-            ctx->walking_handlers--;
-
-            if (!ctx->walking_handlers && tmp->deleted) {
-                QLIST_REMOVE(tmp, node);
-                g_free(tmp);
-            }
-        }
+        progress |= aio_dispatch_handlers(ctx, event);
 
         /* Try again, but only call each handler once.  */
         events[ret - WAIT_OBJECT_0] = events[--count];
     }
 
-    if (blocking) {
-        /* Run the timers a second time. We do this because otherwise aio_wait
-         * will not note progress - and will stop a drain early - if we have
-         * a timer that was not ready to run entering g_poll but is ready
-         * after g_poll. This will only do anything if a timer has expired.
-         */
-        progress |= timerlistgroup_run_timers(&ctx->tlg);
-    }
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
 
+    aio_set_dispatching(ctx, was_dispatching);
     return progress;
 }
diff --git a/async.c b/async.c
index 34af0b25ca..a99e7f639a 100644
--- a/async.c
+++ b/async.c
@@ -152,39 +152,48 @@ void qemu_bh_delete(QEMUBH *bh)
     bh->deleted = 1;
 }
 
-static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
+int64_t
+aio_compute_timeout(AioContext *ctx)
 {
-    AioContext *ctx = (AioContext *) source;
+    int64_t deadline;
+    int timeout = -1;
     QEMUBH *bh;
-    int deadline;
 
-    /* We assume there is no timeout already supplied */
-    *timeout = -1;
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = 10;
+                timeout = 10000000;
             } else {
                 /* non-idle bottom halves will be executed
                  * immediately */
-                *timeout = 0;
-                return true;
+                return 0;
             }
         }
     }
 
-    deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
+    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
     if (deadline == 0) {
-        *timeout = 0;
-        return true;
+        return 0;
     } else {
-        *timeout = qemu_soonest_timeout(*timeout, deadline);
+        return qemu_soonest_timeout(timeout, deadline);
+    }
+}
+
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    /* We assume there is no timeout already supplied */
+    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+
+    if (aio_prepare(ctx)) {
+        *timeout = 0;
     }
 
-    return false;
+    return *timeout == 0;
 }
 
 static gboolean
@@ -209,7 +218,7 @@ aio_ctx_dispatch(GSource     *source,
     AioContext *ctx = (AioContext *) source;
 
     assert(callback == NULL);
-    aio_poll(ctx, false);
+    aio_dispatch(ctx);
     return true;
 }
 
diff --git a/block.c b/block.c
index 1df13ac1c7..cb670fd54d 100644
--- a/block.c
+++ b/block.c
@@ -1819,6 +1819,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 
 void bdrv_close(BlockDriverState *bs)
 {
+    BdrvAioNotifier *ban, *ban_next;
+
     if (bs->job) {
         block_job_cancel_sync(bs->job);
     }
@@ -1863,6 +1865,11 @@ void bdrv_close(BlockDriverState *bs)
     if (bs->io_limits_enabled) {
         bdrv_io_limits_disable(bs);
     }
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        g_free(ban);
+    }
+    QLIST_INIT(&bs->aio_notifiers);
 }
 
 void bdrv_close_all(void)
@@ -4546,6 +4553,12 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
             // Add the second request
             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
 
+            // Add tail of first request, if necessary
+            if (qiov->size < reqs[outidx].qiov->size) {
+                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
+                                  reqs[outidx].qiov->size - qiov->size);
+            }
+
             reqs[outidx].nb_sectors = qiov->size >> 9;
             reqs[outidx].qiov = qiov;
 
@@ -5729,10 +5742,16 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 
 void bdrv_detach_aio_context(BlockDriverState *bs)
 {
+    BdrvAioNotifier *baf;
+
     if (!bs->drv) {
         return;
     }
 
+    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
+        baf->detach_aio_context(baf->opaque);
+    }
+
     if (bs->io_limits_enabled) {
         throttle_detach_aio_context(&bs->throttle_state);
     }
@@ -5752,6 +5771,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context)
 {
+    BdrvAioNotifier *ban;
+
     if (!bs->drv) {
         return;
     }
@@ -5770,6 +5791,10 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
     if (bs->io_limits_enabled) {
         throttle_attach_aio_context(&bs->throttle_state, new_context);
     }
+
+    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
+        ban->attached_aio_context(new_context, ban->opaque);
+    }
 }
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
@@ -5786,6 +5811,43 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     aio_context_release(new_context);
 }
 
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque)
+{
+    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
+    *ban = (BdrvAioNotifier){
+        .attached_aio_context = attached_aio_context,
+        .detach_aio_context   = detach_aio_context,
+        .opaque               = opaque
+    };
+
+    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
+}
+
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*attached_aio_context)(AioContext *,
+                                                                   void *),
+                                      void (*detach_aio_context)(void *),
+                                      void *opaque)
+{
+    BdrvAioNotifier *ban, *ban_next;
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        if (ban->attached_aio_context == attached_aio_context &&
+            ban->detach_aio_context   == detach_aio_context   &&
+            ban->opaque               == opaque)
+        {
+            QLIST_REMOVE(ban, list);
+            g_free(ban);
+
+            return;
+        }
+    }
+
+    abort();
+}
+
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                     NotifierWithReturn *notifier)
 {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 858d2b387b..f45f9399aa 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -10,7 +10,6 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
-ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
@@ -19,7 +18,6 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-endif
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/blkverify.c b/block/blkverify.c
index 7c78ca41a5..163064cf6b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -158,6 +158,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
 
     ret = 0;
 fail:
+    qemu_opts_del(opts);
     return ret;
 }
 
diff --git a/block/curl.c b/block/curl.c
index d4b85d20a5..025833994c 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -63,6 +63,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
+#define CURL_TIMEOUT_DEFAULT 5
 
 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
@@ -71,6 +72,8 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
+#define CURL_BLOCK_OPT_TIMEOUT "timeout"
+#define CURL_BLOCK_OPT_COOKIE    "cookie"
 
 struct BDRVCURLState;
 
@@ -109,6 +112,8 @@ typedef struct BDRVCURLState {
     char *url;
     size_t readahead_size;
     bool sslverify;
+    int timeout;
+    char *cookie;
     bool accept_range;
     AioContext *aio_context;
 } BDRVCURLState;
@@ -352,7 +357,7 @@ static void curl_multi_timeout_do(void *arg)
 #endif
 }
 
-static CURLState *curl_init_state(BDRVCURLState *s)
+static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
     CURLState *state = NULL;
     int i, j;
@@ -370,7 +375,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
             break;
         }
         if (!state) {
-            aio_poll(state->s->aio_context, true);
+            aio_poll(bdrv_get_aio_context(bs), true);
         }
     } while(!state);
 
@@ -382,7 +387,10 @@ static CURLState *curl_init_state(BDRVCURLState *s)
         curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
         curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
                          (long) s->sslverify);
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
+        if (s->cookie) {
+            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
+        }
+        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout);
         curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
                          (void *)curl_read_cb);
         curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -489,6 +497,16 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Verify SSL certificate"
         },
+        {
+            .name = CURL_BLOCK_OPT_TIMEOUT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Curl timeout"
+        },
+        {
+            .name = CURL_BLOCK_OPT_COOKIE,
+            .type = QEMU_OPT_STRING,
+            .help = "Pass the cookie or list of cookies with each request"
+        },
         { /* end of list */ }
     },
 };
@@ -501,6 +519,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     QemuOpts *opts;
     Error *local_err = NULL;
     const char *file;
+    const char *cookie;
     double d;
 
     static int inited = 0;
@@ -525,8 +544,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
         goto out_noclean;
     }
 
+    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
+                                     CURL_TIMEOUT_DEFAULT);
+
     s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
 
+    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
+    s->cookie = g_strdup(cookie);
+
     file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
     if (file == NULL) {
         error_setg(errp, "curl block driver requires an 'url' option");
@@ -541,7 +566,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     DPRINTF("CURL: Opening %s\n", file);
     s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
-    state = curl_init_state(s);
+    state = curl_init_state(bs, s);
     if (!state)
         goto out_noclean;
 
@@ -582,6 +607,7 @@ out:
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 out_noclean:
+    g_free(s->cookie);
     g_free(s->url);
     qemu_opts_del(opts);
     return -EINVAL;
@@ -625,7 +651,7 @@ static void curl_readv_bh_cb(void *p)
     }
 
     // No cache found, so let's start a new request
-    state = curl_init_state(s);
+    state = curl_init_state(acb->common.bs, s);
     if (!state) {
         acb->common.cb(acb->common.opaque, -EIO);
         qemu_aio_release(acb);
@@ -684,6 +710,7 @@ static void curl_close(BlockDriverState *bs)
     DPRINTF("CURL: Close\n");
     curl_detach_aio_context(bs);
 
+    g_free(s->cookie);
     g_free(s->url);
 }
 
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 7ac7e8c99c..9aca758b10 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -51,6 +51,12 @@ struct qemu_laio_state {
 
     /* io queue for submit at batch */
     LaioQueue io_q;
+
+    /* I/O completion processing */
+    QEMUBH *completion_bh;
+    struct io_event events[MAX_EVENTS];
+    int event_idx;
+    int event_max;
 };
 
 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -86,27 +92,58 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
     qemu_aio_release(laiocb);
 }
 
-static void qemu_laio_completion_cb(EventNotifier *e)
+/* The completion BH fetches completed I/O requests and invokes their
+ * callbacks.
+ *
+ * The function is somewhat tricky because it supports nested event loops, for
+ * example when a request callback invokes aio_poll().  In order to do this,
+ * the completion events array and index are kept in qemu_laio_state.  The BH
+ * reschedules itself as long as there are completions pending so it will
+ * either be called again in a nested event loop or will be called after all
+ * events have been completed.  When there are no events left to complete, the
+ * BH returns without rescheduling.
+ */
+static void qemu_laio_completion_bh(void *opaque)
 {
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
-    while (event_notifier_test_and_clear(&s->e)) {
-        struct io_event events[MAX_EVENTS];
-        struct timespec ts = { 0 };
-        int nevents, i;
+    struct qemu_laio_state *s = opaque;
 
+    /* Fetch more completion events when empty */
+    if (s->event_idx == s->event_max) {
         do {
-            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
-        } while (nevents == -EINTR);
+            struct timespec ts = { 0 };
+            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
+                                        s->events, &ts);
+        } while (s->event_max == -EINTR);
+
+        s->event_idx = 0;
+        if (s->event_max <= 0) {
+            s->event_max = 0;
+            return; /* no more events */
+        }
+    }
 
-        for (i = 0; i < nevents; i++) {
-            struct iocb *iocb = events[i].obj;
-            struct qemu_laiocb *laiocb =
-                    container_of(iocb, struct qemu_laiocb, iocb);
+    /* Reschedule so nested event loops see currently pending completions */
+    qemu_bh_schedule(s->completion_bh);
 
-            laiocb->ret = io_event_ret(&events[i]);
-            qemu_laio_process_completion(s, laiocb);
-        }
+    /* Process completion events */
+    while (s->event_idx < s->event_max) {
+        struct iocb *iocb = s->events[s->event_idx].obj;
+        struct qemu_laiocb *laiocb =
+                container_of(iocb, struct qemu_laiocb, iocb);
+
+        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
+        s->event_idx++;
+
+        qemu_laio_process_completion(s, laiocb);
+    }
+}
+
+static void qemu_laio_completion_cb(EventNotifier *e)
+{
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+    if (event_notifier_test_and_clear(&s->e)) {
+        qemu_bh_schedule(s->completion_bh);
     }
 }
 
@@ -272,12 +309,14 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
     struct qemu_laio_state *s = s_;
 
     aio_set_event_notifier(old_context, &s->e, NULL);
+    qemu_bh_delete(s->completion_bh);
 }
 
 void laio_attach_aio_context(void *s_, AioContext *new_context)
 {
     struct qemu_laio_state *s = s_;
 
+    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
     aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
 }
 
diff --git a/block/nfs.c b/block/nfs.c
index 93d87f3256..194f301501 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -393,16 +393,20 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
                           (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
                           errp);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
     bs->total_sectors = ret;
-    return 0;
+    ret = 0;
+out:
+    qemu_opts_del(opts);
+    return ret;
 }
 
 static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
diff --git a/block/quorum.c b/block/quorum.c
index 0de07bb036..093382e8f5 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -29,6 +29,7 @@
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
 #define QUORUM_OPT_REWRITE        "rewrite-corrupted"
+#define QUORUM_OPT_READ_PATTERN   "read-pattern"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -79,6 +80,8 @@ typedef struct BDRVQuorumState {
     bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
                             * block if Quorum is reached.
                             */
+
+    QuorumReadPattern read_pattern;
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -122,6 +125,7 @@ struct QuorumAIOCB {
 
     bool is_read;
     int vote_ret;
+    int child_iter;             /* which child to read in fifo pattern */
 };
 
 static bool quorum_vote(QuorumAIOCB *acb);
@@ -148,7 +152,6 @@ static AIOCBInfo quorum_aiocb_info = {
 
 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
     int i, ret = 0;
 
     if (acb->vote_ret) {
@@ -158,7 +161,8 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
     acb->common.cb(acb->common.opaque, ret);
 
     if (acb->is_read) {
-        for (i = 0; i < s->num_children; i++) {
+        /* on the quorum case acb->child_iter == s->num_children - 1 */
+        for (i = 0; i <= acb->child_iter; i++) {
             qemu_vfree(acb->qcrs[i].buf);
             qemu_iovec_destroy(&acb->qcrs[i].qiov);
         }
@@ -261,6 +265,21 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret)
     quorum_aio_finalize(acb);
 }
 
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb);
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+    for (i = 0; i < source->niov; i++) {
+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+    }
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
     QuorumChildRequest *sacb = opaque;
@@ -268,6 +287,21 @@ static void quorum_aio_cb(void *opaque, int ret)
     BDRVQuorumState *s = acb->common.bs->opaque;
     bool rewrite = false;
 
+    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
+        /* We try to read next child in FIFO order if we fail to read */
+        if (ret < 0 && ++acb->child_iter < s->num_children) {
+            read_fifo_child(acb);
+            return;
+        }
+
+        if (ret == 0) {
+            quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
+        }
+        acb->vote_ret = ret;
+        quorum_aio_finalize(acb);
+        return;
+    }
+
     sacb->ret = ret;
     acb->count++;
     if (ret == 0) {
@@ -348,19 +382,6 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
     return count;
 }
 
-static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
-{
-    int i;
-    assert(dest->niov == source->niov);
-    assert(dest->size == source->size);
-    for (i = 0; i < source->niov; i++) {
-        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
-        memcpy(dest->iov[i].iov_base,
-               source->iov[i].iov_base,
-               source->iov[i].iov_len);
-    }
-}
-
 static void quorum_count_vote(QuorumVotes *votes,
                               QuorumVoteValue *value,
                               int index)
@@ -620,34 +641,62 @@ free_exit:
     return rewrite;
 }
 
-static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BlockDriverCompletionFunc *cb,
-                                         void *opaque)
+static BlockDriverAIOCB *read_quorum_children(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
-                                      nb_sectors, cb, opaque);
+    BDRVQuorumState *s = acb->common.bs->opaque;
     int i;
 
-    acb->is_read = true;
-
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
-        qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
-        qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
+        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
+        qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
     }
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
-                       quorum_aio_cb, &acb->qcrs[i]);
+        bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
+                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
     }
 
     return &acb->common;
 }
 
+static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
+                                                     acb->qiov->size);
+    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
+    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
+                     acb->qcrs[acb->child_iter].buf);
+    bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
+                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
+    acb->is_read = true;
+
+    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+        acb->child_iter = s->num_children - 1;
+        return read_quorum_children(acb);
+    }
+
+    acb->child_iter = 0;
+    return read_fifo_child(acb);
+}
+
 static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
                                           int64_t sector_num,
                                           QEMUIOVector *qiov,
@@ -787,16 +836,39 @@ static QemuOptsList quorum_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Rewrite corrupted block on read quorum",
         },
+        {
+            .name = QUORUM_OPT_READ_PATTERN,
+            .type = QEMU_OPT_STRING,
+            .help = "Allowed pattern: quorum, fifo. Quorum is default",
+        },
         { /* end of list */ }
     },
 };
 
+static int parse_read_pattern(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        /* Set quorum as default */
+        return QUORUM_READ_PATTERN_QUORUM;
+    }
+
+    for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+        if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
+            return i;
+        }
+    }
+
+    return -EINVAL;
+}
+
 static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                        Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     Error *local_err = NULL;
-    QemuOpts *opts;
+    QemuOpts *opts = NULL;
     bool *opened;
     QDict *sub = NULL;
     QList *list = NULL;
@@ -832,28 +904,37 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
-
-    /* and validate it against s->num_children */
-    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+    ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
     if (ret < 0) {
+        error_setg(&local_err, "Please set read-pattern as fifo or quorum");
         goto exit;
     }
+    s->read_pattern = ret;
 
-    /* is the driver in blkverify mode */
-    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
-        s->num_children == 2 && s->threshold == 2) {
-        s->is_blkverify = true;
-    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
-        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
-                "and using two files with vote_threshold=2\n");
-    }
+    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
+        /* and validate it against s->num_children */
+        ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+        if (ret < 0) {
+            goto exit;
+        }
 
-    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
-    if (s->rewrite_corrupted && s->is_blkverify) {
-        error_setg(&local_err,
-                   "rewrite-corrupted=on cannot be used with blkverify=on");
-        ret = -EINVAL;
-        goto exit;
+        /* is the driver in blkverify mode */
+        if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+            s->num_children == 2 && s->threshold == 2) {
+            s->is_blkverify = true;
+        } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+            fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                    "and using two files with vote_threshold=2\n");
+        }
+
+        s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
+                                                 false);
+        if (s->rewrite_corrupted && s->is_blkverify) {
+            error_setg(&local_err,
+                       "rewrite-corrupted=on cannot be used with blkverify=on");
+            ret = -EINVAL;
+            goto exit;
+        }
     }
 
     /* allocate the children BlockDriverState array */
@@ -908,6 +989,7 @@ close_exit:
     g_free(s->bs);
     g_free(opened);
 exit:
+    qemu_opts_del(opts);
     /* propagate error */
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 12cbd9dcb4..f91afc3a5b 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -103,6 +103,9 @@
 #define SD_INODE_SIZE (sizeof(SheepdogInode))
 #define CURRENT_VDI_ID 0
 
+#define LOCK_TYPE_NORMAL 0
+#define LOCK_TYPE_SHARED 1      /* for iSCSI multipath */
+
 typedef struct SheepdogReq {
     uint8_t proto_ver;
     uint8_t opcode;
@@ -166,7 +169,8 @@ typedef struct SheepdogVdiReq {
     uint8_t copy_policy;
     uint8_t reserved[2];
     uint32_t snapid;
-    uint32_t pad[3];
+    uint32_t type;
+    uint32_t pad[2];
 } SheepdogVdiReq;
 
 typedef struct SheepdogVdiRsp {
@@ -712,7 +716,6 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
 
 static coroutine_fn void reconnect_to_sdog(void *opaque)
 {
-    Error *local_err = NULL;
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
@@ -727,6 +730,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
 
     /* Try to reconnect the sheepdog server every one second. */
     while (s->fd < 0) {
+        Error *local_err = NULL;
         s->fd = get_sheep_fd(s, &local_err);
         if (s->fd < 0) {
             DPRINTF("Wait for connection to be established\n");
@@ -1090,6 +1094,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
     memset(&hdr, 0, sizeof(hdr));
     if (lock) {
         hdr.opcode = SD_OP_LOCK_VDI;
+        hdr.type = LOCK_TYPE_NORMAL;
     } else {
         hdr.opcode = SD_OP_GET_VDI_INFO;
     }
@@ -1110,6 +1115,8 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
                    sd_strerror(rsp->result), filename, snapid, tag);
         if (rsp->result == SD_RES_NO_VDI) {
             ret = -ENOENT;
+        } else if (rsp->result == SD_RES_VDI_LOCKED) {
+            ret = -EBUSY;
         } else {
             ret = -EIO;
         }
@@ -1793,6 +1800,7 @@ static void sd_close(BlockDriverState *bs)
     memset(&hdr, 0, sizeof(hdr));
 
     hdr.opcode = SD_OP_RELEASE_VDI;
+    hdr.type = LOCK_TYPE_NORMAL;
     hdr.base_vdi_id = s->inode.vdi_id;
     wlen = strlen(s->name) + 1;
     hdr.data_length = wlen;
diff --git a/blockdev.c b/blockdev.c
index 6a204c662d..e37b068e9e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1757,6 +1757,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
     BlockDriverState *bs;
+    AioContext *aio_context;
     Error *local_err = NULL;
 
     bs = bdrv_find(id);
@@ -1764,9 +1765,14 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         error_report("Device '%s' not found", id);
         return -1;
     }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
         error_report("%s", error_get_pretty(local_err));
         error_free(local_err);
+        aio_context_release(aio_context);
         return -1;
     }
 
@@ -1790,6 +1796,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
         drive_del(drive_get_by_blockdev(bs));
     }
 
+    aio_context_release(aio_context);
     return 0;
 }
 
@@ -2179,11 +2186,12 @@ void qmp_drive_mirror(const char *device, const char *target,
     }
 
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
-        error_set(errp, QERR_INVALID_PARAMETER, device);
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+                  "a value in range [512B, 64MB]");
         return;
     }
     if (granularity & (granularity - 1)) {
-        error_set(errp, QERR_INVALID_PARAMETER, device);
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", "power of 2");
         return;
     }
 
diff --git a/blockjob.c b/blockjob.c
index ca0b4e25d0..0689fdd2b5 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -205,7 +205,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     if (block_job_is_paused(job)) {
         qemu_coroutine_yield();
     } else {
-        co_sleep_ns(type, ns);
+        co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
     }
     job->busy = true;
 }
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c07adc6e4f..b55188cb82 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -194,6 +194,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
     error_setg(&s->blocker, "block device is in use by data plane");
     bdrv_op_block_all(blk->conf.bs, s->blocker);
     bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_RESIZE, s->blocker);
+    bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
 
     *dataplane = s;
 }
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index b4a467116e..efab95b320 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -59,7 +59,7 @@ static char *idebus_get_fw_dev_path(DeviceState *dev)
 {
     char path[30];
 
-    snprintf(path, sizeof(path), "%s@%d", qdev_fw_name(dev),
+    snprintf(path, sizeof(path), "%s@%x", qdev_fw_name(dev),
              ((IDEBus*)dev->parent_bus)->bus_id);
 
     return g_strdup(path);
diff --git a/include/block/aio.h b/include/block/aio.h
index c23de3cd1f..4603c0f066 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -205,12 +205,25 @@ void qemu_bh_cancel(QEMUBH *bh);
 void qemu_bh_delete(QEMUBH *bh);
 
 /* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
+ * attached to the AioContext, before g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_prepare(AioContext *ctx);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, after g_poll is invoked.
  *
  * This is used internally in the implementation of the GSource.
  */
 bool aio_pending(AioContext *ctx);
 
+/* Dispatch any pending callbacks from the GSource attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_dispatch(AioContext *ctx);
+
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
@@ -226,7 +239,6 @@ bool aio_pending(AioContext *ctx);
  */
 bool aio_poll(AioContext *ctx, bool blocking);
 
-#ifdef CONFIG_POSIX
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
  * be invoked when using aio_poll().
@@ -239,7 +251,6 @@ void aio_set_fd_handler(AioContext *ctx,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
-#endif
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
@@ -303,4 +314,12 @@ static inline void aio_timer_init(AioContext *ctx,
     timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
 }
 
+/**
+ * aio_compute_timeout:
+ * @ctx: the aio context
+ *
+ * Compute the timeout that a blocking aio_poll should use.
+ */
+int64_t aio_compute_timeout(AioContext *ctx);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 233489547e..8a61215ac0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -294,6 +294,15 @@ typedef struct BlockLimits {
 
 typedef struct BdrvOpBlocker BdrvOpBlocker;
 
+typedef struct BdrvAioNotifier {
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    void (*detach_aio_context)(void *opaque);
+
+    void *opaque;
+
+    QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -320,6 +329,10 @@ struct BlockDriverState {
     void *dev_opaque;
 
     AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+    /* long-running tasks intended to always use the same AioContext as this
+     * BDS may register themselves in this list to be notified of changes
+     * regarding this BDS's context */
+    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
 
     char filename[1024];
     char backing_file[1024]; /* if non zero, the image is a diff of
@@ -437,6 +450,34 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context);
 
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*aio_context_attached)(AioContext *,
+                                                                   void *),
+                                      void (*aio_context_detached)(void *),
+                                      void *opaque);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b9b7f488c9..793df0ef8b 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -203,14 +203,6 @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
 /**
  * Yield the coroutine for a given duration
  *
- * Note this function uses timers and hence only works when a main loop is in
- * use.  See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
-
-/**
- * Yield the coroutine for a given duration
- *
  * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
  * resumed when using aio_poll().
  */
diff --git a/nbd.c b/nbd.c
index e7d1ceec43..e9b539be58 100644
--- a/nbd.c
+++ b/nbd.c
@@ -18,6 +18,7 @@
 
 #include "block/nbd.h"
 #include "block/block.h"
+#include "block/block_int.h"
 
 #include "block/coroutine.h"
 
@@ -107,6 +108,8 @@ struct NBDExport {
     uint32_t nbdflags;
     QTAILQ_HEAD(, NBDClient) clients;
     QTAILQ_ENTRY(NBDExport) next;
+
+    AioContext *ctx;
 };
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -123,6 +126,8 @@ struct NBDClient {
     CoMutex send_lock;
     Coroutine *send_coroutine;
 
+    bool can_read;
+
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
@@ -130,6 +135,10 @@ struct NBDClient {
 
 /* That's all folks */
 
+static void nbd_set_handlers(NBDClient *client);
+static void nbd_unset_handlers(NBDClient *client);
+static void nbd_update_can_read(NBDClient *client);
+
 ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
 {
     size_t offset = 0;
@@ -156,7 +165,7 @@ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
             err = socket_error();
 
             /* recoverable error */
-            if (err == EINTR || (offset > 0 && err == EAGAIN)) {
+            if (err == EINTR || (offset > 0 && (err == EAGAIN || err == EWOULDBLOCK))) {
                 continue;
             }
 
@@ -862,7 +871,7 @@ void nbd_client_put(NBDClient *client)
          */
         assert(client->closing);
 
-        qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
+        nbd_unset_handlers(client);
         close(client->sock);
         client->sock = -1;
         if (client->exp) {
@@ -898,6 +907,7 @@ static NBDRequest *nbd_request_get(NBDClient *client)
 
     assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
     client->nb_requests++;
+    nbd_update_can_read(client);
 
     req = g_slice_new0(NBDRequest);
     nbd_client_get(client);
@@ -914,12 +924,39 @@ static void nbd_request_put(NBDRequest *req)
     }
     g_slice_free(NBDRequest, req);
 
-    if (client->nb_requests-- == MAX_NBD_REQUESTS) {
-        qemu_notify_event();
-    }
+    client->nb_requests--;
+    nbd_update_can_read(client);
     nbd_client_put(client);
 }
 
+static void bs_aio_attached(AioContext *ctx, void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
+
+    exp->ctx = ctx;
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        nbd_set_handlers(client);
+    }
+}
+
+static void bs_aio_detach(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        nbd_unset_handlers(client);
+    }
+
+    exp->ctx = NULL;
+}
+
 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
                           off_t size, uint32_t nbdflags,
                           void (*close)(NBDExport *))
@@ -932,7 +969,9 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
     exp->nbdflags = nbdflags;
     exp->size = size == -1 ? bdrv_getlength(bs) : size;
     exp->close = close;
+    exp->ctx = bdrv_get_aio_context(bs);
     bdrv_ref(bs);
+    bdrv_add_aio_context_notifier(bs, bs_aio_attached, bs_aio_detach, exp);
     return exp;
 }
 
@@ -980,6 +1019,8 @@ void nbd_export_close(NBDExport *exp)
     nbd_export_set_name(exp, NULL);
     nbd_export_put(exp);
     if (exp->bs) {
+        bdrv_remove_aio_context_notifier(exp->bs, bs_aio_attached,
+                                         bs_aio_detach, exp);
         bdrv_unref(exp->bs);
         exp->bs = NULL;
     }
@@ -1023,10 +1064,6 @@ void nbd_export_close_all(void)
     }
 }
 
-static int nbd_can_read(void *opaque);
-static void nbd_read(void *opaque);
-static void nbd_restart_write(void *opaque);
-
 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
                                  int len)
 {
@@ -1035,9 +1072,8 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
     ssize_t rc, ret;
 
     qemu_co_mutex_lock(&client->send_lock);
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
-                         nbd_restart_write, client);
     client->send_coroutine = qemu_coroutine_self();
+    nbd_set_handlers(client);
 
     if (!len) {
         rc = nbd_send_reply(csock, reply);
@@ -1054,7 +1090,7 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
     }
 
     client->send_coroutine = NULL;
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+    nbd_set_handlers(client);
     qemu_co_mutex_unlock(&client->send_lock);
     return rc;
 }
@@ -1067,6 +1103,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
     ssize_t rc;
 
     client->recv_coroutine = qemu_coroutine_self();
+    nbd_update_can_read(client);
+
     rc = nbd_receive_request(csock, request);
     if (rc < 0) {
         if (rc != -EAGAIN) {
@@ -1108,6 +1146,8 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
 
 out:
     client->recv_coroutine = NULL;
+    nbd_update_can_read(client);
+
     return rc;
 }
 
@@ -1259,13 +1299,6 @@ out:
     nbd_client_close(client);
 }
 
-static int nbd_can_read(void *opaque)
-{
-    NBDClient *client = opaque;
-
-    return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
-}
-
 static void nbd_read(void *opaque)
 {
     NBDClient *client = opaque;
@@ -1284,6 +1317,37 @@ static void nbd_restart_write(void *opaque)
     qemu_coroutine_enter(client->send_coroutine, NULL);
 }
 
+static void nbd_set_handlers(NBDClient *client)
+{
+    if (client->exp && client->exp->ctx) {
+        aio_set_fd_handler(client->exp->ctx, client->sock,
+                           client->can_read ? nbd_read : NULL,
+                           client->send_coroutine ? nbd_restart_write : NULL,
+                           client);
+    }
+}
+
+static void nbd_unset_handlers(NBDClient *client)
+{
+    if (client->exp && client->exp->ctx) {
+        aio_set_fd_handler(client->exp->ctx, client->sock, NULL, NULL, NULL);
+    }
+}
+
+static void nbd_update_can_read(NBDClient *client)
+{
+    bool can_read = client->recv_coroutine ||
+                    client->nb_requests < MAX_NBD_REQUESTS;
+
+    if (can_read != client->can_read) {
+        client->can_read = can_read;
+        nbd_set_handlers(client);
+
+        /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
+         * in nbd_set_handlers() will have taken care of that */
+    }
+}
+
 NBDClient *nbd_client_new(NBDExport *exp, int csock,
                           void (*close)(NBDClient *))
 {
@@ -1292,13 +1356,14 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock,
     client->refcount = 1;
     client->exp = exp;
     client->sock = csock;
+    client->can_read = true;
     if (nbd_send_negotiate(client)) {
         g_free(client);
         return NULL;
     }
     client->close = close;
     qemu_co_mutex_init(&client->send_lock);
-    qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+    nbd_set_handlers(client);
 
     if (exp) {
         QTAILQ_INSERT_TAIL(&exp->clients, client, next);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index fb74c56e32..a685d02728 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1419,6 +1419,19 @@
             'raw': 'BlockdevRef' } }
 
 ##
+# @QuorumReadPattern
+#
+# An enumeration of quorum read patterns.
+#
+# @quorum: read all the children and do a quorum vote on reads
+#
+# @fifo: read only from the first child that has not failed
+#
+# Since: 2.2
+##
+{ 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] }
+
+##
 # @BlockdevOptionsQuorum
 #
 # Driver specific block device options for Quorum
@@ -1433,12 +1446,17 @@
 # @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
 #                     (Since 2.1)
 #
+# @read-pattern: #optional choose read pattern and set to quorum by default
+#                (Since 2.2)
+#
 # Since: 2.0
 ##
 { 'type': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
             'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
+            'vote-threshold': 'int',
+            '*rewrite-corrupted': 'bool',
+            '*read-pattern': 'QuorumReadPattern' } }
 
 ##
 # @BlockdevOptions
diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c
index 054ca70627..d4049260da 100644
--- a/qemu-coroutine-io.c
+++ b/qemu-coroutine-io.c
@@ -34,13 +34,15 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
 {
     size_t done = 0;
     ssize_t ret;
+    int err;
     while (done < bytes) {
         ret = iov_send_recv(sockfd, iov, iov_cnt,
                             offset + done, bytes - done, do_send);
         if (ret > 0) {
             done += ret;
         } else if (ret < 0) {
-            if (errno == EAGAIN) {
+            err = socket_error();
+            if (err == EAGAIN || err == EWOULDBLOCK) {
                 qemu_coroutine_yield();
             } else if (done == 0) {
                 return -1;
diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
index ad78fbaa2a..9abb7fdf31 100644
--- a/qemu-coroutine-sleep.c
+++ b/qemu-coroutine-sleep.c
@@ -27,18 +27,6 @@ static void co_sleep_cb(void *opaque)
     qemu_coroutine_enter(sleep_cb->co, NULL);
 }
 
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns)
-{
-    CoSleepCB sleep_cb = {
-        .co = qemu_coroutine_self(),
-    };
-    sleep_cb.ts = timer_new(type, SCALE_NS, co_sleep_cb, &sleep_cb);
-    timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
-    qemu_coroutine_yield();
-    timer_del(sleep_cb.ts);
-    timer_free(sleep_cb.ts);
-}
-
 void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
                                   int64_t ns)
 {
diff --git a/qemu-img.c b/qemu-img.c
index 2052b14b84..ff29ed1c67 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -752,7 +752,7 @@ static int img_commit(int argc, char **argv)
     ret = bdrv_parse_cache_flags(cache, &flags);
     if (ret < 0) {
         error_report("Invalid cache option: %s", cache);
-        return -1;
+        return 1;
     }
 
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
@@ -999,6 +999,9 @@ static int img_compare(int argc, char **argv)
     filename1 = argv[optind++];
     filename2 = argv[optind++];
 
+    /* Initialize before goto out */
+    qemu_progress_init(progress, 2.0);
+
     flags = BDRV_O_FLAGS;
     ret = bdrv_parse_cache_flags(cache, &flags);
     if (ret < 0) {
@@ -1007,9 +1010,6 @@ static int img_compare(int argc, char **argv)
         goto out3;
     }
 
-    /* Initialize before goto out */
-    qemu_progress_init(progress, 2.0);
-
     bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
     if (!bs1) {
         error_report("Can't open file %s", filename1);
@@ -2304,7 +2304,7 @@ static int img_snapshot(int argc, char **argv)
 
 static int img_rebase(int argc, char **argv)
 {
-    BlockDriverState *bs, *bs_old_backing = NULL, *bs_new_backing = NULL;
+    BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
     BlockDriver *old_backing_drv, *new_backing_drv;
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
@@ -2376,14 +2376,14 @@ static int img_rebase(int argc, char **argv)
     ret = bdrv_parse_cache_flags(cache, &flags);
     if (ret < 0) {
         error_report("Invalid cache option: %s", cache);
-        return -1;
+        goto out;
     }
 
     src_flags = BDRV_O_FLAGS;
     ret = bdrv_parse_cache_flags(src_cache, &src_flags);
     if (ret < 0) {
         error_report("Invalid source cache option: %s", src_cache);
-        return -1;
+        goto out;
     }
 
     /*
@@ -2394,7 +2394,8 @@ static int img_rebase(int argc, char **argv)
      */
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
     if (!bs) {
-        return 1;
+        ret = -1;
+        goto out;
     }
 
     /* Find the right drivers for the backing files */
@@ -2420,11 +2421,7 @@ static int img_rebase(int argc, char **argv)
     }
 
     /* For safe rebasing we need to compare old and new backing file */
-    if (unsafe) {
-        /* Make the compiler happy */
-        bs_old_backing = NULL;
-        bs_new_backing = NULL;
-    } else {
+    if (!unsafe) {
         char backing_name[1024];
 
         bs_old_backing = bdrv_new("old_backing", &error_abort);
diff --git a/qemu-options.hx b/qemu-options.hx
index c573dd8893..5479cf54f4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2351,6 +2351,16 @@ multiple of 512 bytes. It defaults to 256k.
 @item sslverify
 Whether to verify the remote server's certificate when connecting over SSL. It
 can have the value 'on' or 'off'. It defaults to 'on'.
+
+@item cookie
+Send this cookie (it can also be a list of cookies separated by ';') with
+each outgoing request.  Only supported when using protocols such as HTTP
+which support cookies, otherwise ignored.
+
+@item timeout
+Set the timeout in seconds of the CURL connection. This timeout is the time
+that CURL waits for a response from the remote server to get the size of the
+image to be downloaded. If not set, the default timeout of 5 seconds is used.
 @end table
 
 Note that when passing options to qemu explicitly, @option{driver} is the value
@@ -2372,9 +2382,10 @@ qemu-system-x86_64 -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-o
 @end example
 
 Example: boot from an image stored on a VMware vSphere server with a self-signed
-certificate using a local overlay for writes and a readahead of 64k
+certificate using a local overlay for writes, a readahead of 64k and a timeout
+of 10 seconds.
 @example
-qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k"@}' /tmp/test.qcow2
+qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2
 
 qemu-system-x86_64 -drive file=/tmp/test.qcow2
 @end example
diff --git a/tests/qemu-iotests/100 b/tests/qemu-iotests/100
new file mode 100755
index 0000000000..9124aba760
--- /dev/null
+++ b/tests/qemu-iotests/100
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Test simple read/write using plain bdrv_read/bdrv_write
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+
+size=128M
+
+echo
+echo "== Single request =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Sequential requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 4k 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 8k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Superset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 1k 2k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xcd 0 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xcd 3k 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Subset overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 1k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [1k, 3k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 0 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 3k 1k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Head overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [0k, 2k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 2k 2k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Tail overlapping requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 2k 2k ; 0k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+# Order of overlapping in-flight requests is not guaranteed so we cannot verify
+# [2k, 4k) since it could have either pattern 0xcd or 0xce.
+$QEMU_IO -c "read -P 0xce 0k 2k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== Disjoint requests =="
+_make_test_img $size
+$QEMU_IO -c "multiwrite 0 4k ; 64k 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "== verify pattern =="
+$QEMU_IO -c "read -P 0xcd 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 4k 60k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xce 64k 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 68k 4k" "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/100.out b/tests/qemu-iotests/100.out
new file mode 100644
index 0000000000..2d6e9f0a7d
--- /dev/null
+++ b/tests/qemu-iotests/100.out
@@ -0,0 +1,89 @@
+QA output created by 100
+
+== Single request ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Sequential requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 8192/8192 bytes at offset 0
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 8192
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Superset overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 6144/6144 bytes at offset 0
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3072
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Subset overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 6144/6144 bytes at offset 1024
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3072
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Head overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 6144/6144 bytes at offset 0
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 2048
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Tail overlapping requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 6144/6144 bytes at offset 2048
+6 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Disjoint requests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+wrote 8192/8192 bytes at offset 0
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 61440/61440 bytes at offset 4096
+60 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 65536
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 69632
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 2803d68d62..0920b28db4 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -101,5 +101,6 @@
 092 rw auto quick
 095 rw auto quick
 099 rw auto quick
+100 rw auto quick
 101 rw auto quick
 103 rw auto quick
diff --git a/tests/test-aio.c b/tests/test-aio.c
index f12b6e0ae8..c6a8713e83 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -57,8 +57,6 @@ static void bh_test_cb(void *opaque)
     }
 }
 
-#if !defined(_WIN32)
-
 static void timer_test_cb(void *opaque)
 {
     TimerTestData *data = opaque;
@@ -68,12 +66,10 @@ static void timer_test_cb(void *opaque)
     }
 }
 
-static void dummy_io_handler_read(void *opaque)
+static void dummy_io_handler_read(EventNotifier *e)
 {
 }
 
-#endif /* !_WIN32 */
-
 static void bh_delete_cb(void *opaque)
 {
     BHTestData *data = opaque;
@@ -428,24 +424,18 @@ static void test_wait_event_notifier_noflush(void)
     event_notifier_cleanup(&data.e);
 }
 
-#if !defined(_WIN32)
-
 static void test_timer_schedule(void)
 {
     TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
                            .max = 2,
                            .clock_type = QEMU_CLOCK_VIRTUAL };
-    int pipefd[2];
+    EventNotifier e;
 
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!qemu_pipe(pipefd));
-    qemu_set_nonblock(pipefd[0]);
-    qemu_set_nonblock(pipefd[1]);
-
-    aio_set_fd_handler(ctx, pipefd[0],
-                       dummy_io_handler_read, NULL, NULL);
+    event_notifier_init(&e, false);
+    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
     aio_poll(ctx, false);
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -484,15 +474,12 @@ static void test_timer_schedule(void)
     g_assert(!aio_poll(ctx, false));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
-    close(pipefd[0]);
-    close(pipefd[1]);
+    aio_set_event_notifier(ctx, &e, NULL);
+    event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
 }
 
-#endif /* !_WIN32 */
-
 /* Now the same tests, using the context as a GSource.  They are
  * very similar to the ones above, with g_main_context_iteration
  * replacing aio_poll.  However:
@@ -775,25 +762,19 @@ static void test_source_wait_event_notifier_noflush(void)
     event_notifier_cleanup(&data.e);
 }
 
-#if !defined(_WIN32)
-
 static void test_source_timer_schedule(void)
 {
     TimerTestData data = { .n = 0, .ctx = ctx, .ns = SCALE_MS * 750LL,
                            .max = 2,
                            .clock_type = QEMU_CLOCK_VIRTUAL };
-    int pipefd[2];
+    EventNotifier e;
     int64_t expiry;
 
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!qemu_pipe(pipefd));
-    qemu_set_nonblock(pipefd[0]);
-    qemu_set_nonblock(pipefd[1]);
-
-    aio_set_fd_handler(ctx, pipefd[0],
-                       dummy_io_handler_read, NULL, NULL);
+    event_notifier_init(&e, false);
+    aio_set_event_notifier(ctx, &e, dummy_io_handler_read);
     do {} while (g_main_context_iteration(NULL, false));
 
     aio_timer_init(ctx, &data.timer, data.clock_type,
@@ -818,15 +799,12 @@ static void test_source_timer_schedule(void)
     g_assert_cmpint(data.n, ==, 2);
     g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
 
-    aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
-    close(pipefd[0]);
-    close(pipefd[1]);
+    aio_set_event_notifier(ctx, &e, NULL);
+    event_notifier_cleanup(&e);
 
     timer_del(&data.timer);
 }
 
-#endif /* !_WIN32 */
-
 
 /* End of tests.  */
 
@@ -857,9 +835,7 @@ int main(int argc, char **argv)
     g_test_add_func("/aio/event/wait",              test_wait_event_notifier);
     g_test_add_func("/aio/event/wait/no-flush-cb",  test_wait_event_notifier_noflush);
     g_test_add_func("/aio/event/flush",             test_flush_event_notifier);
-#if !defined(_WIN32)
     g_test_add_func("/aio/timer/schedule",          test_timer_schedule);
-#endif
 
     g_test_add_func("/aio-gsource/notify",                  test_source_notify);
     g_test_add_func("/aio-gsource/flush",                   test_source_flush);
@@ -874,8 +850,6 @@ int main(int argc, char **argv)
     g_test_add_func("/aio-gsource/event/wait",              test_source_wait_event_notifier);
     g_test_add_func("/aio-gsource/event/wait/no-flush-cb",  test_source_wait_event_notifier_noflush);
     g_test_add_func("/aio-gsource/event/flush",             test_source_flush_event_notifier);
-#if !defined(_WIN32)
     g_test_add_func("/aio-gsource/timer/schedule",          test_source_timer_schedule);
-#endif
     return g_test_run();
 }
author	Peter Maydell	2014-08-29 19:40:04 +0200
committer	Peter Maydell	2014-08-29 19:40:04 +0200
commit	988f46361465db0d4fce50e71fa0ff8f9d20483e (patch)
tree	fa637b2f366d47581d9fc6dc01a0ef722e906d9a
parent	Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140829'... (diff)
parent	quorum: Fix leak of opts in quorum_open (diff)
download	qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.tar.gz qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.tar.xz qemu-988f46361465db0d4fce50e71fa0ff8f9d20483e.zip