From 0b9caf9b3166c8deb3c4f3a774c2384b069dc29c Mon Sep 17 00:00:00 2001
From: Fam Zheng
Date: Tue, 26 Aug 2014 15:15:43 +0800
Subject: coroutine: Drop co_sleep_ns

block_job_sleep_ns is the only user. Since we are moving towards
AioContext aware code, it's better to use the explicit version and drop
the old one.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Benoît Canet <benoit.canet@nodalink.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/coroutine.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b9b7f488c9..793df0ef8b 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -200,14 +200,6 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock);
  */
 void qemu_co_rwlock_unlock(CoRwlock *lock);
 
-/**
- * Yield the coroutine for a given duration
- *
- * Note this function uses timers and hence only works when a main loop is in
- * use.  See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
-
 /**
  * Yield the coroutine for a given duration
  *
-- 
cgit v1.2.3-55-g7522


From 845ca10dd089b4e48f0a79bad005fb30eb77584e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 11:53:01 +0200
Subject: AioContext: take bottom halves into account when computing aio_poll
 timeout

Right now, QEMU invokes aio_bh_poll before the "poll" phase
of aio_poll.  It is simpler to do it afterwards and skip the
"poll" phase altogether when the OS-dependent parts of AioContext
are invoked from GSource.  This way, AioContext behaves more
similarly when used as a GSource vs. when used as stand-alone.

As a start, take bottom halves into account when computing the
poll timeout.  If a bottom half is ready, do a non-blocking
poll.  As a side effect, this makes idle bottom halves work
with aio_poll; an improvement, but not really an important
one since they are deprecated.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 aio-posix.c         |  2 +-
 aio-win32.c         |  4 ++--
 async.c             | 32 ++++++++++++++++++--------------
 include/block/aio.h |  8 ++++++++
 4 files changed, 29 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/aio-posix.c b/aio-posix.c
index 2eada2e049..55706f8205 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -249,7 +249,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* wait until next event */
     ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
                          ctx->pollfds->len,
-                         blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
+                         blocking ? aio_compute_timeout(ctx) : 0);
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
diff --git a/aio-win32.c b/aio-win32.c
index c12f61e97d..fe7ee5bb22 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -165,8 +165,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
     while (count > 0) {
         int ret;
 
-        timeout = blocking ?
-            qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
+        timeout = blocking
+            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
 
         /* if we have any signaled events, dispatch event */
diff --git a/async.c b/async.c
index 34af0b25ca..09e09c6526 100644
--- a/async.c
+++ b/async.c
@@ -152,39 +152,43 @@ void qemu_bh_delete(QEMUBH *bh)
     bh->deleted = 1;
 }
 
-static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
+int64_t
+aio_compute_timeout(AioContext *ctx)
 {
-    AioContext *ctx = (AioContext *) source;
+    int64_t deadline;
+    int timeout = -1;
     QEMUBH *bh;
-    int deadline;
 
-    /* We assume there is no timeout already supplied */
-    *timeout = -1;
     for (bh = ctx->first_bh; bh; bh = bh->next) {
         if (!bh->deleted && bh->scheduled) {
             if (bh->idle) {
                 /* idle bottom halves will be polled at least
                  * every 10ms */
-                *timeout = 10;
+                timeout = 10000000;
             } else {
                 /* non-idle bottom halves will be executed
                  * immediately */
-                *timeout = 0;
-                return true;
+                return 0;
             }
         }
     }
 
-    deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
+    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
     if (deadline == 0) {
-        *timeout = 0;
-        return true;
+        return 0;
     } else {
-        *timeout = qemu_soonest_timeout(*timeout, deadline);
+        return qemu_soonest_timeout(timeout, deadline);
     }
+}
 
-    return false;
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    /* We assume there is no timeout already supplied */
+    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+    return *timeout == 0;
 }
 
 static gboolean
diff --git a/include/block/aio.h b/include/block/aio.h
index c23de3cd1f..05b531ca25 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -303,4 +303,12 @@ static inline void aio_timer_init(AioContext *ctx,
     timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
 }
 
+/**
+ * aio_compute_timeout:
+ * @ctx: the aio context
+ *
+ * Compute the timeout that a blocking aio_poll should use.
+ */
+int64_t aio_compute_timeout(AioContext *ctx);
+
 #endif
-- 
cgit v1.2.3-55-g7522


From e4c7e2d12d7b1c4501ab3397218a206d4953e633 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 11:53:05 +0200
Subject: AioContext: export and use aio_dispatch

So far, aio_poll's scheme was dispatch/poll/dispatch, where
the first dispatch phase was used only in the GSource case in
order to avoid a blocking poll.  Earlier patches changed it to
dispatch/prepare/poll/dispatch, where prepare is aio_compute_timeout.

By making aio_dispatch public, we can remove the first dispatch
phase altogether, so that both aio_poll and the GSource use the same
prepare/poll/dispatch scheme.

This patch breaks the invariant that aio_poll(..., true) will not block
the first time it returns false.  This used to be fundamental for
qemu_aio_flush's implementation as "while (qemu_aio_wait()) {}" but
no code in QEMU relies on this invariant anymore.  The return value
of aio_poll() is now comparable with that of g_main_context_iteration.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 aio-posix.c         | 55 +++++++++++++----------------------------------------
 aio-win32.c         | 31 ++++--------------------------
 async.c             |  2 +-
 include/block/aio.h |  6 ++++++
 4 files changed, 24 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/aio-posix.c b/aio-posix.c
index 798a3ff532..0936b4ff9d 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -119,11 +119,20 @@ bool aio_pending(AioContext *ctx)
     return false;
 }
 
-static bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
     AioHandler *node;
     bool progress = false;
 
+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        progress = true;
+    }
+
     /*
      * We have to walk very carefully in case aio_set_fd_handler is
      * called while we're walking.
@@ -184,22 +193,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This happens
-     * in two cases:
-     *
-     * 1) when aio_poll is called with blocking == false
-     *
-     * 2) when we are called after poll().  If we are called before
-     *    poll(), bottom halves will not be re-evaluated and we need
-     *    aio_notify() if blocking == true.
-     *
-     * The first aio_dispatch() only does something when AioContext is
-     * running as a GSource, and in that case aio_poll is used only
-     * with blocking == false, so this optimization is already quite
-     * effective.  However, the code is ugly and should be restructured
-     * to have a single aio_dispatch() call.  To do this, we need to
-     * reorganize aio_poll into a prepare/poll/dispatch model like
-     * glib's.
+     * be re-evaluated before the next blocking poll().  This is
+     * already true when aio_poll is called with blocking == false;
+     * if blocking == true, it is only true after poll() returns.
      *
      * If we're in a nested event loop, ctx->dispatching might be true.
      * In that case we can restore it just before returning, but we
@@ -207,26 +203,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
      */
     aio_set_dispatching(ctx, !blocking);
 
-    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
-    }
-
-    /* Re-evaluate condition (1) above.  */
-    aio_set_dispatching(ctx, !blocking);
-    if (aio_dispatch(ctx)) {
-        progress = true;
-    }
-
-    if (progress && !blocking) {
-        goto out;
-    }
-
     ctx->walking_handlers++;
 
     g_array_set_size(ctx->pollfds, 0);
@@ -264,15 +240,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* Run dispatch even if there were no readable fds to run timers */
     aio_set_dispatching(ctx, true);
-    if (aio_bh_poll(ctx)) {
-        progress = true;
-    }
-
     if (aio_dispatch(ctx)) {
         progress = true;
     }
 
-out:
     aio_set_dispatching(ctx, was_dispatching);
     return progress;
 }
diff --git a/aio-win32.c b/aio-win32.c
index 2ac38a897c..1ec434a83c 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -130,11 +130,12 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
     return progress;
 }
 
-static bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
     bool progress;
 
-    progress = aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    progress = aio_bh_poll(ctx);
+    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
     progress |= timerlistgroup_run_timers(&ctx->tlg);
     return progress;
 }
@@ -149,23 +150,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     progress = false;
 
-    /*
-     * If there are callbacks left that have been queued, we need to call then.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        blocking = false;
-        progress = true;
-    }
-
-    /* Dispatch any pending callbacks from the GSource.  */
-    progress |= aio_dispatch(ctx);
-
-    if (progress && !blocking) {
-        return true;
-    }
-
     ctx->walking_handlers++;
 
     /* fill fd sets */
@@ -205,14 +189,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
         events[ret - WAIT_OBJECT_0] = events[--count];
     }
 
-    if (blocking) {
-        /* Run the timers a second time. We do this because otherwise aio_wait
-         * will not note progress - and will stop a drain early - if we have
-         * a timer that was not ready to run entering g_poll but is ready
-         * after g_poll. This will only do anything if a timer has expired.
-         */
-        progress |= timerlistgroup_run_timers(&ctx->tlg);
-    }
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
 
     return progress;
 }
diff --git a/async.c b/async.c
index 09e09c6526..293a52a0a7 100644
--- a/async.c
+++ b/async.c
@@ -213,7 +213,7 @@ aio_ctx_dispatch(GSource     *source,
     AioContext *ctx = (AioContext *) source;
 
     assert(callback == NULL);
-    aio_poll(ctx, false);
+    aio_dispatch(ctx);
     return true;
 }
 
diff --git a/include/block/aio.h b/include/block/aio.h
index 05b531ca25..7ba3e9675b 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -211,6 +211,12 @@ void qemu_bh_delete(QEMUBH *bh);
  */
 bool aio_pending(AioContext *ctx);
 
+/* Dispatch any pending callbacks from the GSource attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_dispatch(AioContext *ctx);
+
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
-- 
cgit v1.2.3-55-g7522


From a3462c656128e7b900ccc5d436f9e858d07de264 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 11:53:08 +0200
Subject: AioContext: introduce aio_prepare

This will be used to implement socket polling on Windows.
On Windows, select() and g_poll() are completely different;
sockets are polled with select() before calling g_poll,
and the g_poll must be nonblocking if select() says a
socket is ready.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 aio-posix.c         | 5 +++++
 aio-win32.c         | 5 +++++
 async.c             | 5 +++++
 include/block/aio.h | 9 ++++++++-
 4 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/aio-posix.c b/aio-posix.c
index 0936b4ff9d..d3ac06e238 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -100,6 +100,11 @@ void aio_set_event_notifier(AioContext *ctx,
                        (IOHandler *)io_read, NULL, notifier);
 }
 
+bool aio_prepare(AioContext *ctx)
+{
+    return false;
+}
+
 bool aio_pending(AioContext *ctx)
 {
     AioHandler *node;
diff --git a/aio-win32.c b/aio-win32.c
index fd526864be..45422704a4 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -76,6 +76,11 @@ void aio_set_event_notifier(AioContext *ctx,
     aio_notify(ctx);
 }
 
+bool aio_prepare(AioContext *ctx)
+{
+    return false;
+}
+
 bool aio_pending(AioContext *ctx)
 {
     AioHandler *node;
diff --git a/async.c b/async.c
index 293a52a0a7..a99e7f639a 100644
--- a/async.c
+++ b/async.c
@@ -188,6 +188,11 @@ aio_ctx_prepare(GSource *source, gint    *timeout)
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+
+    if (aio_prepare(ctx)) {
+        *timeout = 0;
+    }
+
     return *timeout == 0;
 }
 
diff --git a/include/block/aio.h b/include/block/aio.h
index 7ba3e9675b..ef4197b861 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -205,7 +205,14 @@ void qemu_bh_cancel(QEMUBH *bh);
 void qemu_bh_delete(QEMUBH *bh);
 
 /* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
+ * attached to the AioContext, before g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_prepare(AioContext *ctx);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, after g_poll is invoked.
  *
  * This is used internally in the implementation of the GSource.
  */
-- 
cgit v1.2.3-55-g7522


From b493317d344357f7ac56606246d09b5604e54ab6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 11:53:10 +0200
Subject: aio-win32: add support for sockets

Uses the same select/WSAEventSelect scheme as main-loop.c.
WSAEventSelect() is edge-triggered, so it cannot be used
directly, but it is still used as a way to exit from a
blocking g_poll().

Before g_poll() is called, we poll sockets with a non-blocking
select() to achieve the level-triggered semantics we require:
if a socket is ready, the g_poll() is made non-blocking too.

Based on a patch from Or Goshen.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 aio-win32.c         | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 block/Makefile.objs |   2 -
 include/block/aio.h |   2 -
 3 files changed, 145 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/aio-win32.c b/aio-win32.c
index 45422704a4..61e3d2ddfe 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -22,12 +22,80 @@
 
 struct AioHandler {
     EventNotifier *e;
+    IOHandler *io_read;
+    IOHandler *io_write;
     EventNotifierHandler *io_notify;
     GPollFD pfd;
     int deleted;
+    void *opaque;
     QLIST_ENTRY(AioHandler) node;
 };
 
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        void *opaque)
+{
+    /* fd is a SOCKET in our case */
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.fd == fd && !node->deleted) {
+            break;
+        }
+    }
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write) {
+        if (node) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        HANDLE event;
+
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->pfd.fd = fd;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+        }
+
+        node->pfd.events = 0;
+        if (node->io_read) {
+            node->pfd.events |= G_IO_IN;
+        }
+        if (node->io_write) {
+            node->pfd.events |= G_IO_OUT;
+        }
+
+        node->e = &ctx->notifier;
+
+        /* Update handler with latest information */
+        node->opaque = opaque;
+        node->io_read = io_read;
+        node->io_write = io_write;
+
+        event = event_notifier_get_handle(&ctx->notifier);
+        WSAEventSelect(node->pfd.fd, event,
+                       FD_READ | FD_ACCEPT | FD_CLOSE |
+                       FD_CONNECT | FD_WRITE | FD_OOB);
+    }
+
+    aio_notify(ctx);
+}
+
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
                             EventNotifierHandler *io_notify)
@@ -78,7 +146,39 @@ void aio_set_event_notifier(AioContext *ctx,
 
 bool aio_prepare(AioContext *ctx)
 {
-    return false;
+    static struct timeval tv0;
+    AioHandler *node;
+    bool have_select_revents = false;
+    fd_set rfds, wfds;
+
+    /* fill fd sets */
+    FD_ZERO(&rfds);
+    FD_ZERO(&wfds);
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->io_read) {
+            FD_SET ((SOCKET)node->pfd.fd, &rfds);
+        }
+        if (node->io_write) {
+            FD_SET ((SOCKET)node->pfd.fd, &wfds);
+        }
+    }
+
+    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            node->pfd.revents = 0;
+            if (FD_ISSET(node->pfd.fd, &rfds)) {
+                node->pfd.revents |= G_IO_IN;
+                have_select_revents = true;
+            }
+
+            if (FD_ISSET(node->pfd.fd, &wfds)) {
+                node->pfd.revents |= G_IO_OUT;
+                have_select_revents = true;
+            }
+        }
+    }
+
+    return have_select_revents;
 }
 
 bool aio_pending(AioContext *ctx)
@@ -89,6 +189,13 @@ bool aio_pending(AioContext *ctx)
         if (node->pfd.revents && node->io_notify) {
             return true;
         }
+
+        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
+            return true;
+        }
+        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
+            return true;
+        }
     }
 
     return false;
@@ -106,11 +213,12 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
     node = QLIST_FIRST(&ctx->aio_handlers);
     while (node) {
         AioHandler *tmp;
+        int revents = node->pfd.revents;
 
         ctx->walking_handlers++;
 
         if (!node->deleted &&
-            (node->pfd.revents || event_notifier_get_handle(node->e) == event) &&
+            (revents || event_notifier_get_handle(node->e) == event) &&
             node->io_notify) {
             node->pfd.revents = 0;
             node->io_notify(node->e);
@@ -121,6 +229,28 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
             }
         }
 
+        if (!node->deleted &&
+            (node->io_read || node->io_write)) {
+            node->pfd.revents = 0;
+            if ((revents & G_IO_IN) && node->io_read) {
+                node->io_read(node->opaque);
+                progress = true;
+            }
+            if ((revents & G_IO_OUT) && node->io_write) {
+                node->io_write(node->opaque);
+                progress = true;
+            }
+
+            /* if the next select() will return an event, we have progressed */
+            if (event == event_notifier_get_handle(&ctx->notifier)) {
+                WSANETWORKEVENTS ev;
+                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
+                if (ev.lNetworkEvents) {
+                    progress = true;
+                }
+            }
+        }
+
         tmp = node;
         node = QLIST_NEXT(node, node);
 
@@ -149,10 +279,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
     HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    bool was_dispatching, progress, first;
+    bool was_dispatching, progress, have_select_revents, first;
     int count;
     int timeout;
 
+    if (aio_prepare(ctx)) {
+        blocking = false;
+        have_select_revents = true;
+    }
+
     was_dispatching = ctx->dispatching;
     progress = false;
 
@@ -183,6 +318,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* wait until next event */
     while (count > 0) {
+        HANDLE event;
         int ret;
 
         timeout = blocking
@@ -196,13 +332,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
         first = false;
 
         /* if we have any signaled events, dispatch event */
-        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
+        event = NULL;
+        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
+            event = events[ret - WAIT_OBJECT_0];
+        } else if (!have_select_revents) {
             break;
         }
 
+        have_select_revents = false;
         blocking = false;
 
-        progress |= aio_dispatch_handlers(ctx, events[ret - WAIT_OBJECT_0]);
+        progress |= aio_dispatch_handlers(ctx, event);
 
         /* Try again, but only call each handler once.  */
         events[ret - WAIT_OBJECT_0] = events[--count];
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 858d2b387b..f45f9399aa 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -10,7 +10,6 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
-ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
@@ -19,7 +18,6 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-endif
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/include/block/aio.h b/include/block/aio.h
index ef4197b861..4603c0f066 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -239,7 +239,6 @@ bool aio_dispatch(AioContext *ctx);
  */
 bool aio_poll(AioContext *ctx, bool blocking);
 
-#ifdef CONFIG_POSIX
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
  * be invoked when using aio_poll().
@@ -252,7 +251,6 @@ void aio_set_fd_handler(AioContext *ctx,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         void *opaque);
-#endif
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
-- 
cgit v1.2.3-55-g7522


From 33384421b3abe74555baeaf788b17204cd8c6080 Mon Sep 17 00:00:00 2001
From: Max Reitz
Date: Fri, 20 Jun 2014 21:57:33 +0200
Subject: block: Add AIO context notifiers

If a long-running operation on a BDS wants to always remain in the same
AIO context, it somehow needs to keep track of the BDS changing its
context. This adds a function for registering callbacks on a BDS which
are called whenever the BDS is attached or detached from an AIO context.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c                   | 56 +++++++++++++++++++++++++++++++++++++++++++++++
 include/block/block_int.h | 41 ++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)

(limited to 'include')

diff --git a/block.c b/block.c
index 1df13ac1c7..9c5566b55b 100644
--- a/block.c
+++ b/block.c
@@ -1819,6 +1819,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 
 void bdrv_close(BlockDriverState *bs)
 {
+    BdrvAioNotifier *ban, *ban_next;
+
     if (bs->job) {
         block_job_cancel_sync(bs->job);
     }
@@ -1863,6 +1865,11 @@ void bdrv_close(BlockDriverState *bs)
     if (bs->io_limits_enabled) {
         bdrv_io_limits_disable(bs);
     }
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        g_free(ban);
+    }
+    QLIST_INIT(&bs->aio_notifiers);
 }
 
 void bdrv_close_all(void)
@@ -5729,10 +5736,16 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 
 void bdrv_detach_aio_context(BlockDriverState *bs)
 {
+    BdrvAioNotifier *baf;
+
     if (!bs->drv) {
         return;
     }
 
+    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
+        baf->detach_aio_context(baf->opaque);
+    }
+
     if (bs->io_limits_enabled) {
         throttle_detach_aio_context(&bs->throttle_state);
     }
@@ -5752,6 +5765,8 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context)
 {
+    BdrvAioNotifier *ban;
+
     if (!bs->drv) {
         return;
     }
@@ -5770,6 +5785,10 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
     if (bs->io_limits_enabled) {
         throttle_attach_aio_context(&bs->throttle_state, new_context);
     }
+
+    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
+        ban->attached_aio_context(new_context, ban->opaque);
+    }
 }
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
@@ -5786,6 +5805,43 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
     aio_context_release(new_context);
 }
 
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque)
+{
+    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
+    *ban = (BdrvAioNotifier){
+        .attached_aio_context = attached_aio_context,
+        .detach_aio_context   = detach_aio_context,
+        .opaque               = opaque
+    };
+
+    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
+}
+
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*attached_aio_context)(AioContext *,
+                                                                   void *),
+                                      void (*detach_aio_context)(void *),
+                                      void *opaque)
+{
+    BdrvAioNotifier *ban, *ban_next;
+
+    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
+        if (ban->attached_aio_context == attached_aio_context &&
+            ban->detach_aio_context   == detach_aio_context   &&
+            ban->opaque               == opaque)
+        {
+            QLIST_REMOVE(ban, list);
+            g_free(ban);
+
+            return;
+        }
+    }
+
+    abort();
+}
+
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                     NotifierWithReturn *notifier)
 {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 233489547e..8a61215ac0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -294,6 +294,15 @@ typedef struct BlockLimits {
 
 typedef struct BdrvOpBlocker BdrvOpBlocker;
 
+typedef struct BdrvAioNotifier {
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    void (*detach_aio_context)(void *opaque);
+
+    void *opaque;
+
+    QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
 /*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
@@ -320,6 +329,10 @@ struct BlockDriverState {
     void *dev_opaque;
 
     AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+    /* long-running tasks intended to always use the same AioContext as this
+     * BDS may register themselves in this list to be notified of changes
+     * regarding this BDS's context */
+    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
 
     char filename[1024];
     char backing_file[1024]; /* if non zero, the image is a diff of
@@ -437,6 +450,34 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
 void bdrv_attach_aio_context(BlockDriverState *bs,
                              AioContext *new_context);
 
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*aio_context_attached)(AioContext *,
+                                                                   void *),
+                                      void (*aio_context_detached)(void *),
+                                      void *opaque);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
-- 
cgit v1.2.3-55-g7522