From b5042a36229b4fa5eeb66bbcde78f704975aec00 Mon Sep 17 00:00:00 2001 From: Benoît Canet Date: Mon, 3 Mar 2014 19:11:34 +0100 Subject: block: Rewrite the snapshot authorization mechanism for block filters. This patch keep the recursive way of doing things but simplify it by giving two responsabilities to all block filters implementors. They will need to do two things: -Set the is_filter field of their block driver to true. -Implement the bdrv_recurse_is_first_non_filter method of their block driver like it is done on the Quorum block driver. (block/quorum.c) [Paolo Bonzini pointed out that this patch changes the semantics of blkverify, which now recurses down both bs->file and s->test_file. -- Stefan] Reported-by: Paolo Bonzini Signed-off-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- include/block/block.h | 9 --------- include/block/block_int.h | 8 ++++---- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 780f48b7b3..bd34d14109 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -286,15 +286,6 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int bdrv_amend_options(BlockDriverState *bs_new, QEMUOptionParameter *options); /* external snapshots */ - -typedef enum { - BS_IS_A_FILTER, - BS_FILTER_PASS_DOWN, - BS_AUTHORIZATION_COUNT, -} BsAuthorization; - -bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate); bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate); bool bdrv_is_first_non_filter(BlockDriverState *candidate); diff --git a/include/block/block_int.h b/include/block/block_int.h index 0bcf1c9b8c..4fc5ea8a65 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -76,10 +76,10 @@ struct BlockDriver { const char *format_name; int instance_size; - /* this table of boolean contains authorizations for the block operations */ - bool authorizations[BS_AUTHORIZATION_COUNT]; - /* for snapshots complex block filter like Quorum can implement the - * following recursive callback instead of BS_IS_A_FILTER. + /* set to true if the BlockDriver is a block filter */ + bool is_filter; + /* for snapshots block filter like Quorum can implement the + * following recursive callback. * It's purpose is to recurse on the filter children while calling * bdrv_recurse_is_first_non_filter on them. * For a sample implementation look in the future Quorum block filter. -- cgit v1.2.3-55-g7522 From 11f590b1a242492a0108da42f40f0e2b20f0a778 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:02 +0100 Subject: object: add object_get_canonical_path_component() It is often useful to find an object's child property name. Also use this new function to simplify the implementation of object_get_canonical_path(). Reviewed-by: Andreas Färber Signed-off-by: Stefan Hajnoczi --- include/qom/object.h | 8 ++++++++ qom/object.c | 54 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/qom/object.h b/include/qom/object.h index 9c7c361d30..4cd77049e4 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -973,6 +973,14 @@ const char *object_property_get_type(Object *obj, const char *name, */ Object *object_get_root(void); +/** + * object_get_canonical_path_component: + * + * Returns: The final component in the object's canonical path. The canonical + * path is the path within the composition tree starting from the root. + */ +gchar *object_get_canonical_path_component(Object *obj); + /** * object_get_canonical_path: * diff --git a/qom/object.c b/qom/object.c index 660859c0e7..3290375016 100644 --- a/qom/object.c +++ b/qom/object.c @@ -1102,39 +1102,49 @@ void object_property_add_link(Object *obj, const char *name, g_free(full_type); } +gchar *object_get_canonical_path_component(Object *obj) +{ + ObjectProperty *prop = NULL; + + g_assert(obj); + g_assert(obj->parent != NULL); + + QTAILQ_FOREACH(prop, &obj->parent->properties, node) { + if (!object_property_is_child(prop)) { + continue; + } + + if (prop->opaque == obj) { + return g_strdup(prop->name); + } + } + + /* obj had a parent but was not a child, should never happen */ + g_assert_not_reached(); + return NULL; +} + gchar *object_get_canonical_path(Object *obj) { Object *root = object_get_root(); - char *newpath = NULL, *path = NULL; + char *newpath, *path = NULL; while (obj != root) { - ObjectProperty *prop = NULL; - - g_assert(obj->parent != NULL); - - QTAILQ_FOREACH(prop, &obj->parent->properties, node) { - if (!object_property_is_child(prop)) { - continue; - } + char *component = object_get_canonical_path_component(obj); - if (prop->opaque == obj) { - if (path) { - newpath = g_strdup_printf("%s/%s", prop->name, path); - g_free(path); - path = newpath; - } else { - path = g_strdup(prop->name); - } - break; - } + if (path) { + newpath = g_strdup_printf("%s/%s", component, path); + g_free(component); + g_free(path); + path = newpath; + } else { + path = component; } - g_assert(prop != NULL); - obj = obj->parent; } - newpath = g_strdup_printf("/%s", path); + newpath = g_strdup_printf("/%s", path ? path : ""); g_free(path); return newpath; -- cgit v1.2.3-55-g7522 From 2da61b671eb89fcaa306738f44eed472977d6587 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:03 +0100 Subject: rfifolock: add recursive FIFO lock QemuMutex does not guarantee fairness and cannot be acquired recursively: Fairness means each locker gets a turn and the scheduler cannot cause starvation. Recursive locking is useful for composition, it allows a sequence of locking operations to be invoked atomically by acquiring the lock around them. This patch adds RFifoLock, a recursive lock that guarantees FIFO order. Its first user is added in the next patch. RFifoLock has one additional feature: it can be initialized with an optional contention callback. The callback is invoked whenever a thread must wait for the lock. For example, it can be used to poke the current owner so that they release the lock soon. Signed-off-by: Stefan Hajnoczi --- include/qemu/rfifolock.h | 54 ++++++++++++++++++++++++++++ tests/Makefile | 2 ++ tests/test-rfifolock.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ util/Makefile.objs | 1 + util/rfifolock.c | 78 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 226 insertions(+) create mode 100644 include/qemu/rfifolock.h create mode 100644 tests/test-rfifolock.c create mode 100644 util/rfifolock.c (limited to 'include') diff --git a/include/qemu/rfifolock.h b/include/qemu/rfifolock.h new file mode 100644 index 0000000000..b23ab538a6 --- /dev/null +++ b/include/qemu/rfifolock.h @@ -0,0 +1,54 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_RFIFOLOCK_H +#define QEMU_RFIFOLOCK_H + +#include "qemu/thread.h" + +/* Recursive FIFO lock + * + * This lock provides more features than a plain mutex: + * + * 1. Fairness - enforces FIFO order. + * 2. Nesting - can be taken recursively. + * 3. Contention callback - optional, called when thread must wait. + * + * The recursive FIFO lock is heavyweight so prefer other synchronization + * primitives if you do not need its features. + */ +typedef struct { + QemuMutex lock; /* protects all fields */ + + /* FIFO order */ + unsigned int head; /* active ticket number */ + unsigned int tail; /* waiting ticket number */ + QemuCond cond; /* used to wait for our ticket number */ + + /* Nesting */ + QemuThread owner_thread; /* thread that currently has ownership */ + unsigned int nesting; /* amount of nesting levels */ + + /* Contention callback */ + void (*cb)(void *); /* called when thread must wait, with ->lock + * held so it may not recursively lock/unlock + */ + void *cb_opaque; +} RFifoLock; + +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque); +void rfifolock_destroy(RFifoLock *r); +void rfifolock_lock(RFifoLock *r); +void rfifolock_unlock(RFifoLock *r); + +#endif /* QEMU_RFIFOLOCK_H */ diff --git a/tests/Makefile b/tests/Makefile index e146f81d44..190e596689 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -35,6 +35,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF) check-unit-y += tests/test-iov$(EXESUF) gcov-files-test-iov-y = util/iov.c check-unit-y += tests/test-aio$(EXESUF) +check-unit-y += tests/test-rfifolock$(EXESUF) check-unit-y += tests/test-throttle$(EXESUF) gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c @@ -176,6 +177,7 @@ tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a libqemustub.a tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a +tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o libqemuutil.a libqemustub.a tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a diff --git a/tests/test-rfifolock.c b/tests/test-rfifolock.c new file mode 100644 index 0000000000..0572ebb42a --- /dev/null +++ b/tests/test-rfifolock.c @@ -0,0 +1,91 @@ +/* + * RFifoLock tests + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include "qemu-common.h" +#include "qemu/rfifolock.h" + +static void test_nesting(void) +{ + RFifoLock lock; + + /* Trivial test, ensure the lock is recursive */ + rfifolock_init(&lock, NULL, NULL); + rfifolock_lock(&lock); + rfifolock_lock(&lock); + rfifolock_lock(&lock); + rfifolock_unlock(&lock); + rfifolock_unlock(&lock); + rfifolock_unlock(&lock); + rfifolock_destroy(&lock); +} + +typedef struct { + RFifoLock lock; + int fd[2]; +} CallbackTestData; + +static void rfifolock_cb(void *opaque) +{ + CallbackTestData *data = opaque; + int ret; + char c = 0; + + ret = write(data->fd[1], &c, sizeof(c)); + g_assert(ret == 1); +} + +static void *callback_thread(void *opaque) +{ + CallbackTestData *data = opaque; + + /* The other thread holds the lock so the contention callback will be + * invoked... + */ + rfifolock_lock(&data->lock); + rfifolock_unlock(&data->lock); + return NULL; +} + +static void test_callback(void) +{ + CallbackTestData data; + QemuThread thread; + int ret; + char c; + + rfifolock_init(&data.lock, rfifolock_cb, &data); + ret = qemu_pipe(data.fd); + g_assert(ret == 0); + + /* Hold lock but allow the callback to kick us by writing to the pipe */ + rfifolock_lock(&data.lock); + qemu_thread_create(&thread, "callback_thread", + callback_thread, &data, QEMU_THREAD_JOINABLE); + ret = read(data.fd[0], &c, sizeof(c)); + g_assert(ret == 1); + rfifolock_unlock(&data.lock); + /* If we got here then the callback was invoked, as expected */ + + qemu_thread_join(&thread); + close(data.fd[0]); + close(data.fd[1]); + rfifolock_destroy(&data.lock); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/nesting", test_nesting); + g_test_add_func("/callback", test_callback); + return g_test_run(); +} diff --git a/util/Makefile.objs b/util/Makefile.objs index 937376b082..df83b629a0 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -14,3 +14,4 @@ util-obj-y += crc32c.o util-obj-y += throttle.o util-obj-y += getauxval.o util-obj-y += readline.o +util-obj-y += rfifolock.o diff --git a/util/rfifolock.c b/util/rfifolock.c new file mode 100644 index 0000000000..afbf7488df --- /dev/null +++ b/util/rfifolock.c @@ -0,0 +1,78 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include +#include "qemu/rfifolock.h" + +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) +{ + qemu_mutex_init(&r->lock); + r->head = 0; + r->tail = 0; + qemu_cond_init(&r->cond); + r->nesting = 0; + r->cb = cb; + r->cb_opaque = opaque; +} + +void rfifolock_destroy(RFifoLock *r) +{ + qemu_cond_destroy(&r->cond); + qemu_mutex_destroy(&r->lock); +} + +/* + * Theory of operation: + * + * In order to ensure FIFO ordering, implement a ticketlock. Threads acquiring + * the lock enqueue themselves by incrementing the tail index. When the lock + * is unlocked, the head is incremented and waiting threads are notified. + * + * Recursive locking does not take a ticket since the head is only incremented + * when the outermost recursive caller unlocks. + */ +void rfifolock_lock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + + /* Take a ticket */ + unsigned int ticket = r->tail++; + + if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) { + r->tail--; /* put ticket back, we're nesting */ + } else { + while (ticket != r->head) { + /* Invoke optional contention callback */ + if (r->cb) { + r->cb(r->cb_opaque); + } + qemu_cond_wait(&r->cond, &r->lock); + } + } + + qemu_thread_get_self(&r->owner_thread); + r->nesting++; + qemu_mutex_unlock(&r->lock); +} + +void rfifolock_unlock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + assert(r->nesting > 0); + assert(qemu_thread_is_self(&r->owner_thread)); + if (--r->nesting == 0) { + r->head++; + qemu_cond_broadcast(&r->cond); + } + qemu_mutex_unlock(&r->lock); +} -- cgit v1.2.3-55-g7522 From 98563fc3ec44c1becce6f1720ad6b0a82ed101b4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:04 +0100 Subject: aio: add aio_context_acquire() and aio_context_release() It can be useful to run an AioContext from a thread which normally does not "own" the AioContext. For example, request draining can be implemented by acquiring the AioContext and looping aio_poll() until all requests have been completed. The following pattern should work: /* Event loop thread */ while (running) { aio_context_acquire(ctx); aio_poll(ctx, true); aio_context_release(ctx); } /* Another thread */ aio_context_acquire(ctx); bdrv_read(bs, 0x1000, buf, 1); aio_context_release(ctx); This patch implements aio_context_acquire() and aio_context_release(). Note that existing aio_poll() callers do not need to worry about acquiring and releasing - it is only needed when multiple threads will call aio_poll() on the same AioContext. Signed-off-by: Stefan Hajnoczi --- async.c | 18 ++++++++++++++++ include/block/aio.h | 18 ++++++++++++++++ tests/test-aio.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) (limited to 'include') diff --git a/async.c b/async.c index 5fb3fa61df..6930185e64 100644 --- a/async.c +++ b/async.c @@ -214,6 +214,7 @@ aio_ctx_finalize(GSource *source) thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL); event_notifier_cleanup(&ctx->notifier); + rfifolock_destroy(&ctx->lock); qemu_mutex_destroy(&ctx->bh_lock); g_array_free(ctx->pollfds, TRUE); timerlistgroup_deinit(&ctx->tlg); @@ -250,6 +251,12 @@ static void aio_timerlist_notify(void *opaque) aio_notify(opaque); } +static void aio_rfifolock_cb(void *opaque) +{ + /* Kick owner thread in case they are blocked in aio_poll() */ + aio_notify(opaque); +} + AioContext *aio_context_new(void) { AioContext *ctx; @@ -257,6 +264,7 @@ AioContext *aio_context_new(void) ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); + rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); event_notifier_init(&ctx->notifier, false); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) @@ -275,3 +283,13 @@ void aio_context_unref(AioContext *ctx) { g_source_unref(&ctx->source); } + +void aio_context_acquire(AioContext *ctx) +{ + rfifolock_lock(&ctx->lock); +} + +void aio_context_release(AioContext *ctx) +{ + rfifolock_unlock(&ctx->lock); +} diff --git a/include/block/aio.h b/include/block/aio.h index 2efdf416cf..a92511bd3b 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -19,6 +19,7 @@ #include "qemu/queue.h" #include "qemu/event_notifier.h" #include "qemu/thread.h" +#include "qemu/rfifolock.h" #include "qemu/timer.h" typedef struct BlockDriverAIOCB BlockDriverAIOCB; @@ -47,6 +48,9 @@ typedef void IOHandler(void *opaque); struct AioContext { GSource source; + /* Protects all fields from multi-threaded access */ + RFifoLock lock; + /* The list of registered AIO handlers */ QLIST_HEAD(, AioHandler) aio_handlers; @@ -104,6 +108,20 @@ void aio_context_ref(AioContext *ctx); */ void aio_context_unref(AioContext *ctx); +/* Take ownership of the AioContext. If the AioContext will be shared between + * threads, a thread must have ownership when calling aio_poll(). + * + * Note that multiple threads calling aio_poll() means timers, BHs, and + * callbacks may be invoked from a different thread than they were registered + * from. Therefore, code must use AioContext acquire/release or use + * fine-grained synchronization to protect shared state if other threads will + * be accessing it simultaneously. + */ +void aio_context_acquire(AioContext *ctx); + +/* Relinquish ownership of the AioContext. */ +void aio_context_release(AioContext *ctx); + /** * aio_bh_new: Allocate a new bottom half structure. * diff --git a/tests/test-aio.c b/tests/test-aio.c index 592721ed3f..56f4288ca8 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -112,6 +112,64 @@ static void test_notify(void) g_assert(!aio_poll(ctx, false)); } +typedef struct { + QemuMutex start_lock; + bool thread_acquired; +} AcquireTestData; + +static void *test_acquire_thread(void *opaque) +{ + AcquireTestData *data = opaque; + + /* Wait for other thread to let us start */ + qemu_mutex_lock(&data->start_lock); + qemu_mutex_unlock(&data->start_lock); + + aio_context_acquire(ctx); + aio_context_release(ctx); + + data->thread_acquired = true; /* success, we got here */ + + return NULL; +} + +static void dummy_notifier_read(EventNotifier *unused) +{ + g_assert(false); /* should never be invoked */ +} + +static void test_acquire(void) +{ + QemuThread thread; + EventNotifier notifier; + AcquireTestData data; + + /* Dummy event notifier ensures aio_poll() will block */ + event_notifier_init(¬ifier, false); + aio_set_event_notifier(ctx, ¬ifier, dummy_notifier_read); + g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ + + qemu_mutex_init(&data.start_lock); + qemu_mutex_lock(&data.start_lock); + data.thread_acquired = false; + + qemu_thread_create(&thread, "test_acquire_thread", + test_acquire_thread, + &data, QEMU_THREAD_JOINABLE); + + /* Block in aio_poll(), let other thread kick us and acquire context */ + aio_context_acquire(ctx); + qemu_mutex_unlock(&data.start_lock); /* let the thread run */ + g_assert(!aio_poll(ctx, true)); + aio_context_release(ctx); + + qemu_thread_join(&thread); + aio_set_event_notifier(ctx, ¬ifier, NULL); + event_notifier_cleanup(¬ifier); + + g_assert(data.thread_acquired); +} + static void test_bh_schedule(void) { BHTestData data = { .n = 0 }; @@ -775,6 +833,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); g_test_add_func("/aio/notify", test_notify); + g_test_add_func("/aio/acquire", test_acquire); g_test_add_func("/aio/bh/schedule", test_bh_schedule); g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); g_test_add_func("/aio/bh/cancel", test_bh_cancel); -- cgit v1.2.3-55-g7522 From be8d8537668c9be7a8dee6aed94b2b3f9fcd4a9f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:05 +0100 Subject: iothread: add I/O thread object This is a stand-in for Michael Roth's QContext. I expect this to be replaced once QContext is completed. The IOThread object is an AioContext event loop thread. This patch adds the concept of multiple event loop threads, allowing users to define them. When SMP guests run on SMP hosts it makes sense to instantiate multiple IOThreads. This spreads event loop processing across multiple cores. Note that additional patches are required to actually bind a device to an IOThread. [Andreas Färber pointed out that the embedded parent object instance should be called "parent_obj" and have a newline afterwards. This patch has been changed to reflect this. -- Stefan] Signed-off-by: Stefan Hajnoczi --- Makefile.objs | 1 + include/sysemu/iothread.h | 30 ++++++++++++ iothread.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 include/sysemu/iothread.h create mode 100644 iothread.c (limited to 'include') diff --git a/Makefile.objs b/Makefile.objs index 5cd3d816ff..a6e0e2aacc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -39,6 +39,7 @@ libcacard-y += libcacard/vcardt.o ifeq ($(CONFIG_SOFTMMU),y) common-obj-y = blockdev.o blockdev-nbd.o block/ +common-obj-y += iothread.o common-obj-y += net/ common-obj-y += qdev-monitor.o device-hotplug.o common-obj-$(CONFIG_WIN32) += os-win32.o diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h new file mode 100644 index 0000000000..a32214a647 --- /dev/null +++ b/include/sysemu/iothread.h @@ -0,0 +1,30 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef IOTHREAD_H +#define IOTHREAD_H + +#include "block/aio.h" + +#define TYPE_IOTHREAD "iothread" + +typedef struct IOThread IOThread; + +#define IOTHREAD(obj) \ + OBJECT_CHECK(IOThread, obj, TYPE_IOTHREAD) + +IOThread *iothread_find(const char *id); +char *iothread_get_id(IOThread *iothread); +AioContext *iothread_get_aio_context(IOThread *iothread); + +#endif /* IOTHREAD_H */ diff --git a/iothread.c b/iothread.c new file mode 100644 index 0000000000..f263ee2744 --- /dev/null +++ b/iothread.c @@ -0,0 +1,120 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qom/object.h" +#include "qom/object_interfaces.h" +#include "qemu/module.h" +#include "qemu/thread.h" +#include "block/aio.h" +#include "sysemu/iothread.h" + +#define IOTHREADS_PATH "/objects" + +typedef ObjectClass IOThreadClass; +struct IOThread { + Object parent_obj; + + QemuThread thread; + AioContext *ctx; + bool stopping; +}; + +#define IOTHREAD_GET_CLASS(obj) \ + OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) +#define IOTHREAD_CLASS(klass) \ + OBJECT_CLASS_CHECK(IOThreadClass, klass, TYPE_IOTHREAD) + +static void *iothread_run(void *opaque) +{ + IOThread *iothread = opaque; + + while (!iothread->stopping) { + aio_context_acquire(iothread->ctx); + while (!iothread->stopping && aio_poll(iothread->ctx, true)) { + /* Progress was made, keep going */ + } + aio_context_release(iothread->ctx); + } + return NULL; +} + +static void iothread_instance_finalize(Object *obj) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread->stopping = true; + aio_notify(iothread->ctx); + qemu_thread_join(&iothread->thread); + aio_context_unref(iothread->ctx); +} + +static void iothread_complete(UserCreatable *obj, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread->stopping = false; + iothread->ctx = aio_context_new(); + + /* This assumes we are called from a thread with useful CPU affinity for us + * to inherit. + */ + qemu_thread_create(&iothread->thread, "iothread", iothread_run, + iothread, QEMU_THREAD_JOINABLE); +} + +static void iothread_class_init(ObjectClass *klass, void *class_data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = iothread_complete; +} + +static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, + .parent = TYPE_OBJECT, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_finalize = iothread_instance_finalize, + .interfaces = (InterfaceInfo[]) { + {TYPE_USER_CREATABLE}, + {} + }, +}; + +static void iothread_register_types(void) +{ + type_register_static(&iothread_info); +} + +type_init(iothread_register_types) + +IOThread *iothread_find(const char *id) +{ + Object *container = container_get(object_get_root(), IOTHREADS_PATH); + Object *child; + + child = object_property_get_link(container, id, NULL); + if (!child) { + return NULL; + } + return (IOThread *)object_dynamic_cast(child, TYPE_IOTHREAD); +} + +char *iothread_get_id(IOThread *iothread) +{ + return object_get_canonical_path_component(OBJECT(iothread)); +} + +AioContext *iothread_get_aio_context(IOThread *iothread) +{ + return iothread->ctx; +} -- cgit v1.2.3-55-g7522 From 6e4a876b433f78f72724f45ae3f9e26596da1b4d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:07 +0100 Subject: iothread: add "iothread" qdev property type Add a "iothread" qdev property type so devices can be hooked up to an IOThread from the comand-line: qemu -object iothread,id=iothread0 \ -device some-device,x-iothread=iothread0 Note that Paolo Bonzini has suggested using QOM links instead. This way the relationship between the objects is reflected in QOM. There are currently shortcomings of object_property_add_link() which prevent this use case. I will attempt to fix them and move to QOM links in a separate series. Signed-off-by: Stefan Hajnoczi --- hw/core/qdev-properties-system.c | 51 ++++++++++++++++++++++++++++++++++++++++ include/hw/qdev-properties.h | 3 +++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 878c3bc049..de835612f0 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -18,6 +18,7 @@ #include "net/hub.h" #include "qapi/visitor.h" #include "sysemu/char.h" +#include "sysemu/iothread.h" static void get_pointer(Object *obj, Visitor *v, Property *prop, char *(*print)(void *ptr), @@ -385,6 +386,56 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) nd->instantiated = 1; } +/* --- iothread --- */ + +static char *print_iothread(void *ptr) +{ + return iothread_get_id(ptr); +} + +static int parse_iothread(DeviceState *dev, const char *str, void **ptr) +{ + IOThread *iothread; + + iothread = iothread_find(str); + if (!iothread) { + return -ENOENT; + } + object_ref(OBJECT(iothread)); + *ptr = iothread; + return 0; +} + +static void get_iothread(Object *obj, struct Visitor *v, void *opaque, + const char *name, Error **errp) +{ + get_pointer(obj, v, opaque, print_iothread, name, errp); +} + +static void set_iothread(Object *obj, struct Visitor *v, void *opaque, + const char *name, Error **errp) +{ + set_pointer(obj, v, opaque, parse_iothread, name, errp); +} + +static void release_iothread(Object *obj, const char *name, void *opaque) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + IOThread **ptr = qdev_get_prop_ptr(dev, prop); + + if (*ptr) { + object_unref(OBJECT(*ptr)); + } +} + +PropertyInfo qdev_prop_iothread = { + .name = "iothread", + .get = get_iothread, + .set = set_iothread, + .release = release_iothread, +}; + static int qdev_add_one_global(QemuOpts *opts, void *opaque) { GlobalProperty *g; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 0c0babfa6a..3c000eea75 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -22,6 +22,7 @@ extern PropertyInfo qdev_prop_bios_chs_trans; extern PropertyInfo qdev_prop_drive; extern PropertyInfo qdev_prop_netdev; extern PropertyInfo qdev_prop_vlan; +extern PropertyInfo qdev_prop_iothread; extern PropertyInfo qdev_prop_pci_devfn; extern PropertyInfo qdev_prop_blocksize; extern PropertyInfo qdev_prop_pci_host_devaddr; @@ -142,6 +143,8 @@ extern PropertyInfo qdev_prop_arraylen; DEFINE_PROP(_n, _s, _f, qdev_prop_vlan, NICPeers) #define DEFINE_PROP_DRIVE(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockDriverState *) +#define DEFINE_PROP_IOTHREAD(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_iothread, IOThread *) #define DEFINE_PROP_MACADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ -- cgit v1.2.3-55-g7522 From 48ff269272f18d2b8fa53cb08365df417588f585 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:08 +0100 Subject: dataplane: replace internal thread with IOThread Today virtio-blk dataplane uses a 1:1 device-per-thread model. Now that IOThreads have been introduced we can generalize this to N:M devices per threads. This patch drops thread code from dataplane in favor of running inside an IOThread AioContext. As a bonus we solve the case where a guest keeps submitting I/O requests while dataplane is trying to stop. Previously the dataplane thread would continue to process requests until the request gave it a break. Now we can shut down in bounded time thanks to aio_context_acquire/release. Signed-off-by: Stefan Hajnoczi --- hw/block/dataplane/virtio-blk.c | 96 +++++++++++++++++++++++------------------ include/hw/virtio/virtio-blk.h | 8 +++- 2 files changed, 60 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index d1c7ad4574..a5afc217c0 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -23,6 +23,7 @@ #include "virtio-blk.h" #include "block/aio.h" #include "hw/virtio/virtio-bus.h" +#include "monitor/monitor.h" /* for object_add() */ enum { SEG_MAX = 126, /* maximum number of I/O segments */ @@ -44,8 +45,6 @@ struct VirtIOBlockDataPlane { bool started; bool starting; bool stopping; - QEMUBH *start_bh; - QemuThread thread; VirtIOBlkConf *blk; int fd; /* image file descriptor */ @@ -59,12 +58,14 @@ struct VirtIOBlockDataPlane { * (because you don't own the file descriptor or handle; you just * use it). */ + IOThread *iothread; + bool internal_iothread; AioContext *ctx; EventNotifier io_notifier; /* Linux AIO completion */ EventNotifier host_notifier; /* doorbell */ IOQueue ioqueue; /* Linux AIO queue (should really be per - dataplane thread) */ + IOThread) */ VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the queue */ @@ -342,26 +343,7 @@ static void handle_io(EventNotifier *e) } } -static void *data_plane_thread(void *opaque) -{ - VirtIOBlockDataPlane *s = opaque; - - while (!s->stopping || s->num_reqs > 0) { - aio_poll(s->ctx, true); - } - return NULL; -} - -static void start_data_plane_bh(void *opaque) -{ - VirtIOBlockDataPlane *s = opaque; - - qemu_bh_delete(s->start_bh); - s->start_bh = NULL; - qemu_thread_create(&s->thread, "data_plane", data_plane_thread, - s, QEMU_THREAD_JOINABLE); -} - +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, VirtIOBlockDataPlane **dataplane, Error **errp) @@ -408,12 +390,33 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, s->fd = fd; s->blk = blk; + if (blk->iothread) { + s->internal_iothread = false; + s->iothread = blk->iothread; + object_ref(OBJECT(s->iothread)); + } else { + /* Create per-device IOThread if none specified */ + Error *local_err = NULL; + + s->internal_iothread = true; + object_add(TYPE_IOTHREAD, vdev->name, NULL, NULL, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + g_free(s); + return; + } + s->iothread = iothread_find(vdev->name); + assert(s->iothread); + } + s->ctx = iothread_get_aio_context(s->iothread); + /* Prevent block operations that conflict with data plane thread */ bdrv_set_in_use(blk->conf.bs, 1); *dataplane = s; } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) { if (!s) { @@ -422,9 +425,14 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) virtio_blk_data_plane_stop(s); bdrv_set_in_use(s->blk->conf.bs, 0); + object_unref(OBJECT(s->iothread)); + if (s->internal_iothread) { + object_unparent(OBJECT(s->iothread)); + } g_free(s); } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); @@ -448,8 +456,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) return; } - s->ctx = aio_context_new(); - /* Set up guest notifier (irq) */ if (k->set_guest_notifiers(qbus->parent, 1, true) != 0) { fprintf(stderr, "virtio-blk failed to set guest notifier, " @@ -464,7 +470,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) exit(1); } s->host_notifier = *virtio_queue_get_host_notifier(vq); - aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); /* Set up ioqueue */ ioq_init(&s->ioqueue, s->fd, REQ_MAX); @@ -472,7 +477,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); } s->io_notifier = *ioq_get_notifier(&s->ioqueue); - aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); s->starting = false; s->started = true; @@ -481,11 +485,14 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) /* Kick right away to begin processing requests already in vring */ event_notifier_set(virtio_queue_get_host_notifier(vq)); - /* Spawn thread in BH so it inherits iothread cpusets */ - s->start_bh = qemu_bh_new(start_data_plane_bh, s); - qemu_bh_schedule(s->start_bh); + /* Get this show started by hooking up our callbacks */ + aio_context_acquire(s->ctx); + aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); + aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); + aio_context_release(s->ctx); } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); @@ -496,27 +503,32 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) s->stopping = true; trace_virtio_blk_data_plane_stop(s); - /* Stop thread or cancel pending thread creation BH */ - if (s->start_bh) { - qemu_bh_delete(s->start_bh); - s->start_bh = NULL; - } else { - aio_notify(s->ctx); - qemu_thread_join(&s->thread); + aio_context_acquire(s->ctx); + + /* Stop notifications for new requests from guest */ + aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); + + /* Complete pending requests */ + while (s->num_reqs > 0) { + aio_poll(s->ctx, true); } + /* Stop ioq callbacks (there are no pending requests left) */ aio_set_event_notifier(s->ctx, &s->io_notifier, NULL); - ioq_cleanup(&s->ioqueue); - aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); - k->set_host_notifier(qbus->parent, 0, false); + aio_context_release(s->ctx); - aio_context_unref(s->ctx); + /* Sync vring state back to virtqueue so that non-dataplane request + * processing can continue when we disable the host notifier below. + */ + vring_teardown(&s->vring, s->vdev, 0); + + ioq_cleanup(&s->ioqueue); + k->set_host_notifier(qbus->parent, 0, false); /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, 1, false); - vring_teardown(&s->vring, s->vdev, 0); s->started = false; s->stopping = false; } diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 41885da1a0..e4c41ff2ef 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -16,6 +16,7 @@ #include "hw/virtio/virtio.h" #include "hw/block/block.h" +#include "sysemu/iothread.h" #define TYPE_VIRTIO_BLK "virtio-blk-device" #define VIRTIO_BLK(obj) \ @@ -106,6 +107,7 @@ struct virtio_scsi_inhdr struct VirtIOBlkConf { BlockConf conf; + IOThread *iothread; char *serial; uint32_t scsi; uint32_t config_wce; @@ -140,13 +142,15 @@ typedef struct VirtIOBlock { DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ - DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true) + DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true), \ + DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) #else #define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ - DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true) + DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ + DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) #endif /* __linux__ */ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); -- cgit v1.2.3-55-g7522 From f988388025c230ef3293cc0c3820cb40e03adfbf Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 5 Mar 2014 22:23:00 +0100 Subject: qemu-io: Fix warnings from static code analysis Smatch complains about several global symbols which should be local. Add the missing 'static' attributes and move the 'extern' declaration of variable qemuio_misalign to qemu-io.h. This variable also changes the type from 'int' to 'bool' which better fits documents its use. Signed-off-by: Stefan Weil Acked-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- include/qemu-io.h | 2 ++ qemu-io-cmds.c | 2 +- qemu-io.c | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/qemu-io.h b/include/qemu-io.h index 7e7c07c09b..5d6006f73b 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -38,6 +38,8 @@ typedef struct cmdinfo { helpfunc_t help; } cmdinfo_t; +extern bool qemuio_misalign; + bool qemuio_command(BlockDriverState *bs, const char *cmd); void qemuio_add_command(const cmdinfo_t *ci); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index f1de24c91c..fb1db53c6b 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -16,7 +16,7 @@ #define CMD_NOFILE_OK 0x01 -int qemuio_misalign; +bool qemuio_misalign; static cmdinfo_t *cmdtab; static int ncmds; diff --git a/qemu-io.c b/qemu-io.c index fc3860884c..2d119c28a6 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -24,10 +24,9 @@ #define CMD_NOFILE_OK 0x01 -char *progname; +static char *progname; -BlockDriverState *qemuio_bs; -extern int qemuio_misalign; +static BlockDriverState *qemuio_bs; /* qemu-io commands passed using -c */ static int ncmdline; @@ -408,7 +407,7 @@ int main(int argc, char **argv) readonly = 1; break; case 'm': - qemuio_misalign = 1; + qemuio_misalign = true; break; case 'g': growable = 1; -- cgit v1.2.3-55-g7522