summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peter Maydell 2014-06-09 12:54:22 +0200
committer Peter Maydell 2014-06-09 12:54:22 +0200
commit 5dfc05cb1d342f081df7009703559b29dadc33e9 (patch)
tree 1d0e5aed31482994d4e16d00e33b3f262e2ecade
parent slirp/arp: do not special-case bogus IP addresses (diff)
parent qapi: Extract qapi/block.json definitions (diff)
download qemu-5dfc05cb1d342f081df7009703559b29dadc33e9.tar.gz
qemu-5dfc05cb1d342f081df7009703559b29dadc33e9.tar.xz
qemu-5dfc05cb1d342f081df7009703559b29dadc33e9.zip
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

Block pull request

# gpg: Signature made Fri 06 Jun 2014 17:08:50 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request: (42 commits)
  qapi: Extract qapi/block.json definitions
  qapi: Extract qapi/block-core.json definitions
  qapi: create two block related json modules
  qapi: Extract qapi/common.json definitions
  sheepdog: reload only header in a case of live snapshot
  sheepdog: fix vdi object update after live snapshot
  rbd: Fix leaks in rbd_start_aio() error path
  qemu-img: Document check exit codes
  block: fix wrong order in live block migration setup
  blockdev: acquire AioContext in block_set_io_throttle
  throttle: add detach/attach test case
  throttle: add throttle_detach/attach_aio_context()
  dataplane: Support VIRTIO_BLK_T_SCSI_CMD
  virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi
  virtio-blk: Allow config-wce in dataplane
  block: Move declaration of bdrv_get_aio_context to block.h
  raw-posix: drop raw_get_aio_fd() since it is no longer used
  dataplane: implement async flush
  dataplane: delete IOQueue since it is no longer used
  dataplane: use the QEMU block layer for I/O
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

-rw-r--r-- async.c 14
-rw-r--r-- block-migration.c 3
-rw-r--r-- block.c 140
-rw-r--r-- block/blkdebug.c 2
-rw-r--r-- block/blkverify.c 47
-rw-r--r-- block/curl.c 192
-rw-r--r-- block/gluster.c 7
-rw-r--r-- block/iscsi.c 80
-rw-r--r-- block/linux-aio.c 24
-rw-r--r-- block/nbd-client.c 24
-rw-r--r-- block/nbd-client.h 4
-rw-r--r-- block/nbd.c 87
-rw-r--r-- block/nfs.c 81
-rw-r--r-- block/qed-table.c 8
-rw-r--r-- block/qed.c 35
-rw-r--r-- block/quorum.c 48
-rw-r--r-- block/raw-aio.h 8
-rw-r--r-- block/raw-posix.c 82
-rw-r--r-- block/raw-win32.c 54
-rw-r--r-- block/rbd.c 10
-rw-r--r-- block/sheepdog.c 167
-rw-r--r-- block/ssh.c 36
-rw-r--r-- block/vmdk.c 23
-rw-r--r-- block/win32-aio.c 27
-rw-r--r-- blockdev.c 6
-rw-r--r-- hw/block/dataplane/Makefile.objs 2
-rw-r--r-- hw/block/dataplane/ioq.c 117
-rw-r--r-- hw/block/dataplane/ioq.h 57
-rw-r--r-- hw/block/dataplane/virtio-blk.c 256
-rw-r--r-- hw/block/virtio-blk.c 83
-rw-r--r-- include/block/block.h 27
-rw-r--r-- include/block/block_int.h 35
-rw-r--r-- include/hw/virtio/virtio-blk.h 3
-rw-r--r-- include/qemu/throttle.h 10
-rw-r--r-- qapi-schema.json 1653
-rw-r--r-- qapi/block-core.json 1412
-rw-r--r-- qapi/block.json 166
-rw-r--r-- qapi/common.json 89
-rw-r--r-- qemu-img.c 9
-rw-r--r-- qemu-img.texi 23
-rw-r--r-- tests/test-throttle.c 49
-rw-r--r-- util/throttle.c 27
42 files changed, 2826 insertions, 2401 deletions
diff --git a/async.c b/async.c
index 6930185e64..5b6fe6b4cc 100644
--- a/async.c
+++ b/async.c
@@ -117,15 +117,21 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
void qemu_bh_schedule(QEMUBH *bh)
{
+ AioContext *ctx;
+
if (bh->scheduled)
return;
+ ctx = bh->ctx;
bh->idle = 0;
- /* Make sure that idle & any writes needed by the callback are done
- * before the locations are read in the aio_bh_poll.
+ /* Make sure that:
+ * 1. idle & any writes needed by the callback are done before the
+ * locations are read in the aio_bh_poll.
+ * 2. ctx is loaded before scheduled is set and the callback has a chance
+ * to execute.
*/
- smp_wmb();
+ smp_mb();
bh->scheduled = 1;
- aio_notify(bh->ctx);
+ aio_notify(ctx);
}
diff --git a/block-migration.c b/block-migration.c
index 16562709c8..25a03889f4 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -629,6 +629,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
block_mig_state.submitted, block_mig_state.transferred);
qemu_mutex_lock_iothread();
+ init_blk_migration(f);
/* start track dirty blocks */
ret = set_dirty_tracking();
@@ -638,8 +639,6 @@ static int block_save_setup(QEMUFile *f, void *opaque)
return ret;
}
- init_blk_migration(f);
-
qemu_mutex_unlock_iothread();
ret = flush_blks(f);
diff --git a/block.c b/block.c
index 310ea89fce..17f763db79 100644
--- a/block.c
+++ b/block.c
@@ -179,6 +179,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
{
assert(!bs->io_limits_enabled);
throttle_init(&bs->throttle_state,
+ bdrv_get_aio_context(bs),
QEMU_CLOCK_VIRTUAL,
bdrv_throttle_read_timer_cb,
bdrv_throttle_write_timer_cb,
@@ -363,6 +364,7 @@ BlockDriverState *bdrv_new(const char *device_name, Error **errp)
qemu_co_queue_init(&bs->throttled_reqs[0]);
qemu_co_queue_init(&bs->throttled_reqs[1]);
bs->refcnt = 1;
+ bs->aio_context = qemu_get_aio_context();
return bs;
}
@@ -1856,7 +1858,11 @@ void bdrv_close_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
bdrv_close(bs);
+ aio_context_release(aio_context);
}
}
@@ -1881,17 +1887,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
return false;
}
-static bool bdrv_requests_pending_all(void)
-{
- BlockDriverState *bs;
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- if (bdrv_requests_pending(bs)) {
- return true;
- }
- }
- return false;
-}
-
/*
* Wait for pending requests to complete across all BlockDriverStates
*
@@ -1911,12 +1906,20 @@ void bdrv_drain_all(void)
BlockDriverState *bs;
while (busy) {
+ busy = false;
+
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ bool bs_busy;
+
+ aio_context_acquire(aio_context);
bdrv_start_throttled_reqs(bs);
- }
+ bs_busy = bdrv_requests_pending(bs);
+ bs_busy |= aio_poll(aio_context, bs_busy);
+ aio_context_release(aio_context);
- busy = bdrv_requests_pending_all();
- busy |= aio_poll(qemu_get_aio_context(), busy);
+ busy |= bs_busy;
+ }
}
}
@@ -2352,12 +2355,17 @@ int bdrv_commit_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
if (bs->drv && bs->backing_hd) {
int ret = bdrv_commit(bs);
if (ret < 0) {
+ aio_context_release(aio_context);
return ret;
}
}
+ aio_context_release(aio_context);
}
return 0;
}
@@ -2775,10 +2783,12 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
} else {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
co = qemu_coroutine_create(bdrv_rw_co_entry);
qemu_coroutine_enter(co, &rwco);
while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
return rwco.ret;
@@ -3831,10 +3841,15 @@ int bdrv_flush_all(void)
int result = 0;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- int ret = bdrv_flush(bs);
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ ret = bdrv_flush(bs);
if (ret < 0 && !result) {
result = ret;
}
+ aio_context_release(aio_context);
}
return result;
@@ -4025,10 +4040,12 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
/* Fast-path if already in coroutine context */
bdrv_get_block_status_co_entry(&data);
} else {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
qemu_coroutine_enter(co, &data);
while (!data.done) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
return data.ret;
@@ -4621,7 +4638,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
acb->is_write = is_write;
acb->qiov = qiov;
acb->bounce = qemu_blockalign(bs, qiov->size);
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
if (is_write) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -4660,13 +4677,14 @@ typedef struct BlockDriverAIOCBCoroutine {
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
+ AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
BlockDriverAIOCBCoroutine *acb =
container_of(blockacb, BlockDriverAIOCBCoroutine, common);
bool done = false;
acb->done = &done;
while (!done) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -4703,7 +4721,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
}
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
qemu_bh_schedule(acb->bh);
}
@@ -4739,7 +4757,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
BlockDriverState *bs = acb->common.bs;
acb->req.error = bdrv_co_flush(bs);
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
qemu_bh_schedule(acb->bh);
}
@@ -4766,7 +4784,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
BlockDriverState *bs = acb->common.bs;
acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
qemu_bh_schedule(acb->bh);
}
@@ -4977,7 +4995,11 @@ void bdrv_invalidate_cache_all(Error **errp)
Error *local_err = NULL;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
bdrv_invalidate_cache(bs, &local_err);
+ aio_context_release(aio_context);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -4990,7 +5012,11 @@ void bdrv_clear_incoming_migration_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+ aio_context_release(aio_context);
}
}
@@ -5006,10 +5032,12 @@ int bdrv_flush(BlockDriverState *bs)
/* Fast-path if already in coroutine context */
bdrv_flush_co_entry(&rwco);
} else {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
co = qemu_coroutine_create(bdrv_flush_co_entry);
qemu_coroutine_enter(co, &rwco);
while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -5119,10 +5147,12 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
/* Fast-path if already in coroutine context */
bdrv_discard_co_entry(&rwco);
} else {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
co = qemu_coroutine_create(bdrv_discard_co_entry);
qemu_coroutine_enter(co, &rwco);
while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -5633,8 +5663,66 @@ out:
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
- /* Currently BlockDriverState always uses the main loop AioContext */
- return qemu_get_aio_context();
+ return bs->aio_context;
+}
+
+void bdrv_detach_aio_context(BlockDriverState *bs)
+{
+ if (!bs->drv) {
+ return;
+ }
+
+ if (bs->io_limits_enabled) {
+ throttle_detach_aio_context(&bs->throttle_state);
+ }
+ if (bs->drv->bdrv_detach_aio_context) {
+ bs->drv->bdrv_detach_aio_context(bs);
+ }
+ if (bs->file) {
+ bdrv_detach_aio_context(bs->file);
+ }
+ if (bs->backing_hd) {
+ bdrv_detach_aio_context(bs->backing_hd);
+ }
+
+ bs->aio_context = NULL;
+}
+
+void bdrv_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ if (!bs->drv) {
+ return;
+ }
+
+ bs->aio_context = new_context;
+
+ if (bs->backing_hd) {
+ bdrv_attach_aio_context(bs->backing_hd, new_context);
+ }
+ if (bs->file) {
+ bdrv_attach_aio_context(bs->file, new_context);
+ }
+ if (bs->drv->bdrv_attach_aio_context) {
+ bs->drv->bdrv_attach_aio_context(bs, new_context);
+ }
+ if (bs->io_limits_enabled) {
+ throttle_attach_aio_context(&bs->throttle_state, new_context);
+ }
+}
+
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
+{
+ bdrv_drain_all(); /* ensure there are no in-flight requests */
+
+ bdrv_detach_aio_context(bs);
+
+ /* This function executes in the old AioContext so acquire the new one in
+ * case it runs in a different thread.
+ */
+ aio_context_acquire(new_context);
+ bdrv_attach_aio_context(bs, new_context);
+ aio_context_release(new_context);
}
void bdrv_add_before_write_notifier(BlockDriverState *bs,
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 380c736101..f51407de3f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -471,7 +471,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
acb->ret = -error;
- bh = qemu_bh_new(error_callback_bh, acb);
+ bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
acb->bh = bh;
qemu_bh_schedule(bh);
diff --git a/block/blkverify.c b/block/blkverify.c
index e1c31171c3..621b78593b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -39,12 +39,13 @@ struct BlkverifyAIOCB {
static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
{
BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+ AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
bool finished = false;
/* Wait until request completes, invokes its callback, and frees itself */
acb->finished = &finished;
while (!finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -228,7 +229,8 @@ static void blkverify_aio_cb(void *opaque, int ret)
acb->verify(acb);
}
- acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ blkverify_aio_bh, acb);
qemu_bh_schedule(acb->bh);
break;
}
@@ -302,21 +304,40 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
}
+/* Propagate AioContext changes to ->test_file */
+static void blkverify_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_detach_aio_context(s->test_file);
+}
+
+static void blkverify_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_attach_aio_context(s->test_file, new_context);
+}
+
static BlockDriver bdrv_blkverify = {
- .format_name = "blkverify",
- .protocol_name = "blkverify",
- .instance_size = sizeof(BDRVBlkverifyState),
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_parse_filename = blkverify_parse_filename,
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_getlength = blkverify_getlength,
- .bdrv_parse_filename = blkverify_parse_filename,
- .bdrv_file_open = blkverify_open,
- .bdrv_close = blkverify_close,
- .bdrv_getlength = blkverify_getlength,
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
- .bdrv_aio_readv = blkverify_aio_readv,
- .bdrv_aio_writev = blkverify_aio_writev,
- .bdrv_aio_flush = blkverify_aio_flush,
+ .bdrv_attach_aio_context = blkverify_attach_aio_context,
+ .bdrv_detach_aio_context = blkverify_detach_aio_context,
- .is_filter = true,
+ .is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
diff --git a/block/curl.c b/block/curl.c
index f491b0ba4c..8c84141ced 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -110,6 +110,7 @@ typedef struct BDRVCURLState {
size_t readahead_size;
bool sslverify;
bool accept_range;
+ AioContext *aio_context;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@@ -134,25 +135,29 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
#endif
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
- void *s, void *sp)
+ void *userp, void *sp)
{
+ BDRVCURLState *s;
CURLState *state = NULL;
curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
state->sock_fd = fd;
+ s = state->s;
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- qemu_aio_set_fd_handler(fd, curl_multi_read, NULL, state);
+ aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+ NULL, state);
break;
case CURL_POLL_OUT:
- qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
- qemu_aio_set_fd_handler(fd, curl_multi_read, curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+ curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
- qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
break;
}
@@ -365,7 +370,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
break;
}
if (!state) {
- qemu_aio_wait();
+ aio_poll(state->s->aio_context, true);
}
} while(!state);
@@ -422,6 +427,51 @@ static void curl_parse_filename(const char *filename, QDict *options,
qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
}
+static void curl_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVCURLState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < CURL_NUM_STATES; i++) {
+ if (s->states[i].in_use) {
+ curl_clean_state(&s->states[i]);
+ }
+ if (s->states[i].curl) {
+ curl_easy_cleanup(s->states[i].curl);
+ s->states[i].curl = NULL;
+ }
+ if (s->states[i].orig_buf) {
+ g_free(s->states[i].orig_buf);
+ s->states[i].orig_buf = NULL;
+ }
+ }
+ if (s->multi) {
+ curl_multi_cleanup(s->multi);
+ s->multi = NULL;
+ }
+
+ timer_del(&s->timer);
+}
+
+static void curl_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVCURLState *s = bs->opaque;
+
+ aio_timer_init(new_context, &s->timer,
+ QEMU_CLOCK_REALTIME, SCALE_NS,
+ curl_multi_timeout_do, s);
+
+ assert(!s->multi);
+ s->multi = curl_multi_init();
+ s->aio_context = new_context;
+ curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+ curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+ curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
+}
+
static QemuOptsList runtime_opts = {
.name = "curl",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -491,6 +541,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
}
DPRINTF("CURL: Opening %s\n", file);
+ s->aio_context = bdrv_get_aio_context(bs);
s->url = g_strdup(file);
state = curl_init_state(s);
if (!state)
@@ -523,19 +574,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
- aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
- QEMU_CLOCK_REALTIME, SCALE_NS,
- curl_multi_timeout_do, s);
-
- // Now we know the file exists and its size, so let's
- // initialize the multi interface!
-
- s->multi = curl_multi_init();
- curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
- curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
- curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
+ curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
return 0;
@@ -630,7 +669,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
- acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
@@ -638,25 +677,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
static void curl_close(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
- int i;
DPRINTF("CURL: Close\n");
- for (i=0; i<CURL_NUM_STATES; i++) {
- if (s->states[i].in_use)
- curl_clean_state(&s->states[i]);
- if (s->states[i].curl) {
- curl_easy_cleanup(s->states[i].curl);
- s->states[i].curl = NULL;
- }
- if (s->states[i].orig_buf) {
- g_free(s->states[i].orig_buf);
- s->states[i].orig_buf = NULL;
- }
- }
- if (s->multi)
- curl_multi_cleanup(s->multi);
-
- timer_del(&s->timer);
+ curl_detach_aio_context(bs);
g_free(s->url);
}
@@ -668,68 +691,83 @@ static int64_t curl_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_http = {
- .format_name = "http",
- .protocol_name = "http",
+ .format_name = "http",
+ .protocol_name = "http",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_https = {
- .format_name = "https",
- .protocol_name = "https",
+ .format_name = "https",
+ .protocol_name = "https",
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_aio_readv = curl_aio_readv,
+
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_ftp = {
- .format_name = "ftp",
- .protocol_name = "ftp",
+ .format_name = "ftp",
+ .protocol_name = "ftp",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_ftps = {
- .format_name = "ftps",
- .protocol_name = "ftps",
+ .format_name = "ftps",
+ .protocol_name = "ftps",
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_aio_readv = curl_aio_readv,
+
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_tftp = {
- .format_name = "tftp",
- .protocol_name = "tftp",
+ .format_name = "tftp",
+ .protocol_name = "tftp",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static void curl_block_init(void)
diff --git a/block/gluster.c b/block/gluster.c
index d0726ec92c..114689e441 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -16,6 +16,7 @@ typedef struct GlusterAIOCB {
int ret;
QEMUBH *bh;
Coroutine *coroutine;
+ AioContext *aio_context;
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -249,7 +250,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
- acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+ acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
qemu_bh_schedule(acb->bh);
}
@@ -436,6 +437,7 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -549,6 +551,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -605,6 +608,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -633,6 +637,7 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
diff --git a/block/iscsi.c b/block/iscsi.c
index 3892cc551e..877b877cf2 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@
typedef struct IscsiLun {
struct iscsi_context *iscsi;
+ AioContext *aio_context;
int lun;
enum scsi_inquiry_peripheral_device_type type;
int block_size;
@@ -73,6 +74,7 @@ typedef struct IscsiTask {
struct scsi_task *task;
Coroutine *co;
QEMUBH *bh;
+ IscsiLun *iscsilun;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -133,7 +135,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
if (acb->bh) {
return;
}
- acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
+ acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
qemu_bh_schedule(acb->bh);
}
@@ -169,7 +171,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
out:
if (iTask->co) {
- iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+ iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
+ iscsi_co_generic_bh_cb, iTask);
qemu_bh_schedule(iTask->bh);
}
}
@@ -177,8 +180,9 @@ out:
static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
{
*iTask = (struct IscsiTask) {
- .co = qemu_coroutine_self(),
- .retries = ISCSI_CMD_RETRIES,
+ .co = qemu_coroutine_self(),
+ .retries = ISCSI_CMD_RETRIES,
+ .iscsilun = iscsilun,
};
}
@@ -209,7 +213,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
iscsi_abort_task_cb, acb);
while (acb->status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(iscsilun->aio_context, true);
}
}
@@ -232,10 +236,11 @@ iscsi_set_events(IscsiLun *iscsilun)
ev = POLLIN;
ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
- iscsi_process_read,
- (ev & POLLOUT) ? iscsi_process_write : NULL,
- iscsilun);
+ aio_set_fd_handler(iscsilun->aio_context,
+ iscsi_get_fd(iscsi),
+ iscsi_process_read,
+ (ev & POLLOUT) ? iscsi_process_write : NULL,
+ iscsilun);
}
@@ -791,7 +796,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
while (status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(iscsilun->aio_context, true);
}
return 0;
@@ -1195,6 +1200,40 @@ fail_with_err:
return NULL;
}
+static void iscsi_detach_aio_context(BlockDriverState *bs)
+{
+ IscsiLun *iscsilun = bs->opaque;
+
+ aio_set_fd_handler(iscsilun->aio_context,
+ iscsi_get_fd(iscsilun->iscsi),
+ NULL, NULL, NULL);
+ iscsilun->events = 0;
+
+ if (iscsilun->nop_timer) {
+ timer_del(iscsilun->nop_timer);
+ timer_free(iscsilun->nop_timer);
+ iscsilun->nop_timer = NULL;
+ }
+}
+
+static void iscsi_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ IscsiLun *iscsilun = bs->opaque;
+
+ iscsilun->aio_context = new_context;
+ iscsi_set_events(iscsilun);
+
+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
+ /* Set up a timer for sending out iSCSI NOPs */
+ iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
+ QEMU_CLOCK_REALTIME, SCALE_MS,
+ iscsi_nop_timed_event, iscsilun);
+ timer_mod(iscsilun->nop_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+#endif
+}
+
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1301,6 +1340,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
iscsilun->iscsi = iscsi;
+ iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->has_write_same = true;
@@ -1374,11 +1414,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
scsi_free_scsi_task(task);
task = NULL;
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
- /* Set up a timer for sending out iSCSI NOPs */
- iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
- timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-#endif
+ iscsi_attach_aio_context(bs, iscsilun->aio_context);
/* Guess the internal cluster (page) size of the iscsi target by the means
* of opt_unmap_gran. Transfer the unmap granularity only if it has a
@@ -1422,11 +1458,7 @@ static void iscsi_close(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
- if (iscsilun->nop_timer) {
- timer_del(iscsilun->nop_timer);
- timer_free(iscsilun->nop_timer);
- }
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+ iscsi_detach_aio_context(bs);
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
g_free(iscsilun->allocationmap);
@@ -1530,10 +1562,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
if (ret != 0) {
goto out;
}
- if (iscsilun->nop_timer) {
- timer_del(iscsilun->nop_timer);
- timer_free(iscsilun->nop_timer);
- }
+ iscsi_detach_aio_context(bs);
if (iscsilun->type != TYPE_DISK) {
ret = -ENODEV;
goto out;
@@ -1604,6 +1633,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_ioctl = iscsi_ioctl,
.bdrv_aio_ioctl = iscsi_aio_ioctl,
#endif
+
+ .bdrv_detach_aio_context = iscsi_detach_aio_context,
+ .bdrv_attach_aio_context = iscsi_attach_aio_context,
};
static QemuOptsList qemu_iscsi_opts = {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53434e2df5..f0a2c087b2 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -177,6 +177,20 @@ out_free_aiocb:
return NULL;
}
+void laio_detach_aio_context(void *s_, AioContext *old_context)
+{
+ struct qemu_laio_state *s = s_;
+
+ aio_set_event_notifier(old_context, &s->e, NULL);
+}
+
+void laio_attach_aio_context(void *s_, AioContext *new_context)
+{
+ struct qemu_laio_state *s = s_;
+
+ aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+}
+
void *laio_init(void)
{
struct qemu_laio_state *s;
@@ -190,8 +204,6 @@ void *laio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
-
return s;
out_close_efd:
@@ -200,3 +212,11 @@ out_free_state:
g_free(s);
return NULL;
}
+
+void laio_cleanup(void *s_)
+{
+ struct qemu_laio_state *s = s_;
+
+ event_notifier_cleanup(&s->e);
+ g_free(s);
+}
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7d698cb619..6e1c97cad0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,7 +49,7 @@ static void nbd_teardown_connection(NbdClientSession *client)
shutdown(client->sock, 2);
nbd_recv_coroutines_enter_all(client);
- qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+ nbd_client_session_detach_aio_context(client);
closesocket(client->sock);
client->sock = -1;
}
@@ -103,11 +103,14 @@ static int nbd_co_send_request(NbdClientSession *s,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
+ AioContext *aio_context;
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+ aio_context = bdrv_get_aio_context(s->bs);
+ aio_set_fd_handler(aio_context, s->sock,
+ nbd_reply_ready, nbd_restart_write, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
@@ -126,7 +129,7 @@ static int nbd_co_send_request(NbdClientSession *s,
} else {
rc = nbd_send_request(s->sock, request);
}
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+ aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -335,6 +338,19 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
}
+void nbd_client_session_detach_aio_context(NbdClientSession *client)
+{
+ aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
+ NULL, NULL, NULL);
+}
+
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+ AioContext *new_context)
+{
+ aio_set_fd_handler(new_context, client->sock,
+ nbd_reply_ready, NULL, client);
+}
+
void nbd_client_session_close(NbdClientSession *client)
{
struct nbd_request request = {
@@ -381,7 +397,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
- qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
+ nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");
return 0;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index f2a63378bb..cd478f3a98 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -47,4 +47,8 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
+void nbd_client_session_detach_aio_context(NbdClientSession *client);
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+ AioContext *new_context);
+
#endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 613f2581ae..4eda0958d7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -323,46 +323,67 @@ static int64_t nbd_getlength(BlockDriverState *bs)
return s->client.size;
}
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ nbd_client_session_detach_aio_context(&s->client);
+}
+
+static void nbd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ nbd_client_session_attach_aio_context(&s->client, new_context);
+}
+
static BlockDriver bdrv_nbd = {
- .format_name = "nbd",
- .protocol_name = "nbd",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static BlockDriver bdrv_nbd_tcp = {
- .format_name = "nbd",
- .protocol_name = "nbd+tcp",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd+tcp",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static BlockDriver bdrv_nbd_unix = {
- .format_name = "nbd",
- .protocol_name = "nbd+unix",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd+unix",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static void bdrv_nbd_init(void)
diff --git a/block/nfs.c b/block/nfs.c
index 539bd951df..bd9177f3ae 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -40,6 +40,7 @@ typedef struct NFSClient {
struct nfsfh *fh;
int events;
bool has_zero_init;
+ AioContext *aio_context;
} NFSClient;
typedef struct NFSRPC {
@@ -49,6 +50,7 @@ typedef struct NFSRPC {
struct stat *st;
Coroutine *co;
QEMUBH *bh;
+ NFSClient *client;
} NFSRPC;
static void nfs_process_read(void *arg);
@@ -58,10 +60,11 @@ static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
- qemu_aio_set_fd_handler(nfs_get_fd(client->context),
- (ev & POLLIN) ? nfs_process_read : NULL,
- (ev & POLLOUT) ? nfs_process_write : NULL,
- client);
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ (ev & POLLIN) ? nfs_process_read : NULL,
+ (ev & POLLOUT) ? nfs_process_write : NULL,
+ client);
}
client->events = ev;
@@ -84,7 +87,8 @@ static void nfs_process_write(void *arg)
static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
{
*task = (NFSRPC) {
- .co = qemu_coroutine_self(),
+ .co = qemu_coroutine_self(),
+ .client = client,
};
}
@@ -116,7 +120,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
error_report("NFS Error: %s", nfs_get_error(nfs));
}
if (task->co) {
- task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+ task->bh = aio_bh_new(task->client->aio_context,
+ nfs_co_generic_bh_cb, task);
qemu_bh_schedule(task->bh);
}
}
@@ -224,13 +229,34 @@ static QemuOptsList runtime_opts = {
},
};
+static void nfs_detach_aio_context(BlockDriverState *bs)
+{
+ NFSClient *client = bs->opaque;
+
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ NULL, NULL, NULL);
+ client->events = 0;
+}
+
+static void nfs_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ NFSClient *client = bs->opaque;
+
+ client->aio_context = new_context;
+ nfs_set_events(client);
+}
+
static void nfs_client_close(NFSClient *client)
{
if (client->context) {
if (client->fh) {
nfs_close(client->context, client->fh);
}
- qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
@@ -345,6 +371,8 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
+ client->aio_context = bdrv_get_aio_context(bs);
+
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -368,6 +396,8 @@ static int nfs_file_create(const char *url, QEMUOptionParameter *options,
int64_t total_size = 0;
NFSClient *client = g_malloc0(sizeof(NFSClient));
+ client->aio_context = qemu_get_aio_context();
+
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, "size")) {
@@ -407,7 +437,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
while (!task.complete) {
nfs_set_events(client);
- qemu_aio_wait();
+ aio_poll(client->aio_context, true);
}
return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
@@ -420,22 +450,25 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
}
static BlockDriver bdrv_nfs = {
- .format_name = "nfs",
- .protocol_name = "nfs",
-
- .instance_size = sizeof(NFSClient),
- .bdrv_needs_filename = true,
- .bdrv_has_zero_init = nfs_has_zero_init,
- .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
- .bdrv_truncate = nfs_file_truncate,
-
- .bdrv_file_open = nfs_file_open,
- .bdrv_close = nfs_file_close,
- .bdrv_create = nfs_file_create,
-
- .bdrv_co_readv = nfs_co_readv,
- .bdrv_co_writev = nfs_co_writev,
- .bdrv_co_flush_to_disk = nfs_co_flush,
+ .format_name = "nfs",
+ .protocol_name = "nfs",
+
+ .instance_size = sizeof(NFSClient),
+ .bdrv_needs_filename = true,
+ .bdrv_has_zero_init = nfs_has_zero_init,
+ .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+ .bdrv_truncate = nfs_file_truncate,
+
+ .bdrv_file_open = nfs_file_open,
+ .bdrv_close = nfs_file_close,
+ .bdrv_create = nfs_file_create,
+
+ .bdrv_co_readv = nfs_co_readv,
+ .bdrv_co_writev = nfs_co_writev,
+ .bdrv_co_flush_to_disk = nfs_co_flush,
+
+ .bdrv_detach_aio_context = nfs_detach_aio_context,
+ .bdrv_attach_aio_context = nfs_attach_aio_context,
};
static void nfs_block_init(void)
diff --git a/block/qed-table.c b/block/qed-table.c
index 76d2dcccf8..f61107a1cf 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -173,7 +173,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
qed_read_table(s, s->header.l1_table_offset,
s->l1_table, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -194,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -267,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -289,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
diff --git a/block/qed.c b/block/qed.c
index c130e42d0d..79f5bd392a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,12 +21,13 @@
static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
{
QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+ AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
bool finished = false;
/* Wait for the request to finish */
acb->finished = &finished;
while (!finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -373,6 +374,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
s->bs = bs;
}
+static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ qed_cancel_need_check_timer(s);
+ timer_free(s->need_check_timer);
+}
+
+static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ s->need_check_timer = aio_timer_new(new_context,
+ QEMU_CLOCK_VIRTUAL, SCALE_NS,
+ qed_need_check_timer_cb, s);
+ if (s->header.features & QED_F_NEED_CHECK) {
+ qed_start_need_check_timer(s);
+ }
+}
+
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -496,8 +518,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- qed_need_check_timer_cb, s);
+ bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
out:
if (ret) {
@@ -528,8 +549,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
- qed_cancel_need_check_timer(s);
- timer_free(s->need_check_timer);
+ bdrv_qed_detach_aio_context(bs);
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
@@ -919,7 +939,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
/* Arrange for a bh to invoke the completion function */
acb->bh_ret = ret;
- acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ qed_aio_complete_bh, acb);
qemu_bh_schedule(acb->bh);
/* Start next allocating write request waiting behind this one. Note that
@@ -1644,6 +1665,8 @@ static BlockDriver bdrv_qed = {
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,
+ .bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
+ .bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
};
static void bdrv_qed_init(void)
diff --git a/block/quorum.c b/block/quorum.c
index ecec3a5407..426077a520 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -848,25 +848,49 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->bs);
}
+static void quorum_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_children; i++) {
+ bdrv_detach_aio_context(s->bs[i]);
+ }
+}
+
+static void quorum_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_children; i++) {
+ bdrv_attach_aio_context(s->bs[i], new_context);
+ }
+}
+
static BlockDriver bdrv_quorum = {
- .format_name = "quorum",
- .protocol_name = "quorum",
+ .format_name = "quorum",
+ .protocol_name = "quorum",
+
+ .instance_size = sizeof(BDRVQuorumState),
- .instance_size = sizeof(BDRVQuorumState),
+ .bdrv_file_open = quorum_open,
+ .bdrv_close = quorum_close,
- .bdrv_file_open = quorum_open,
- .bdrv_close = quorum_close,
+ .bdrv_co_flush_to_disk = quorum_co_flush,
- .bdrv_co_flush_to_disk = quorum_co_flush,
+ .bdrv_getlength = quorum_getlength,
- .bdrv_getlength = quorum_getlength,
+ .bdrv_aio_readv = quorum_aio_readv,
+ .bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_invalidate_cache = quorum_invalidate_cache,
- .bdrv_aio_readv = quorum_aio_readv,
- .bdrv_aio_writev = quorum_aio_writev,
- .bdrv_invalidate_cache = quorum_invalidate_cache,
+ .bdrv_detach_aio_context = quorum_detach_aio_context,
+ .bdrv_attach_aio_context = quorum_attach_aio_context,
- .is_filter = true,
- .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+ .is_filter = true,
+ .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
static void bdrv_quorum_init(void)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 7ad0a8a0a7..8cf084eeb5 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -34,19 +34,27 @@
/* linux-aio.c - Linux native implementation */
#ifdef CONFIG_LINUX_AIO
void *laio_init(void);
+void laio_cleanup(void *s);
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type);
+void laio_detach_aio_context(void *s, AioContext *old_context);
+void laio_attach_aio_context(void *s, AioContext *new_context);
#endif
#ifdef _WIN32
typedef struct QEMUWin32AIOState QEMUWin32AIOState;
QEMUWin32AIOState *win32_aio_init(void);
+void win32_aio_cleanup(QEMUWin32AIOState *aio);
int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
QEMUWin32AIOState *aio, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type);
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *old_context);
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *new_context);
#endif
#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b7f0f2624b..c2b30be3d3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -307,6 +307,29 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+
+ if (s->use_aio) {
+ laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
+ }
+#endif
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+
+ if (s->use_aio) {
+ laio_attach_aio_context(s->aio_ctx, new_context);
+ }
+#endif
+}
+
#ifdef CONFIG_LINUX_AIO
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
{
@@ -447,6 +470,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
+ raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
+
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -1059,6 +1084,14 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+
+ raw_detach_aio_context(bs);
+
+#ifdef CONFIG_LINUX_AIO
+ if (s->use_aio) {
+ laio_cleanup(s->aio_ctx);
+ }
+#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1478,6 +1511,9 @@ static BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
.create_options = raw_create_options,
};
@@ -1878,6 +1914,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* generic scsi device */
#ifdef __linux__
.bdrv_ioctl = hdev_ioctl,
@@ -2020,6 +2059,9 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = floppy_is_inserted,
.bdrv_media_changed = floppy_media_changed,
@@ -2145,6 +2187,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2276,6 +2321,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2283,40 +2331,6 @@ static BlockDriver bdrv_host_cdrom = {
};
#endif /* __FreeBSD__ */
-#ifdef CONFIG_LINUX_AIO
-/**
- * Return the file descriptor for Linux AIO
- *
- * This function is a layering violation and should be removed when it becomes
- * possible to call the block layer outside the global mutex. It allows the
- * caller to hijack the file descriptor so I/O can be performed outside the
- * block layer.
- */
-int raw_get_aio_fd(BlockDriverState *bs)
-{
- BDRVRawState *s;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
-
- if (bs->drv == bdrv_find_format("raw")) {
- bs = bs->file;
- }
-
- /* raw-posix has several protocols so just check for raw_aio_readv */
- if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
- return -ENOTSUP;
- }
-
- s = bs->opaque;
- if (!s->use_aio) {
- return -ENOTSUP;
- }
- return s->fd;
-}
-#endif /* CONFIG_LINUX_AIO */
-
static void bdrv_file_init(void)
{
/*
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 064ea3123c..324e8187f5 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,8 +36,6 @@
#define FTYPE_CD 1
#define FTYPE_HARDDISK 2
-static QEMUWin32AIOState *aio;
-
typedef struct RawWin32AIOData {
BlockDriverState *bs;
HANDLE hfile;
@@ -202,6 +200,25 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+ }
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_attach_aio_context(s->aio, new_context);
+ }
+}
+
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -300,15 +317,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
raw_parse_flags(flags, &access_flags, &overlapped);
- if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
- aio = win32_aio_init();
- if (aio == NULL) {
- error_setg(errp, "Could not initialize AIO");
- ret = -EINVAL;
- goto fail;
- }
- }
-
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
} else if (filename[0] == '\\' && filename[1] == '\\') {
@@ -335,13 +343,23 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
if (flags & BDRV_O_NATIVE_AIO) {
- ret = win32_aio_attach(aio, s->hfile);
+ s->aio = win32_aio_init();
+ if (s->aio == NULL) {
+ CloseHandle(s->hfile);
+ error_setg(errp, "Could not initialize AIO");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = win32_aio_attach(s->aio, s->hfile);
if (ret < 0) {
+ win32_aio_cleanup(s->aio);
CloseHandle(s->hfile);
error_setg_errno(errp, -ret, "Could not enable AIO");
goto fail;
}
- s->aio = aio;
+
+ win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
raw_probe_alignment(bs);
@@ -389,6 +407,13 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+ win32_aio_cleanup(s->aio);
+ s->aio = NULL;
+ }
+
CloseHandle(s->hfile);
if (bs->open_flags & BDRV_O_TEMPORARY) {
unlink(bs->filename);
@@ -684,6 +709,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
diff --git a/block/rbd.c b/block/rbd.c
index 09af48426e..93639f783c 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -555,7 +555,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
acb->cancelled = 1;
while (acb->status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(acb->common.bs), true);
}
qemu_aio_release(acb);
@@ -588,7 +588,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
rcb->ret = rbd_aio_get_return_value(c);
rbd_aio_release(c);
- acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ rbd_finish_bh, rcb);
qemu_bh_schedule(acb->bh);
}
@@ -684,13 +685,16 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
}
if (r < 0) {
- goto failed;
+ goto failed_completion;
}
return &acb->common;
+failed_completion:
+ rbd_aio_release(c);
failed:
g_free(rcb);
+ qemu_vfree(acb->bounce);
qemu_aio_release(acb);
return NULL;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 4ecbf5f498..1fa19399f0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -200,6 +200,8 @@ typedef struct SheepdogInode {
uint32_t data_vdi_id[MAX_DATA_OBJS];
} SheepdogInode;
+#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
+
/*
* 64 bit FNV-1a non-zero initial basis
*/
@@ -282,6 +284,7 @@ typedef struct AIOReq {
unsigned int data_len;
uint8_t flags;
uint32_t id;
+ bool create;
QLIST_ENTRY(AIOReq) aio_siblings;
} AIOReq;
@@ -314,6 +317,7 @@ struct SheepdogAIOCB {
typedef struct BDRVSheepdogState {
BlockDriverState *bs;
+ AioContext *aio_context;
SheepdogInode inode;
@@ -404,7 +408,7 @@ static const char * sd_strerror(int err)
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
uint64_t oid, unsigned int data_len,
- uint64_t offset, uint8_t flags,
+ uint64_t offset, uint8_t flags, bool create,
uint64_t base_oid, unsigned int iov_offset)
{
AIOReq *aio_req;
@@ -418,6 +422,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
aio_req->data_len = data_len;
aio_req->flags = flags;
aio_req->id = s->aioreq_seq_num++;
+ aio_req->create = create;
acb->nr_pending++;
return aio_req;
@@ -496,7 +501,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
sd_finish_aiocb(acb);
return;
}
- qemu_aio_wait();
+ aio_poll(s->aio_context, true);
}
}
@@ -578,6 +583,7 @@ static void restart_co_req(void *opaque)
typedef struct SheepdogReqCo {
int sockfd;
+ AioContext *aio_context;
SheepdogReq *hdr;
void *data;
unsigned int *wlen;
@@ -598,14 +604,14 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -630,18 +636,19 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
+ aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
}
-static int do_req(int sockfd, SheepdogReq *hdr, void *data,
- unsigned int *wlen, unsigned int *rlen)
+static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+ void *data, unsigned int *wlen, unsigned int *rlen)
{
Coroutine *co;
SheepdogReqCo srco = {
.sockfd = sockfd,
+ .aio_context = aio_context,
.hdr = hdr,
.data = data,
.wlen = wlen,
@@ -656,7 +663,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
co = qemu_coroutine_create(do_co_req);
qemu_coroutine_enter(co, &srco);
while (!srco.finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -664,8 +671,8 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type);
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type);
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
@@ -698,7 +705,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
/* move aio_req from pending list to inflight one */
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
}
}
@@ -709,7 +716,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -797,7 +804,7 @@ static void coroutine_fn aio_read_response(void *opaque)
}
idx = data_oid_to_idx(aio_req->oid);
- if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+ if (aio_req->create) {
/*
* If the object is newly created one, we need to update
* the vdi object (metadata object). min_dirty_data_idx
@@ -922,7 +929,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
return fd;
}
- qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
return fd;
}
@@ -1092,7 +1099,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
hdr.snapid = snapid;
hdr.flags = SD_FLAG_CMD_WRITE;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
if (ret) {
error_setg_errno(errp, -ret, "cannot get vdi info");
goto out;
@@ -1117,8 +1124,8 @@ out:
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type)
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type)
{
int nr_copies = s->inode.nr_copies;
SheepdogObjReq hdr;
@@ -1129,6 +1136,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
uint64_t offset = aio_req->offset;
uint8_t flags = aio_req->flags;
uint64_t old_oid = aio_req->base_oid;
+ bool create = aio_req->create;
if (!nr_copies) {
error_report("bug");
@@ -1173,7 +1181,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
+ aio_set_fd_handler(s->aio_context, s->fd,
+ co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
/* send a header */
@@ -1191,12 +1200,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
out:
socket_set_cork(s->fd, 0);
- qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
-static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_write_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
bool write, bool create, uint32_t cache_flags)
{
@@ -1229,7 +1239,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
hdr.offset = offset;
hdr.copies = copies;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
if (ret) {
error_report("failed to send a request to the sheep");
return ret;
@@ -1244,19 +1254,23 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
}
}
-static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
uint32_t cache_flags)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+ return read_write_object(fd, aio_context, buf, oid, copies,
+ datalen, offset, false,
false, cache_flags);
}
-static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int write_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset, bool create,
uint32_t cache_flags)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+ return read_write_object(fd, aio_context, buf, oid, copies,
+ datalen, offset, true,
create, cache_flags);
}
@@ -1275,7 +1289,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
return -EIO;
}
- inode = g_malloc(sizeof(s->inode));
+ inode = g_malloc(SD_INODE_HEADER_SIZE);
ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
if (ret) {
@@ -1284,14 +1298,15 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
goto out;
}
- ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
- s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
+ s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
+ s->cache_flags);
if (ret < 0) {
goto out;
}
if (inode->vdi_id != s->inode.vdi_id) {
- memcpy(&s->inode, inode, sizeof(s->inode));
+ memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE);
}
out:
@@ -1315,6 +1330,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
aio_req->flags = 0;
aio_req->base_oid = 0;
+ aio_req->create = false;
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
return true;
@@ -1327,7 +1343,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
- bool create = false;
+
+ aio_req->create = false;
/* check whether this request becomes a CoW one */
if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
@@ -1345,20 +1362,36 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
aio_req->flags |= SD_FLAG_CMD_COW;
}
- create = true;
+ aio_req->create = true;
}
out:
if (is_data_obj(aio_req->oid)) {
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
} else {
struct iovec iov;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
}
}
+static void sd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVSheepdogState *s = bs->opaque;
+
+ aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+}
+
+static void sd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVSheepdogState *s = bs->opaque;
+
+ s->aio_context = new_context;
+ aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+}
+
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "sheepdog",
@@ -1387,6 +1420,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
const char *filename;
s->bs = bs;
+ s->aio_context = bdrv_get_aio_context(bs);
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1448,8 +1482,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
}
buf = g_malloc(SD_INODE_SIZE);
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
- s->cache_flags);
+ ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+ 0, SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@@ -1469,7 +1503,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
g_free(buf);
return 0;
out:
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1512,7 +1546,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
hdr.copy_policy = s->inode.copy_policy;
hdr.copies = s->inode.nr_copies;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
closesocket(fd);
@@ -1766,7 +1800,8 @@ static void sd_close(BlockDriverState *bs)
hdr.data_length = wlen;
hdr.flags = SD_FLAG_CMD_WRITE;
- ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ s->name, &wlen, &rlen);
closesocket(fd);
@@ -1775,7 +1810,7 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1812,8 +1847,9 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
/* we don't need to update entire object */
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
- ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+ ret = write_object(fd, s->aio_context, (char *)&s->inode,
+ vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+ datalen, 0, false, s->cache_flags);
close(fd);
if (ret < 0) {
@@ -1849,9 +1885,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- data_len, offset, 0, 0, offset);
+ data_len, offset, 0, false, 0, offset);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
acb->aio_done_func = sd_finish_aiocb;
acb->aiocb_type = AIOCB_WRITE_UDATA;
@@ -1882,7 +1918,8 @@ static bool sd_delete(BDRVSheepdogState *s)
return false;
}
- ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ s->name, &wlen, &rlen);
closesocket(fd);
if (ret) {
return false;
@@ -1939,8 +1976,8 @@ static int sd_create_branch(BDRVSheepdogState *s)
goto out;
}
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+ s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@@ -2049,7 +2086,8 @@ static int coroutine_fn sd_co_rw_vector(void *p)
DPRINTF("new oid %" PRIx64 "\n", oid);
}
- aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+ aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
+ old_oid, done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
if (create) {
@@ -2058,7 +2096,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
}
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
offset = 0;
@@ -2138,9 +2176,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
acb->aio_done_func = sd_finish_aiocb;
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- 0, 0, 0, 0, 0);
+ 0, 0, 0, false, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
+ add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
qemu_coroutine_yield();
return acb->ret;
@@ -2187,8 +2225,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup;
}
- ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+ ret = write_object(fd, s->aio_context, (char *)&s->inode,
+ vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+ datalen, 0, false, s->cache_flags);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
goto cleanup;
@@ -2203,8 +2242,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto cleanup;
}
- ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
- s->inode.nr_copies, datalen, 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, (char *)inode,
+ vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
+ s->cache_flags);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@@ -2311,7 +2351,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
req.opcode = SD_OP_READ_VDIS;
req.data_length = max;
- ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&req,
+ vdi_inuse, &wlen, &rlen);
closesocket(fd);
if (ret) {
@@ -2338,7 +2379,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
}
/* we don't need to read entire object */
- ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
+ ret = read_object(fd, s->aio_context, (char *)&inode,
+ vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
s->cache_flags);
@@ -2403,11 +2445,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
- ret = read_object(fd, (char *)data, vmstate_oid,
+ ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset,
s->cache_flags);
} else {
- ret = write_object(fd, (char *)data, vmstate_oid,
+ ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create,
s->cache_flags);
}
@@ -2580,6 +2622,9 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
.create_options = sd_create_options,
};
@@ -2610,6 +2655,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
.create_options = sd_create_options,
};
@@ -2640,6 +2688,9 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
.create_options = sd_create_options,
};
diff --git a/block/ssh.c b/block/ssh.c
index b2129714bc..9779eac2bd 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -773,7 +773,7 @@ static void restart_coroutine(void *opaque)
qemu_coroutine_enter(co, NULL);
}
-static coroutine_fn void set_fd_handler(BDRVSSHState *s)
+static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -791,24 +791,26 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
rd_handler, wr_handler);
- qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+ rd_handler, wr_handler, co);
}
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
+static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
+ BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
- qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
-static coroutine_fn void co_yield(BDRVSSHState *s)
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
- set_fd_handler(s);
+ set_fd_handler(s, bs);
qemu_coroutine_yield();
- clear_fd_handler(s);
+ clear_fd_handler(s, bs);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -838,7 +840,7 @@ static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
}
}
-static coroutine_fn int ssh_read(BDRVSSHState *s,
+static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
{
@@ -871,7 +873,7 @@ static coroutine_fn int ssh_read(BDRVSSHState *s,
DPRINTF("sftp_read returned %zd", r);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r < 0) {
@@ -906,14 +908,14 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_read(s, sector_num * BDRV_SECTOR_SIZE,
+ ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
-static int ssh_write(BDRVSSHState *s,
+static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
{
@@ -941,7 +943,7 @@ static int ssh_write(BDRVSSHState *s,
DPRINTF("sftp_write returned %zd", r);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r < 0) {
@@ -960,7 +962,7 @@ static int ssh_write(BDRVSSHState *s,
*/
if (r == 0) {
ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
@@ -988,7 +990,7 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_write(s, sector_num * BDRV_SECTOR_SIZE,
+ ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);
qemu_co_mutex_unlock(&s->lock);
@@ -1009,7 +1011,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
#ifdef HAS_LIBSSH2_SFTP_FSYNC
-static coroutine_fn int ssh_flush(BDRVSSHState *s)
+static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
@@ -1017,7 +1019,7 @@ static coroutine_fn int ssh_flush(BDRVSSHState *s)
again:
r = libssh2_sftp_fsync(s->sftp_handle);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
@@ -1039,7 +1041,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_flush(s);
+ ret = ssh_flush(s, bs);
qemu_co_mutex_unlock(&s->lock);
return ret;
diff --git a/block/vmdk.c b/block/vmdk.c
index 2b38f61fcd..b8a476278a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2096,6 +2096,27 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
+static void vmdk_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_extents; i++) {
+ bdrv_detach_aio_context(s->extents[i].file);
+ }
+}
+
+static void vmdk_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_extents; i++) {
+ bdrv_attach_aio_context(s->extents[i].file, new_context);
+ }
+}
+
static QEMUOptionParameter vmdk_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -2153,6 +2174,8 @@ static BlockDriver bdrv_vmdk = {
.bdrv_get_specific_info = vmdk_get_specific_info,
.bdrv_refresh_limits = vmdk_refresh_limits,
.bdrv_get_info = vmdk_get_info,
+ .bdrv_detach_aio_context = vmdk_detach_aio_context,
+ .bdrv_attach_aio_context = vmdk_attach_aio_context,
.create_options = vmdk_create_options,
};
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5d1d199b61..8e417f70ae 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -40,6 +40,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
+ bool is_aio_context_attached;
};
typedef struct QEMUWin32AIOCB {
@@ -114,7 +115,7 @@ static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
* wait for completion.
*/
while (!HasOverlappedIoCompleted(&waiocb->ov)) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(blockacb->bs), true);
}
}
@@ -180,6 +181,20 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
}
}
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *old_context)
+{
+ aio_set_event_notifier(old_context, &aio->e, NULL);
+ aio->is_aio_context_attached = false;
+}
+
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *new_context)
+{
+ aio->is_aio_context_attached = true;
+ aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+}
+
QEMUWin32AIOState *win32_aio_init(void)
{
QEMUWin32AIOState *s;
@@ -194,8 +209,6 @@ QEMUWin32AIOState *win32_aio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
-
return s;
out_close_efd:
@@ -204,3 +217,11 @@ out_free_state:
g_free(s);
return NULL;
}
+
+void win32_aio_cleanup(QEMUWin32AIOState *aio)
+{
+ assert(!aio->is_aio_context_attached);
+ CloseHandle(aio->hIOCP);
+ event_notifier_cleanup(&aio->e);
+ g_free(aio);
+}
diff --git a/blockdev.c b/blockdev.c
index 9b5261b765..4cbcc56b5e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1703,6 +1703,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
{
ThrottleConfig cfg;
BlockDriverState *bs;
+ AioContext *aio_context;
bs = bdrv_find(device);
if (!bs) {
@@ -1746,6 +1747,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
return;
}
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
bdrv_io_limits_enable(bs);
} else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
@@ -1755,6 +1759,8 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
if (bs->io_limits_enabled) {
bdrv_set_io_limits(bs, &cfg);
}
+
+ aio_context_release(aio_context);
}
int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs
index 9da2eb82ba..e786f66421 100644
--- a/hw/block/dataplane/Makefile.objs
+++ b/hw/block/dataplane/Makefile.objs
@@ -1 +1 @@
-obj-y += ioq.o virtio-blk.o
+obj-y += virtio-blk.o
diff --git a/hw/block/dataplane/ioq.c b/hw/block/dataplane/ioq.c
deleted file mode 100644
index f709f87ed6..0000000000
--- a/hw/block/dataplane/ioq.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "ioq.h"
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
-{
- int rc;
-
- ioq->fd = fd;
- ioq->max_reqs = max_reqs;
-
- memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
- rc = io_setup(max_reqs, &ioq->io_ctx);
- if (rc != 0) {
- fprintf(stderr, "ioq io_setup failed %d\n", rc);
- exit(1);
- }
-
- rc = event_notifier_init(&ioq->io_notifier, 0);
- if (rc != 0) {
- fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
- exit(1);
- }
-
- ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
- ioq->freelist_idx = 0;
-
- ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
- ioq->queue_idx = 0;
-}
-
-void ioq_cleanup(IOQueue *ioq)
-{
- g_free(ioq->freelist);
- g_free(ioq->queue);
-
- event_notifier_cleanup(&ioq->io_notifier);
- io_destroy(ioq->io_ctx);
-}
-
-EventNotifier *ioq_get_notifier(IOQueue *ioq)
-{
- return &ioq->io_notifier;
-}
-
-struct iocb *ioq_get_iocb(IOQueue *ioq)
-{
- /* Underflow cannot happen since ioq is sized for max_reqs */
- assert(ioq->freelist_idx != 0);
-
- struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
- ioq->queue[ioq->queue_idx++] = iocb;
- return iocb;
-}
-
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
-{
- /* Overflow cannot happen since ioq is sized for max_reqs */
- assert(ioq->freelist_idx != ioq->max_reqs);
-
- ioq->freelist[ioq->freelist_idx++] = iocb;
-}
-
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
- unsigned int count, long long offset)
-{
- struct iocb *iocb = ioq_get_iocb(ioq);
-
- if (read) {
- io_prep_preadv(iocb, ioq->fd, iov, count, offset);
- } else {
- io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
- }
- io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
- return iocb;
-}
-
-int ioq_submit(IOQueue *ioq)
-{
- int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
- ioq->queue_idx = 0; /* reset */
- return rc;
-}
-
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
- void *opaque)
-{
- struct io_event events[ioq->max_reqs];
- int nevents, i;
-
- do {
- nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
- } while (nevents < 0 && errno == EINTR);
- if (nevents < 0) {
- return nevents;
- }
-
- for (i = 0; i < nevents; i++) {
- ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
-
- completion(events[i].obj, ret, opaque);
- ioq_put_iocb(ioq, events[i].obj);
- }
- return nevents;
-}
diff --git a/hw/block/dataplane/ioq.h b/hw/block/dataplane/ioq.h
deleted file mode 100644
index b49b5de7f4..0000000000
--- a/hw/block/dataplane/ioq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef IOQ_H
-#define IOQ_H
-
-#include <libaio.h>
-#include "qemu/event_notifier.h"
-
-typedef struct {
- int fd; /* file descriptor */
- unsigned int max_reqs; /* max length of freelist and queue */
-
- io_context_t io_ctx; /* Linux AIO context */
- EventNotifier io_notifier; /* Linux AIO eventfd */
-
- /* Requests can complete in any order so a free list is necessary to manage
- * available iocbs.
- */
- struct iocb **freelist; /* free iocbs */
- unsigned int freelist_idx;
-
- /* Multiple requests are queued up before submitting them all in one go */
- struct iocb **queue; /* queued iocbs */
- unsigned int queue_idx;
-} IOQueue;
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
-void ioq_cleanup(IOQueue *ioq);
-EventNotifier *ioq_get_notifier(IOQueue *ioq);
-struct iocb *ioq_get_iocb(IOQueue *ioq);
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
- unsigned int count, long long offset);
-int ioq_submit(IOQueue *ioq);
-
-static inline unsigned int ioq_num_queued(IOQueue *ioq)
-{
- return ioq->queue_idx;
-}
-
-typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
- void *opaque);
-
-#endif /* IOQ_H */
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index e49c2536b1..c10b7b70fb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -17,7 +17,6 @@
#include "qemu/thread.h"
#include "qemu/error-report.h"
#include "hw/virtio/dataplane/vring.h"
-#include "ioq.h"
#include "block/block.h"
#include "hw/virtio/virtio-blk.h"
#include "virtio-blk.h"
@@ -25,20 +24,14 @@
#include "hw/virtio/virtio-bus.h"
#include "qom/object_interfaces.h"
-enum {
- SEG_MAX = 126, /* maximum number of I/O segments */
- VRING_MAX = SEG_MAX + 2, /* maximum number of vring descriptors */
- REQ_MAX = VRING_MAX, /* maximum number of requests in the vring,
- * is VRING_MAX / 2 with traditional and
- * VRING_MAX with indirect descriptors */
-};
-
typedef struct {
- struct iocb iocb; /* Linux AIO control block */
+ VirtIOBlockDataPlane *s;
QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
VirtQueueElement *elem; /* saved data from the virtqueue */
- struct iovec *bounce_iov; /* used if guest buffers are unaligned */
- QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */
+ QEMUIOVector qiov; /* original request iovecs */
+ struct iovec bounce_iov; /* used if guest buffers are unaligned */
+ QEMUIOVector bounce_qiov; /* bounce buffer iovecs */
+ bool read; /* read or write? */
} VirtIOBlockRequest;
struct VirtIOBlockDataPlane {
@@ -47,7 +40,6 @@ struct VirtIOBlockDataPlane {
bool stopping;
VirtIOBlkConf *blk;
- int fd; /* image file descriptor */
VirtIODevice *vdev;
Vring vring; /* virtqueue vring */
@@ -61,16 +53,8 @@ struct VirtIOBlockDataPlane {
IOThread *iothread;
IOThread internal_iothread_obj;
AioContext *ctx;
- EventNotifier io_notifier; /* Linux AIO completion */
EventNotifier host_notifier; /* doorbell */
- IOQueue ioqueue; /* Linux AIO queue (should really be per
- IOThread) */
- VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
- queue */
-
- unsigned int num_reqs;
-
/* Operation blocker on BDS */
Error *blocker;
};
@@ -85,33 +69,28 @@ static void notify_guest(VirtIOBlockDataPlane *s)
event_notifier_set(s->guest_notifier);
}
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+static void complete_rdwr(void *opaque, int ret)
{
- VirtIOBlockDataPlane *s = opaque;
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+ VirtIOBlockRequest *req = opaque;
struct virtio_blk_inhdr hdr;
int len;
- if (likely(ret >= 0)) {
+ if (likely(ret == 0)) {
hdr.status = VIRTIO_BLK_S_OK;
- len = ret;
+ len = req->qiov.size;
} else {
hdr.status = VIRTIO_BLK_S_IOERR;
len = 0;
}
- trace_virtio_blk_data_plane_complete_request(s, req->elem->index, ret);
+ trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
- if (req->read_qiov) {
- assert(req->bounce_iov);
- qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
- qemu_iovec_destroy(req->read_qiov);
- g_slice_free(QEMUIOVector, req->read_qiov);
+ if (req->read && req->bounce_iov.iov_base) {
+ qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
}
- if (req->bounce_iov) {
- qemu_vfree(req->bounce_iov->iov_base);
- g_slice_free(struct iovec, req->bounce_iov);
+ if (req->bounce_iov.iov_base) {
+ qemu_vfree(req->bounce_iov.iov_base);
}
qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
@@ -122,9 +101,9 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
* written to, but for virtio-blk it seems to be the number of bytes
* transferred plus the status bytes.
*/
- vring_push(&s->vring, req->elem, len + sizeof(hdr));
- req->elem = NULL;
- s->num_reqs--;
+ vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
+ notify_guest(req->s);
+ g_slice_free(VirtIOBlockRequest, req);
}
static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
@@ -155,51 +134,87 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
}
-static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
- struct iovec *iov, unsigned iov_cnt,
- long long offset, VirtQueueElement *elem,
- QEMUIOVector *inhdr)
+static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
+ struct iovec *iov, unsigned iov_cnt,
+ int64_t sector_num, VirtQueueElement *elem,
+ QEMUIOVector *inhdr)
{
- struct iocb *iocb;
- QEMUIOVector qiov;
- struct iovec *bounce_iov = NULL;
- QEMUIOVector *read_qiov = NULL;
-
- qemu_iovec_init_external(&qiov, iov, iov_cnt);
- if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
- void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
-
- if (read) {
- /* Need to copy back from bounce buffer on completion */
- read_qiov = g_slice_new(QEMUIOVector);
- qemu_iovec_init(read_qiov, iov_cnt);
- qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
- } else {
- qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+ VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
+ QEMUIOVector *qiov;
+ int nb_sectors;
+
+ /* Fill in virtio block metadata needed for completion */
+ req->s = s;
+ req->elem = elem;
+ req->inhdr = inhdr;
+ req->read = read;
+ qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
+
+ qiov = &req->qiov;
+
+ if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
+ void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
+
+ /* Populate bounce buffer with data for writes */
+ if (!read) {
+ qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
}
/* Redirect I/O to aligned bounce buffer */
- bounce_iov = g_slice_new(struct iovec);
- bounce_iov->iov_base = bounce_buffer;
- bounce_iov->iov_len = qiov.size;
- iov = bounce_iov;
- iov_cnt = 1;
+ req->bounce_iov.iov_base = bounce_buffer;
+ req->bounce_iov.iov_len = qiov->size;
+ qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
+ qiov = &req->bounce_qiov;
}
- iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
+ nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
- /* Fill in virtio block metadata needed for completion */
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+ if (read) {
+ bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+ complete_rdwr, req);
+ } else {
+ bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+ complete_rdwr, req);
+ }
+}
+
+static void complete_flush(void *opaque, int ret)
+{
+ VirtIOBlockRequest *req = opaque;
+ unsigned char status;
+
+ if (ret == 0) {
+ status = VIRTIO_BLK_S_OK;
+ } else {
+ status = VIRTIO_BLK_S_IOERR;
+ }
+
+ complete_request_early(req->s, req->elem, req->inhdr, status);
+ g_slice_free(VirtIOBlockRequest, req);
+}
+
+static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+ QEMUIOVector *inhdr)
+{
+ VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
+ req->s = s;
req->elem = elem;
req->inhdr = inhdr;
- req->bounce_iov = bounce_iov;
- req->read_qiov = read_qiov;
- return 0;
+
+ bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
+}
+
+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+ QEMUIOVector *inhdr)
+{
+ int status;
+
+ status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
+ complete_request_early(s, elem, inhdr, status);
}
-static int process_request(IOQueue *ioq, VirtQueueElement *elem)
+static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
{
- VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
struct iovec *iov = elem->out_sg;
struct iovec *in_iov = elem->in_sg;
unsigned out_num = elem->out_num;
@@ -234,25 +249,23 @@ static int process_request(IOQueue *ioq, VirtQueueElement *elem)
switch (outhdr.type) {
case VIRTIO_BLK_T_IN:
- do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, elem, inhdr);
+ do_rdwr_cmd(s, true, in_iov, in_num,
+ outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+ elem, inhdr);
return 0;
case VIRTIO_BLK_T_OUT:
- do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, elem, inhdr);
+ do_rdwr_cmd(s, false, iov, out_num,
+ outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+ elem, inhdr);
return 0;
case VIRTIO_BLK_T_SCSI_CMD:
- /* TODO support SCSI commands */
- complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+ do_scsi_cmd(s, elem, inhdr);
return 0;
case VIRTIO_BLK_T_FLUSH:
- /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
- if (qemu_fdatasync(s->fd) < 0) {
- complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_IOERR);
- } else {
- complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
- }
+ do_flush_cmd(s, elem, inhdr);
return 0;
case VIRTIO_BLK_T_GET_ID:
@@ -274,7 +287,6 @@ static void handle_notify(EventNotifier *e)
VirtQueueElement *elem;
int ret;
- unsigned int num_queued;
event_notifier_test_and_clear(&s->host_notifier);
for (;;) {
@@ -291,7 +303,7 @@ static void handle_notify(EventNotifier *e)
trace_virtio_blk_data_plane_process_request(s, elem->out_num,
elem->in_num, elem->index);
- if (process_request(&s->ioqueue, elem) < 0) {
+ if (process_request(s, elem) < 0) {
vring_set_broken(&s->vring);
vring_free_element(elem);
ret = -EFAULT;
@@ -306,44 +318,10 @@ static void handle_notify(EventNotifier *e)
if (vring_enable_notification(s->vdev, &s->vring)) {
break;
}
- } else { /* ret == -ENOBUFS or fatal error, iovecs[] is depleted */
- /* Since there are no iovecs[] left, stop processing for now. Do
- * not re-enable guest->host notifies since the I/O completion
- * handler knows to check for more vring descriptors anyway.
- */
+ } else { /* fatal error */
break;
}
}
-
- num_queued = ioq_num_queued(&s->ioqueue);
- if (num_queued > 0) {
- s->num_reqs += num_queued;
-
- int rc = ioq_submit(&s->ioqueue);
- if (unlikely(rc < 0)) {
- fprintf(stderr, "ioq_submit failed %d\n", rc);
- exit(1);
- }
- }
-}
-
-static void handle_io(EventNotifier *e)
-{
- VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
- io_notifier);
-
- event_notifier_test_and_clear(&s->io_notifier);
- if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
- notify_guest(s);
- }
-
- /* If there were more requests than iovecs, the vring will not be empty yet
- * so check again. There should now be enough resources to process more
- * requests.
- */
- if (unlikely(vring_more_avail(&s->vring))) {
- handle_notify(&s->host_notifier);
- }
}
/* Context: QEMU global mutex held */
@@ -352,7 +330,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
Error **errp)
{
VirtIOBlockDataPlane *s;
- int fd;
Error *local_err = NULL;
*dataplane = NULL;
@@ -361,18 +338,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
return;
}
- if (blk->scsi) {
- error_setg(errp,
- "device is incompatible with x-data-plane, use scsi=off");
- return;
- }
-
- if (blk->config_wce) {
- error_setg(errp, "device is incompatible with x-data-plane, "
- "use config-wce=off");
- return;
- }
-
/* If dataplane is (re-)enabled while the guest is running there could be
* block jobs that can conflict.
*/
@@ -383,16 +348,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
return;
}
- fd = raw_get_aio_fd(blk->conf.bs);
- if (fd < 0) {
- error_setg(errp, "drive is incompatible with x-data-plane, "
- "use format=raw,cache=none,aio=native");
- return;
- }
-
s = g_new0(VirtIOBlockDataPlane, 1);
s->vdev = vdev;
- s->fd = fd;
s->blk = blk;
if (blk->iothread) {
@@ -437,7 +394,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
VirtQueue *vq;
- int i;
if (s->started) {
return;
@@ -470,24 +426,18 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
}
s->host_notifier = *virtio_queue_get_host_notifier(vq);
- /* Set up ioqueue */
- ioq_init(&s->ioqueue, s->fd, REQ_MAX);
- for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
- ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
- }
- s->io_notifier = *ioq_get_notifier(&s->ioqueue);
-
s->starting = false;
s->started = true;
trace_virtio_blk_data_plane_start(s);
+ bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
+
/* Kick right away to begin processing requests already in vring */
event_notifier_set(virtio_queue_get_host_notifier(vq));
/* Get this show started by hooking up our callbacks */
aio_context_acquire(s->ctx);
aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
- aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io);
aio_context_release(s->ctx);
}
@@ -507,13 +457,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
/* Stop notifications for new requests from guest */
aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
- /* Complete pending requests */
- while (s->num_reqs > 0) {
- aio_poll(s->ctx, true);
- }
-
- /* Stop ioq callbacks (there are no pending requests left) */
- aio_set_event_notifier(s->ctx, &s->io_notifier, NULL);
+ /* Drain and switch bs back to the QEMU main loop */
+ bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());
aio_context_release(s->ctx);
@@ -522,7 +467,6 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
*/
vring_teardown(&s->vring, s->vdev, 0);
- ioq_cleanup(&s->ioqueue);
k->set_host_notifier(qbus->parent, 0, false);
/* Clean up guest notifier (irq) */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index b1fc1de0dc..85aa8715ba 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq
VirtQueueElement elem;
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr *out;
- struct virtio_scsi_inhdr *scsi;
QEMUIOVector qiov;
struct VirtIOBlockReq *next;
BlockAcctCookie acct;
@@ -125,13 +124,15 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
return req;
}
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+ VirtQueueElement *elem)
{
+ int status = VIRTIO_BLK_S_OK;
+ struct virtio_scsi_inhdr *scsi = NULL;
#ifdef __linux__
- int ret;
int i;
+ struct sg_io_hdr hdr;
#endif
- int status = VIRTIO_BLK_S_OK;
/*
* We require at least one output segment each for the virtio_blk_outhdr
@@ -140,19 +141,18 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
* We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
* and the sense buffer pointer in the input segments.
*/
- if (req->elem.out_num < 2 || req->elem.in_num < 3) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- g_free(req);
- return;
+ if (elem->out_num < 2 || elem->in_num < 3) {
+ status = VIRTIO_BLK_S_IOERR;
+ goto fail;
}
/*
* The scsi inhdr is placed in the second-to-last input segment, just
* before the regular inhdr.
*/
- req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+ scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
- if (!req->dev->blk.scsi) {
+ if (!blk->blk.scsi) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
@@ -160,43 +160,42 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
/*
* No support for bidirectional commands yet.
*/
- if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+ if (elem->out_num > 2 && elem->in_num > 3) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
#ifdef __linux__
- struct sg_io_hdr hdr;
memset(&hdr, 0, sizeof(struct sg_io_hdr));
hdr.interface_id = 'S';
- hdr.cmd_len = req->elem.out_sg[1].iov_len;
- hdr.cmdp = req->elem.out_sg[1].iov_base;
+ hdr.cmd_len = elem->out_sg[1].iov_len;
+ hdr.cmdp = elem->out_sg[1].iov_base;
hdr.dxfer_len = 0;
- if (req->elem.out_num > 2) {
+ if (elem->out_num > 2) {
/*
* If there are more than the minimally required 2 output segments
* there is write payload starting from the third iovec.
*/
hdr.dxfer_direction = SG_DXFER_TO_DEV;
- hdr.iovec_count = req->elem.out_num - 2;
+ hdr.iovec_count = elem->out_num - 2;
for (i = 0; i < hdr.iovec_count; i++)
- hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+ hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
- hdr.dxferp = req->elem.out_sg + 2;
+ hdr.dxferp = elem->out_sg + 2;
- } else if (req->elem.in_num > 3) {
+ } else if (elem->in_num > 3) {
/*
* If we have more than 3 input segments the guest wants to actually
* read data.
*/
hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- hdr.iovec_count = req->elem.in_num - 3;
+ hdr.iovec_count = elem->in_num - 3;
for (i = 0; i < hdr.iovec_count; i++)
- hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+ hdr.dxfer_len += elem->in_sg[i].iov_len;
- hdr.dxferp = req->elem.in_sg;
+ hdr.dxferp = elem->in_sg;
} else {
/*
* Some SCSI commands don't actually transfer any data.
@@ -204,11 +203,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
hdr.dxfer_direction = SG_DXFER_NONE;
}
- hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
- hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+ hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+ hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
- ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
- if (ret) {
+ status = bdrv_ioctl(blk->bs, SG_IO, &hdr);
+ if (status) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
@@ -224,23 +223,31 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
hdr.status = CHECK_CONDITION;
}
- stl_p(&req->scsi->errors,
+ stl_p(&scsi->errors,
hdr.status | (hdr.msg_status << 8) |
(hdr.host_status << 16) | (hdr.driver_status << 24));
- stl_p(&req->scsi->residual, hdr.resid);
- stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
- stl_p(&req->scsi->data_len, hdr.dxfer_len);
+ stl_p(&scsi->residual, hdr.resid);
+ stl_p(&scsi->sense_len, hdr.sb_len_wr);
+ stl_p(&scsi->data_len, hdr.dxfer_len);
- virtio_blk_req_complete(req, status);
- g_free(req);
- return;
+ return status;
#else
abort();
#endif
fail:
/* Just put anything nonzero so that the ioctl fails in the guest. */
- stl_p(&req->scsi->errors, 255);
+ if (scsi) {
+ stl_p(&scsi->errors, 255);
+ }
+ return status;
+}
+
+static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+{
+ int status;
+
+ status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
virtio_blk_req_complete(req, status);
g_free(req);
}
@@ -523,7 +530,10 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
struct virtio_blk_config blkcfg;
memcpy(&blkcfg, config, sizeof(blkcfg));
+
+ aio_context_acquire(bdrv_get_aio_context(s->bs));
bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0);
+ aio_context_release(bdrv_get_aio_context(s->bs));
}
static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
@@ -582,7 +592,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
* s->bs would erroneously be placed in writethrough mode.
*/
if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
- bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+ aio_context_acquire(bdrv_get_aio_context(s->bs));
+ bdrv_set_enable_write_cache(s->bs,
+ !!(features & (1 << VIRTIO_BLK_F_WCE)));
+ aio_context_release(bdrv_get_aio_context(s->bs));
}
}
diff --git a/include/block/block.h b/include/block/block.h
index faee3aa246..7d86e29cf4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -481,15 +481,6 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-#ifdef CONFIG_LINUX_AIO
-int raw_get_aio_fd(BlockDriverState *bs);
-#else
-static inline int raw_get_aio_fd(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-#endif
-
enum BlockAcctType {
BDRV_ACCT_READ,
BDRV_ACCT_WRITE,
@@ -574,4 +565,22 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_set_aio_context:
+ *
+ * Changes the #AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children.
+ *
+ * This function must be called from the old #AioContext or with a lock held so
+ * the old #AioContext is not executing.
+ */
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+
#endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f2e753f632..8d58334c1d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -247,6 +247,19 @@ struct BlockDriver {
*/
int (*bdrv_has_zero_init)(BlockDriverState *bs);
+ /* Remove fd handlers, timers, and other event loop callbacks so the event
+ * loop is no longer in use. Called with no in-flight requests and in
+ * depth-first traversal order with parents before child nodes.
+ */
+ void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+ /* Add fd handlers, timers, and other event loop callbacks so I/O requests
+ * can be processed again. Called with no in-flight requests and in
+ * depth-first traversal order with child nodes before parent nodes.
+ */
+ void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+ AioContext *new_context);
+
QLIST_ENTRY(BlockDriver) list;
};
@@ -297,6 +310,8 @@ struct BlockDriverState {
const BlockDevOps *dev_ops;
void *dev_opaque;
+ AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+
char filename[1024];
char backing_file[1024]; /* if non zero, the image is a diff of
this file image */
@@ -390,11 +405,25 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
NotifierWithReturn *notifier);
/**
- * bdrv_get_aio_context:
+ * bdrv_detach_aio_context:
+ *
+ * May be called from .bdrv_detach_aio_context() to detach children from the
+ * current #AioContext. This is only needed by block drivers that manage their
+ * own children. Both ->file and ->backing_hd are automatically handled and
+ * block drivers should not call this function on them explicitly.
+ */
+void bdrv_detach_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_attach_aio_context:
*
- * Returns: the currently bound #AioContext
+ * May be called from .bdrv_attach_aio_context() to attach children to the new
+ * #AioContext. This is only needed by block drivers that manage their own
+ * children. Both ->file and ->backing_hd are automatically handled and block
+ * drivers should not call this function on them explicitly.
*/
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+void bdrv_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context);
#ifdef _WIN32
int is_windows_drive(const char *filename);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index e4c41ff2ef..4bc9b549ad 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -155,4 +155,7 @@ typedef struct VirtIOBlock {
void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk);
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+ VirtQueueElement *elem);
+
#endif
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index ab29b0b918..b890613a9c 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -67,6 +67,11 @@ typedef struct ThrottleState {
int64_t previous_leak; /* timestamp of the last leak done */
QEMUTimer * timers[2]; /* timers used to do the throttling */
QEMUClockType clock_type; /* the clock used */
+
+ /* Callbacks */
+ QEMUTimerCB *read_timer_cb;
+ QEMUTimerCB *write_timer_cb;
+ void *timer_opaque;
} ThrottleState;
/* operations on single leaky buckets */
@@ -82,6 +87,7 @@ bool throttle_compute_timer(ThrottleState *ts,
/* init/destroy cycle */
void throttle_init(ThrottleState *ts,
+ AioContext *aio_context,
QEMUClockType clock_type,
void (read_timer)(void *),
void (write_timer)(void *),
@@ -89,6 +95,10 @@ void throttle_init(ThrottleState *ts,
void throttle_destroy(ThrottleState *ts);
+void throttle_detach_aio_context(ThrottleState *ts);
+
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context);
+
bool throttle_have_timer(ThrottleState *ts);
/* configuration */
diff --git a/qapi-schema.json b/qapi-schema.json
index 7bc33ea717..14b498b442 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2,32 +2,11 @@
#
# QAPI Schema
-##
-# @ErrorClass
-#
-# QEMU error classes
-#
-# @GenericError: this is used for errors that don't require a specific error
-# class. This should be the default case for most errors
-#
-# @CommandNotFound: the requested command has not been found
-#
-# @DeviceEncrypted: the requested operation can't be fulfilled because the
-# selected device is encrypted
-#
-# @DeviceNotActive: a device has failed to become active
-#
-# @DeviceNotFound: the requested device has not been found
-#
-# @KVMMissingCap: the requested operation can't be fulfilled because a
-# required KVM capability is missing
-#
-# Since: 1.2
-##
-{ 'enum': 'ErrorClass',
- 'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
- 'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
+# QAPI common definitions
+{ 'include': 'qapi/common.json' }
+# QAPI block definitions
+{ 'include': 'qapi/block.json' }
##
# LostTickPolicy:
@@ -53,40 +32,6 @@
{ 'enum': 'LostTickPolicy',
'data': ['discard', 'delay', 'merge', 'slew' ] }
-##
-# BiosAtaTranslation:
-#
-# Policy that BIOS should use to interpret cylinder/head/sector
-# addresses. Note that Bochs BIOS and SeaBIOS will not actually
-# translate logical CHS to physical; instead, they will use logical
-# block addressing.
-#
-# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
-# depending on the size of the disk. If they are not passed,
-# choose none if QEMU can guess that the disk had 16 or fewer
-# heads, large if QEMU can guess that the disk had 131072 or
-# fewer tracks across all heads (i.e. cylinders*heads<131072),
-# otherwise LBA.
-#
-# @none: The physical disk geometry is equal to the logical geometry.
-#
-# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
-# heads (if fewer than 255 are enough to cover the whole disk
-# with 1024 cylinders/head). The number of cylinders/head is
-# then computed based on the number of sectors and heads.
-#
-# @large: The number of cylinders per head is scaled down to 1024
-# by correspondingly scaling up the number of heads.
-#
-# @rechs: Same as @large, but first convert a 16-head geometry to
-# 15-head, by proportionally scaling up the number of
-# cylinders/head.
-#
-# Since: 2.0
-##
-{ 'enum': 'BiosAtaTranslation',
- 'data': ['auto', 'none', 'lba', 'large', 'rechs']}
-
# @add_client
#
# Allow client connections for VNC, Spice and socket based
@@ -134,43 +79,6 @@
{ 'command': 'query-name', 'returns': 'NameInfo' }
##
-# @VersionInfo:
-#
-# A description of QEMU's version.
-#
-# @qemu.major: The major version of QEMU
-#
-# @qemu.minor: The minor version of QEMU
-#
-# @qemu.micro: The micro version of QEMU. By current convention, a micro
-# version of 50 signifies a development branch. A micro version
-# greater than or equal to 90 signifies a release candidate for
-# the next minor version. A micro version of less than 50
-# signifies a stable release.
-#
-# @package: QEMU will always set this field to an empty string. Downstream
-# versions of QEMU should set this to a non-empty string. The
-# exact format depends on the downstream; however, it is highly
-# recommended that a unique name is used.
-#
-# Since: 0.14.0
-##
-{ 'type': 'VersionInfo',
- 'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
- 'package': 'str'} }
-
-##
-# @query-version:
-#
-# Returns the current version of QEMU.
-#
-# Returns: A @VersionInfo object describing the current version of QEMU.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-version', 'returns': 'VersionInfo' }
-
-##
# @KvmInfo:
#
# Information about support for KVM acceleration
@@ -242,179 +150,6 @@
'guest-panicked' ] }
##
-# @SnapshotInfo
-#
-# @id: unique snapshot id
-#
-# @name: user chosen name
-#
-# @vm-state-size: size of the VM state
-#
-# @date-sec: UTC date of the snapshot in seconds
-#
-# @date-nsec: fractional part in nano seconds to be used with date-sec
-#
-# @vm-clock-sec: VM clock relative to boot in seconds
-#
-# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'SnapshotInfo',
- 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
- 'date-sec': 'int', 'date-nsec': 'int',
- 'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
-
-##
-# @ImageInfoSpecificQCow2:
-#
-# @compat: compatibility level
-#
-# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificQCow2',
- 'data': {
- 'compat': 'str',
- '*lazy-refcounts': 'bool'
- } }
-
-##
-# @ImageInfoSpecificVmdk:
-#
-# @create-type: The create type of VMDK image
-#
-# @cid: Content id of image
-#
-# @parent-cid: Parent VMDK image's cid
-#
-# @extents: List of extent files
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificVmdk',
- 'data': {
- 'create-type': 'str',
- 'cid': 'int',
- 'parent-cid': 'int',
- 'extents': ['ImageInfo']
- } }
-
-##
-# @ImageInfoSpecific:
-#
-# A discriminated record of image format specific information structures.
-#
-# Since: 1.7
-##
-
-{ 'union': 'ImageInfoSpecific',
- 'data': {
- 'qcow2': 'ImageInfoSpecificQCow2',
- 'vmdk': 'ImageInfoSpecificVmdk'
- } }
-
-##
-# @ImageInfo:
-#
-# Information about a QEMU image file
-#
-# @filename: name of the image file
-#
-# @format: format of the image file
-#
-# @virtual-size: maximum capacity in bytes of the image
-#
-# @actual-size: #optional actual size on disk in bytes of the image
-#
-# @dirty-flag: #optional true if image is not cleanly closed
-#
-# @cluster-size: #optional size of a cluster in bytes
-#
-# @encrypted: #optional true if the image is encrypted
-#
-# @compressed: #optional true if the image is compressed (Since 1.7)
-#
-# @backing-filename: #optional name of the backing file
-#
-# @full-backing-filename: #optional full path of the backing file
-#
-# @backing-filename-format: #optional the format of the backing file
-#
-# @snapshots: #optional list of VM snapshots
-#
-# @backing-image: #optional info of the backing image (since 1.6)
-#
-# @format-specific: #optional structure supplying additional format-specific
-# information (since 1.7)
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'ImageInfo',
- 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
- '*actual-size': 'int', 'virtual-size': 'int',
- '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
- '*backing-filename': 'str', '*full-backing-filename': 'str',
- '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
- '*backing-image': 'ImageInfo',
- '*format-specific': 'ImageInfoSpecific' } }
-
-##
-# @ImageCheck:
-#
-# Information about a QEMU image file check
-#
-# @filename: name of the image file checked
-#
-# @format: format of the image file checked
-#
-# @check-errors: number of unexpected errors occurred during check
-#
-# @image-end-offset: #optional offset (in bytes) where the image ends, this
-# field is present if the driver for the image format
-# supports it
-#
-# @corruptions: #optional number of corruptions found during the check if any
-#
-# @leaks: #optional number of leaks found during the check if any
-#
-# @corruptions-fixed: #optional number of corruptions fixed during the check
-# if any
-#
-# @leaks-fixed: #optional number of leaks fixed during the check if any
-#
-# @total-clusters: #optional total number of clusters, this field is present
-# if the driver for the image format supports it
-#
-# @allocated-clusters: #optional total number of allocated clusters, this
-# field is present if the driver for the image format
-# supports it
-#
-# @fragmented-clusters: #optional total number of fragmented clusters, this
-# field is present if the driver for the image format
-# supports it
-#
-# @compressed-clusters: #optional total number of compressed clusters, this
-# field is present if the driver for the image format
-# supports it
-#
-# Since: 1.4
-#
-##
-
-{ 'type': 'ImageCheck',
- 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
- '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
- '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
- '*total-clusters': 'int', '*allocated-clusters': 'int',
- '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
-
-##
# @StatusInfo:
#
# Information about VCPU run state
@@ -584,28 +319,6 @@
'returns': 'str' }
##
-# @CommandInfo:
-#
-# Information about a QMP command
-#
-# @name: The command name
-#
-# Since: 0.14.0
-##
-{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
-
-##
-# @query-commands:
-#
-# Return a list of supported QMP commands by this server
-#
-# Returns: A list of @CommandInfo for all supported commands
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
-
-##
# @EventInfo:
#
# Information about a QMP event
@@ -917,252 +630,6 @@
{ 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] }
##
-# @BlockDeviceInfo:
-#
-# Information about the backing device for a block device.
-#
-# @file: the filename of the backing device
-#
-# @node-name: #optional the name of the block driver node (Since 2.0)
-#
-# @ro: true if the backing device was open read-only
-#
-# @drv: the name of the block format used to open the backing device. As of
-# 0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
-# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
-# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
-#
-# @backing_file: #optional the name of the backing file (for copy-on-write)
-#
-# @backing_file_depth: number of files in the backing file chain (since: 1.2)
-#
-# @encrypted: true if the backing device is encrypted
-#
-# @encryption_key_missing: true if the backing device is encrypted but a
-# valid encryption key is missing
-#
-# @detect_zeroes: detect and optimize zero writes (Since 2.1)
-#
-# @bps: total throughput limit in bytes per second is specified
-#
-# @bps_rd: read throughput limit in bytes per second is specified
-#
-# @bps_wr: write throughput limit in bytes per second is specified
-#
-# @iops: total I/O operations per second is specified
-#
-# @iops_rd: read I/O operations per second is specified
-#
-# @iops_wr: write I/O operations per second is specified
-#
-# @image: the info of image used (since: 1.6)
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Since: 0.14.0
-#
-##
-{ 'type': 'BlockDeviceInfo',
- 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
- '*backing_file': 'str', 'backing_file_depth': 'int',
- 'encrypted': 'bool', 'encryption_key_missing': 'bool',
- 'detect_zeroes': 'BlockdevDetectZeroesOptions',
- 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
- 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
- 'image': 'ImageInfo',
- '*bps_max': 'int', '*bps_rd_max': 'int',
- '*bps_wr_max': 'int', '*iops_max': 'int',
- '*iops_rd_max': 'int', '*iops_wr_max': 'int',
- '*iops_size': 'int' } }
-
-##
-# @BlockDeviceIoStatus:
-#
-# An enumeration of block device I/O status.
-#
-# @ok: The last I/O operation has succeeded
-#
-# @failed: The last I/O operation has failed
-#
-# @nospace: The last I/O operation has failed due to a no-space condition
-#
-# Since: 1.0
-##
-{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
-
-##
-# @BlockDeviceMapEntry:
-#
-# Entry in the metadata map of the device (returned by "qemu-img map")
-#
-# @start: Offset in the image of the first byte described by this entry
-# (in bytes)
-#
-# @length: Length of the range described by this entry (in bytes)
-#
-# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
-# before reaching one for which the range is allocated. The value is
-# in the range 0 to the depth of the image chain - 1.
-#
-# @zero: the sectors in this range read as zeros
-#
-# @data: reading the image will actually read data from a file (in particular,
-# if @offset is present this means that the sectors are not simply
-# preallocated, but contain actual data in raw format)
-#
-# @offset: if present, the image file stores the data for this range in
-# raw format at the given offset.
-#
-# Since 1.7
-##
-{ 'type': 'BlockDeviceMapEntry',
- 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
- 'data': 'bool', '*offset': 'int' } }
-
-##
-# @BlockDirtyInfo:
-#
-# Block dirty bitmap information.
-#
-# @count: number of dirty bytes according to the dirty bitmap
-#
-# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
-#
-# Since: 1.3
-##
-{ 'type': 'BlockDirtyInfo',
- 'data': {'count': 'int', 'granularity': 'int'} }
-
-##
-# @BlockInfo:
-#
-# Block device information. This structure describes a virtual device and
-# the backing device associated with it.
-#
-# @device: The device name associated with the virtual device.
-#
-# @type: This field is returned only for compatibility reasons, it should
-# not be used (always returns 'unknown')
-#
-# @removable: True if the device supports removable media.
-#
-# @locked: True if the guest has locked this device from having its media
-# removed
-#
-# @tray_open: #optional True if the device has a tray and it is open
-# (only present if removable is true)
-#
-# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
-# driver has one or more dirty bitmaps) (Since 2.0)
-#
-# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
-# supports it and the VM is configured to stop on errors
-#
-# @inserted: #optional @BlockDeviceInfo describing the device if media is
-# present
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockInfo',
- 'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
- 'locked': 'bool', '*inserted': 'BlockDeviceInfo',
- '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
- '*dirty-bitmaps': ['BlockDirtyInfo'] } }
-
-##
-# @query-block:
-#
-# Get a list of BlockInfo for all virtual block devices.
-#
-# Returns: a list of @BlockInfo describing each virtual block device
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-block', 'returns': ['BlockInfo'] }
-
-##
-# @BlockDeviceStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @rd_bytes: The number of bytes read by the device.
-#
-# @wr_bytes: The number of bytes written by the device.
-#
-# @rd_operations: The number of read operations performed by the device.
-#
-# @wr_operations: The number of write operations performed by the device.
-#
-# @flush_operations: The number of cache flush operations performed by the
-# device (since 0.15.0)
-#
-# @flush_total_time_ns: Total time spent on cache flushes in nano-seconds
-# (since 0.15.0).
-#
-# @wr_total_time_ns: Total time spent on writes in nano-seconds (since 0.15.0).
-#
-# @rd_total_time_ns: Total time spent on reads in nano-seconds (since 0.15.0).
-#
-# @wr_highest_offset: The offset after the greatest byte written to the
-# device. The intended use of this information is for
-# growable sparse files (like qcow2) that are used on top
-# of a physical device.
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockDeviceStats',
- 'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
- 'wr_operations': 'int', 'flush_operations': 'int',
- 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
- 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
-
-##
-# @BlockStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @device: #optional If the stats are for a virtual block device, the name
-# corresponding to the virtual block device.
-#
-# @stats: A @BlockDeviceStats for the device.
-#
-# @parent: #optional This describes the file block device if it has one.
-#
-# @backing: #optional This describes the backing block device if it has one.
-# (Since 2.0)
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockStats',
- 'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
- '*parent': 'BlockStats',
- '*backing': 'BlockStats'} }
-
-##
-# @query-blockstats:
-#
-# Query the @BlockStats for all virtual block devices.
-#
-# Returns: A list of @BlockStats for each virtual block device.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
-
-##
# @VncClientInfo:
#
# Information about a connected VNC client.
@@ -1501,105 +968,6 @@
{ 'command': 'query-pci', 'returns': ['PciInfo'] }
##
-# @BlockdevOnError:
-#
-# An enumeration of possible behaviors for errors on I/O operations.
-# The exact meaning depends on whether the I/O was initiated by a guest
-# or by a block job
-#
-# @report: for guest operations, report the error to the guest;
-# for jobs, cancel the job
-#
-# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
-# or BLOCK_JOB_ERROR)
-#
-# @enospc: same as @stop on ENOSPC, same as @report otherwise.
-#
-# @stop: for guest operations, stop the virtual machine;
-# for jobs, pause the job
-#
-# Since: 1.3
-##
-{ 'enum': 'BlockdevOnError',
- 'data': ['report', 'ignore', 'enospc', 'stop'] }
-
-##
-# @MirrorSyncMode:
-#
-# An enumeration of possible behaviors for the initial synchronization
-# phase of storage mirroring.
-#
-# @top: copies data in the topmost image to the destination
-#
-# @full: copies data from all images to the destination
-#
-# @none: only copy data written from now on
-#
-# Since: 1.3
-##
-{ 'enum': 'MirrorSyncMode',
- 'data': ['top', 'full', 'none'] }
-
-##
-# @BlockJobType:
-#
-# Type of a block job.
-#
-# @commit: block commit job type, see "block-commit"
-#
-# @stream: block stream job type, see "block-stream"
-#
-# @mirror: drive mirror job type, see "drive-mirror"
-#
-# @backup: drive backup job type, see "drive-backup"
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockJobType',
- 'data': ['commit', 'stream', 'mirror', 'backup'] }
-
-##
-# @BlockJobInfo:
-#
-# Information about a long-running block device operation.
-#
-# @type: the job type ('stream' for image streaming)
-#
-# @device: the block device name
-#
-# @len: the maximum progress value
-#
-# @busy: false if the job is known to be in a quiescent state, with
-# no pending I/O. Since 1.3.
-#
-# @paused: whether the job is paused or, if @busy is true, will
-# pause itself as soon as possible. Since 1.3.
-#
-# @offset: the current progress value
-#
-# @speed: the rate limit, bytes per second
-#
-# @io-status: the status of the job (since 1.3)
-#
-# Since: 1.1
-##
-{ 'type': 'BlockJobInfo',
- 'data': {'type': 'str', 'device': 'str', 'len': 'int',
- 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
- 'io-status': 'BlockDeviceIoStatus'} }
-
-##
-# @query-block-jobs:
-#
-# Return information about long-running block device operations.
-#
-# Returns: a list of @BlockJobInfo for each active block job
-#
-# Since: 1.1
-##
-{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
-
-##
# @quit:
#
# This command will cause the QEMU process to exit gracefully. While every
@@ -1779,43 +1147,6 @@
{ 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
##
-# @block_passwd:
-#
-# This command sets the password of a block device that has not been opened
-# with a password and requires one.
-#
-# The two cases where this can happen are a block device is created through
-# QEMU's initial command line or a block device is changed through the legacy
-# @change interface.
-#
-# In the event that the block device is created through the initial command
-# line, the VM will start in the stopped state regardless of whether '-S' is
-# used. The intention is for a management tool to query the block devices to
-# determine which ones are encrypted, set the passwords with this command, and
-# then start the guest with the @cont command.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the block backend device to set the password on
-#
-# @node-name: #optional graph node name to set the password on (Since 2.0)
-#
-# @password: the password to use for the device
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-# If @device is not encrypted, DeviceNotEncrypted
-#
-# Notes: Not all block formats support encryption and some that do are not
-# able to validate that a password is correct. Disk corruption may
-# occur if an invalid password is specified.
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_passwd', 'data': {'*device': 'str',
- '*node-name': 'str', 'password': 'str'} }
-
-##
# @balloon:
#
# Request the balloon driver to change its balloon size.
@@ -1836,126 +1167,6 @@
{ 'command': 'balloon', 'data': {'value': 'int'} }
##
-# @block_resize
-#
-# Resize a block image while a guest is running.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to get the image resized
-#
-# @node-name: #optional graph node name to get the image resized (Since 2.0)
-#
-# @size: new image size in bytes
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_resize', 'data': { '*device': 'str',
- '*node-name': 'str',
- 'size': 'int' }}
-
-##
-# @NewImageMode
-#
-# An enumeration that tells QEMU how to set the backing file path in
-# a new image file.
-#
-# @existing: QEMU should look for an existing image file.
-#
-# @absolute-paths: QEMU should create a new image with absolute paths
-# for the backing file. If there is no backing file available, the new
-# image will not be backed either.
-#
-# Since: 1.1
-##
-{ 'enum': 'NewImageMode',
- 'data': [ 'existing', 'absolute-paths' ] }
-
-##
-# @BlockdevSnapshot
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to generate the snapshot from.
-#
-# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
-#
-# @snapshot-file: the target of the new image. A new file will be created.
-#
-# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
-#
-# @format: #optional the format of the snapshot image, default is 'qcow2'.
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
-##
-{ 'type': 'BlockdevSnapshot',
- 'data': { '*device': 'str', '*node-name': 'str',
- 'snapshot-file': 'str', '*snapshot-node-name': 'str',
- '*format': 'str', '*mode': 'NewImageMode' } }
-
-##
-# @BlockdevSnapshotInternal
-#
-# @device: the name of the device to generate the snapshot from
-#
-# @name: the name of the internal snapshot to be created
-#
-# Notes: In transaction, if @name is empty, or any snapshot matching @name
-# exists, the operation will fail. Only some image formats support it,
-# for example, qcow2, rbd, and sheepdog.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevSnapshotInternal',
- 'data': { 'device': 'str', 'name': 'str' } }
-
-##
-# @DriveBackup
-#
-# @device: the name of the device which should be copied.
-#
-# @target: the target of the new image. If the file exists, or if it
-# is a device, the existing file/device will be used as the new
-# destination. If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-# probe if @mode is 'existing', else the format of the source
-#
-# @sync: what parts of the disk image should be copied to the destination
-# (all the disk, only the sectors allocated in the topmost image, or
-# only new I/O).
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# @on-source-error: #optional the action to take on an error on the source,
-# default 'report'. 'stop' and 'enospc' can only be used
-# if the block device supports io-status (see BlockInfo).
-#
-# @on-target-error: #optional the action to take on an error on the target,
-# default 'report' (no limitations, since this applies to
-# a different block device than @device).
-#
-# Note that @on-source-error and @on-target-error only affect background I/O.
-# If an error occurs during a guest write request, the device's rerror/werror
-# actions will be used.
-#
-# Since: 1.6
-##
-{ 'type': 'DriveBackup',
- 'data': { 'device': 'str', 'target': 'str', '*format': 'str',
- 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
- '*speed': 'int',
- '*on-source-error': 'BlockdevOnError',
- '*on-target-error': 'BlockdevOnError' } }
-
-##
# @Abort
#
# This action can be used to test transaction failure.
@@ -2002,68 +1213,6 @@
'data': { 'actions': [ 'TransactionAction' ] } }
##
-# @blockdev-snapshot-sync
-#
-# Generates a synchronous snapshot of a block device.
-#
-# For the arguments, see the documentation of BlockdevSnapshot.
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Since 0.14.0
-##
-{ 'command': 'blockdev-snapshot-sync',
- 'data': 'BlockdevSnapshot' }
-
-##
-# @blockdev-snapshot-internal-sync
-#
-# Synchronously take an internal snapshot of a block device, when the format
-# of the image used supports it.
-#
-# For the arguments, see the documentation of BlockdevSnapshotInternal.
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-# If any snapshot matching @name exists, or @name is empty,
-# GenericError
-# If the format of the image used does not support it,
-# BlockFormatFeatureNotSupported
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-internal-sync',
- 'data': 'BlockdevSnapshotInternal' }
-
-##
-# @blockdev-snapshot-delete-internal-sync
-#
-# Synchronously delete an internal snapshot of a block device, when the format
-# of the image used supports it. The snapshot is identified by name or id or
-# both. One of the name or id is required. Return SnapshotInfo for the
-# successfully deleted snapshot.
-#
-# @device: the name of the device to delete the snapshot from
-#
-# @id: #optional the snapshot's ID to be deleted
-#
-# @name: #optional the snapshot's name to be deleted
-#
-# Returns: SnapshotInfo on success
-# If @device is not a valid block device, DeviceNotFound
-# If snapshot not found, GenericError
-# If the format of the image used does not support it,
-# BlockFormatFeatureNotSupported
-# If @id and @name are both not specified, GenericError
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-delete-internal-sync',
- 'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
- 'returns': 'SnapshotInfo' }
-
-##
# @human-monitor-command:
#
# Execute a command on the human monitor and return the output.
@@ -2092,129 +1241,6 @@
'returns': 'str' }
##
-# @block-commit
-#
-# Live commit of data from overlay image nodes into backing nodes - i.e.,
-# writes data between 'top' and 'base' into 'base'.
-#
-# @device: the name of the device
-#
-# @base: #optional The file name of the backing image to write data into.
-# If not specified, this is the deepest backing image
-#
-# @top: The file name of the backing image within the image chain,
-# which contains the topmost data to be committed down.
-#
-# If top == base, that is an error.
-# If top == active, the job will not be completed by itself,
-# user needs to complete the job with the block-job-complete
-# command after getting the ready event. (Since 2.0)
-#
-# If the base image is smaller than top, then the base image
-# will be resized to be the same size as top. If top is
-# smaller than the base image, the base will not be
-# truncated. If you want the base image size to match the
-# size of the smaller top, you can safely truncate it
-# yourself once the commit operation successfully completes.
-#
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# Returns: Nothing on success
-# If commit or stream is already active on this device, DeviceInUse
-# If @device does not exist, DeviceNotFound
-# If image commit is not supported by this device, NotSupported
-# If @base or @top is invalid, a generic error is returned
-# If @speed is invalid, InvalidParameter
-#
-# Since: 1.3
-#
-##
-{ 'command': 'block-commit',
- 'data': { 'device': 'str', '*base': 'str', 'top': 'str',
- '*speed': 'int' } }
-
-##
-# @drive-backup
-#
-# Start a point-in-time copy of a block device to a new destination. The
-# status of ongoing drive-backup operations can be checked with
-# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
-# The operation can be stopped before it has completed using the
-# block-job-cancel command.
-#
-# For the arguments, see the documentation of DriveBackup.
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Since 1.6
-##
-{ 'command': 'drive-backup', 'data': 'DriveBackup' }
-
-##
-# @query-named-block-nodes
-#
-# Get the named block driver list
-#
-# Returns: the list of BlockDeviceInfo
-#
-# Since 2.0
-##
-{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
-
-##
-# @drive-mirror
-#
-# Start mirroring a block device's writes to a new destination.
-#
-# @device: the name of the device whose writes should be mirrored.
-#
-# @target: the target of the new image. If the file exists, or if it
-# is a device, the existing file/device will be used as the new
-# destination. If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-# probe if @mode is 'existing', else the format of the source
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# @sync: what parts of the disk image should be copied to the destination
-# (all the disk, only the sectors allocated in the topmost image, or
-# only new I/O).
-#
-# @granularity: #optional granularity of the dirty bitmap, default is 64K
-# if the image format doesn't have clusters, 4K if the clusters
-# are smaller than that, else the cluster size. Must be a
-# power of 2 between 512 and 64M (since 1.4).
-#
-# @buf-size: #optional maximum amount of data in flight from source to
-# target (since 1.4).
-#
-# @on-source-error: #optional the action to take on an error on the source,
-# default 'report'. 'stop' and 'enospc' can only be used
-# if the block device supports io-status (see BlockInfo).
-#
-# @on-target-error: #optional the action to take on an error on the target,
-# default 'report' (no limitations, since this applies to
-# a different block device than @device).
-#
-# Returns: nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Since 1.3
-##
-{ 'command': 'drive-mirror',
- 'data': { 'device': 'str', 'target': 'str', '*format': 'str',
- 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
- '*speed': 'int', '*granularity': 'uint32',
- '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
- '*on-target-error': 'BlockdevOnError' } }
-
-##
# @migrate_cancel
#
# Cancel the current executing migration process.
@@ -2430,25 +1456,6 @@
{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
##
-# @eject:
-#
-# Ejects a device from a removable drive.
-#
-# @device: The name of the device
-#
-# @force: #optional If true, eject regardless of whether the drive is locked.
-# If not specified, the default value is false.
-#
-# Returns: Nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Notes: Ejecting a device with no media results in success
-#
-# Since: 0.14.0
-##
-{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
-
-##
# @change-vnc-password:
#
# Change the VNC server password.
@@ -2498,211 +1505,6 @@
'data': {'device': 'str', 'target': 'str', '*arg': 'str'} }
##
-# @block_set_io_throttle:
-#
-# Change I/O throttle limits for a block drive.
-#
-# @device: The name of the device
-#
-# @bps: total throughput limit in bytes per second
-#
-# @bps_rd: read throughput limit in bytes per second
-#
-# @bps_wr: write throughput limit in bytes per second
-#
-# @iops: total I/O operations per second
-#
-# @iops_rd: read I/O operations per second
-#
-# @iops_wr: write I/O operations per second
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Returns: Nothing on success
-# If @device is not a valid block device, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block_set_io_throttle',
- 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
- 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
- '*bps_max': 'int', '*bps_rd_max': 'int',
- '*bps_wr_max': 'int', '*iops_max': 'int',
- '*iops_rd_max': 'int', '*iops_wr_max': 'int',
- '*iops_size': 'int' } }
-
-##
-# @block-stream:
-#
-# Copy data from a backing file into a block device.
-#
-# The block streaming operation is performed in the background until the entire
-# backing file has been copied. This command returns immediately once streaming
-# has started. The status of ongoing block streaming operations can be checked
-# with query-block-jobs. The operation can be stopped before it has completed
-# using the block-job-cancel command.
-#
-# If a base file is specified then sectors are not copied from that base file and
-# its backing chain. When streaming completes the image file will have the base
-# file as its backing file. This can be used to stream a subset of the backing
-# file chain instead of flattening the entire image.
-#
-# On successful completion the image file is updated to drop the backing file
-# and the BLOCK_JOB_COMPLETED event is emitted.
-#
-# @device: the device name
-#
-# @base: #optional the common backing file name
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# @on-error: #optional the action to take on an error (default report).
-# 'stop' and 'enospc' can only be used if the block device
-# supports io-status (see BlockInfo). Since 1.3.
-#
-# Returns: Nothing on success
-# If @device does not exist, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block-stream',
- 'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
- '*on-error': 'BlockdevOnError' } }
-
-##
-# @block-job-set-speed:
-#
-# Set maximum speed for a background block operation.
-#
-# This command can only be issued when there is an active block job.
-#
-# Throttling can be disabled by setting the speed to 0.
-#
-# @device: the device name
-#
-# @speed: the maximum speed, in bytes per second, or 0 for unlimited.
-# Defaults to 0.
-#
-# Returns: Nothing on success
-# If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-set-speed',
- 'data': { 'device': 'str', 'speed': 'int' } }
-
-##
-# @block-job-cancel:
-#
-# Stop an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for cancellation. It is an error to call this command if no
-# operation is in progress.
-#
-# The operation will cancel as soon as possible and then emit the
-# BLOCK_JOB_CANCELLED event. Before that happens the job is still visible when
-# enumerated using query-block-jobs.
-#
-# For streaming, the image file retains its backing file unless the streaming
-# operation happens to complete just as it is being cancelled. A new streaming
-# operation can be started at a later time to finish copying all data from the
-# backing file.
-#
-# @device: the device name
-#
-# @force: #optional whether to allow cancellation of a paused job (default
-# false). Since 1.3.
-#
-# Returns: Nothing on success
-# If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
-
-##
-# @block-job-pause:
-#
-# Pause an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for pausing. It is an error to call this command if no
-# operation is in progress. Pausing an already paused job has no cumulative
-# effect; a single block-job-resume command will resume the job.
-#
-# The operation will pause as soon as possible. No event is emitted when
-# the operation is actually paused. Cancelling a paused job automatically
-# resumes it.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-# If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
-
-##
-# @block-job-resume:
-#
-# Resume an active background block operation.
-#
-# This command returns immediately after resuming a paused background block
-# operation. It is an error to call this command if no operation is in
-# progress. Resuming an already running job is not an error.
-#
-# This command also clears the error status of the job.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-# If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
-
-##
-# @block-job-complete:
-#
-# Manually trigger completion of an active background block operation. This
-# is supported for drive mirroring, where it also switches the device to
-# write to the target path only. The ability to complete is signaled with
-# a BLOCK_JOB_READY event.
-#
-# This command completes an active background block operation synchronously.
-# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
-# is not defined. Note that if an I/O error occurs during the processing of
-# this command: 1) the command itself will fail; 2) the error will be processed
-# according to the rerror/werror arguments that were specified when starting
-# the operation.
-#
-# A cancelled or paused job cannot be completed.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-# If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
-
-##
# @ObjectTypeInfo:
#
# This structure describes a search result from @qom-list-types
@@ -3661,49 +2463,6 @@
{ 'command': 'screendump', 'data': {'filename': 'str'} }
##
-# @nbd-server-start:
-#
-# Start an NBD server listening on the given host and port. Block
-# devices can then be exported using @nbd-server-add. The NBD
-# server will present them as named exports; for example, another
-# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
-#
-# @addr: Address on which to listen.
-#
-# Returns: error if the server is already running.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-start',
- 'data': { 'addr': 'SocketAddress' } }
-
-##
-# @nbd-server-add:
-#
-# Export a device to QEMU's embedded NBD server.
-#
-# @device: Block device to be exported
-#
-# @writable: Whether clients should be able to write to the device via the
-# NBD connection (default false). #optional
-#
-# Returns: error if the device is already marked for export.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
-
-##
-# @nbd-server-stop:
-#
-# Stop QEMU's embedded NBD server, and unregister all devices previously
-# added via @nbd-server-add.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-stop' }
-
-##
# @ChardevFile:
#
# Configuration info for file chardevs.
@@ -4243,410 +3002,6 @@
{ 'command': 'query-rx-filter', 'data': { '*name': 'str' },
'returns': ['RxFilterInfo'] }
-
-##
-# @BlockdevDiscardOptions
-#
-# Determines how to handle discard requests.
-#
-# @ignore: Ignore the request
-# @unmap: Forward as an unmap request
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevDiscardOptions',
- 'data': [ 'ignore', 'unmap' ] }
-
-##
-# @BlockdevDetectZeroesOptions
-#
-# Describes the operation mode for the automatic conversion of plain
-# zero writes by the OS to driver specific optimized zero write commands.
-#
-# @off: Disabled (default)
-# @on: Enabled
-# @unmap: Enabled and even try to unmap blocks if possible. This requires
-# also that @BlockdevDiscardOptions is set to unmap for this device.
-#
-# Since: 2.1
-##
-{ 'enum': 'BlockdevDetectZeroesOptions',
- 'data': [ 'off', 'on', 'unmap' ] }
-
-##
-# @BlockdevAioOptions
-#
-# Selects the AIO backend to handle I/O requests
-#
-# @threads: Use qemu's thread pool
-# @native: Use native AIO backend (only Linux and Windows)
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevAioOptions',
- 'data': [ 'threads', 'native' ] }
-
-##
-# @BlockdevCacheOptions
-#
-# Includes cache-related options for block devices
-#
-# @writeback: #optional enables writeback mode for any caches (default: true)
-# @direct: #optional enables use of O_DIRECT (bypass the host page cache;
-# default: false)
-# @no-flush: #optional ignore any flush requests for the device (default:
-# false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevCacheOptions',
- 'data': { '*writeback': 'bool',
- '*direct': 'bool',
- '*no-flush': 'bool' } }
-
-##
-# @BlockdevDriver
-#
-# Drivers that are supported in block device operations.
-#
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-#
-# Since: 2.0
-##
-{ 'enum': 'BlockdevDriver',
- 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
- 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
- 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
- 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
-
-##
-# @BlockdevOptionsBase
-#
-# Options that are available for all block devices, independent of the block
-# driver.
-#
-# @driver: block driver name
-# @id: #optional id by which the new block device can be referred to.
-# This is a required option on the top level of blockdev-add, and
-# currently not allowed on any other level.
-# @node-name: #optional the name of a block driver state node (Since 2.0)
-# @discard: #optional discard-related options (default: ignore)
-# @cache: #optional cache-related options
-# @aio: #optional AIO backend (default: threads)
-# @rerror: #optional how to handle read errors on the device
-# (default: report)
-# @werror: #optional how to handle write errors on the device
-# (default: enospc)
-# @read-only: #optional whether the block device should be read-only
-# (default: false)
-# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
-# (default: off)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsBase',
- 'data': { 'driver': 'BlockdevDriver',
- '*id': 'str',
- '*node-name': 'str',
- '*discard': 'BlockdevDiscardOptions',
- '*cache': 'BlockdevCacheOptions',
- '*aio': 'BlockdevAioOptions',
- '*rerror': 'BlockdevOnError',
- '*werror': 'BlockdevOnError',
- '*read-only': 'bool',
- '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
-
-##
-# @BlockdevOptionsFile
-#
-# Driver specific block device options for the file backend and similar
-# protocols.
-#
-# @filename: path to the image file
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsFile',
- 'data': { 'filename': 'str' } }
-
-##
-# @BlockdevOptionsVVFAT
-#
-# Driver specific block device options for the vvfat protocol.
-#
-# @dir: directory to be exported as FAT image
-# @fat-type: #optional FAT type: 12, 16 or 32
-# @floppy: #optional whether to export a floppy image (true) or
-# partitioned hard disk (false; default)
-# @rw: #optional whether to allow write operations (default: false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsVVFAT',
- 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
- '*rw': 'bool' } }
-
-##
-# @BlockdevOptionsGenericFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source.
-#
-# @file: reference to or definition of the data source block device
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericFormat',
- 'data': { 'file': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsGenericCOWFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source and an optional backing file.
-#
-# @backing: #optional reference to or definition of the backing file block
-# device (if missing, taken from the image file content). It is
-# allowed to pass an empty string here in order to disable the
-# default backing file.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericCOWFormat',
- 'base': 'BlockdevOptionsGenericFormat',
- 'data': { '*backing': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQcow2
-#
-# Driver specific block device options for qcow2.
-#
-# @lazy-refcounts: #optional whether to enable the lazy refcounts
-# feature (default is taken from the image file)
-#
-# @pass-discard-request: #optional whether discard requests to the qcow2
-# device should be forwarded to the data source
-#
-# @pass-discard-snapshot: #optional whether discard requests for the data source
-# should be issued when a snapshot operation (e.g.
-# deleting a snapshot) frees clusters in the qcow2 file
-#
-# @pass-discard-other: #optional whether discard requests for the data source
-# should be issued on other occasions where a cluster
-# gets freed
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsQcow2',
- 'base': 'BlockdevOptionsGenericCOWFormat',
- 'data': { '*lazy-refcounts': 'bool',
- '*pass-discard-request': 'bool',
- '*pass-discard-snapshot': 'bool',
- '*pass-discard-other': 'bool' } }
-
-##
-# @BlkdebugEvent
-#
-# Trigger events supported by blkdebug.
-##
-{ 'enum': 'BlkdebugEvent',
- 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
- 'l1_grow.activate_table', 'l2_load', 'l2_update',
- 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
- 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
- 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
- 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
- 'refblock_load', 'refblock_update', 'refblock_update_part',
- 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
- 'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
- 'refblock_alloc.switch_table', 'cluster_alloc',
- 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
- 'flush_to_disk' ] }
-
-##
-# @BlkdebugInjectErrorOptions
-#
-# Describes a single error injection for blkdebug.
-#
-# @event: trigger event
-#
-# @state: #optional the state identifier blkdebug needs to be in to
-# actually trigger the event; defaults to "any"
-#
-# @errno: #optional error identifier (errno) to be returned; defaults to
-# EIO
-#
-# @sector: #optional specifies the sector index which has to be affected
-# in order to actually trigger the event; defaults to "any
-# sector"
-#
-# @once: #optional disables further events after this one has been
-# triggered; defaults to false
-#
-# @immediately: #optional fail immediately; defaults to false
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugInjectErrorOptions',
- 'data': { 'event': 'BlkdebugEvent',
- '*state': 'int',
- '*errno': 'int',
- '*sector': 'int',
- '*once': 'bool',
- '*immediately': 'bool' } }
-
-##
-# @BlkdebugSetStateOptions
-#
-# Describes a single state-change event for blkdebug.
-#
-# @event: trigger event
-#
-# @state: #optional the current state identifier blkdebug needs to be in;
-# defaults to "any"
-#
-# @new_state: the state identifier blkdebug is supposed to assume if
-# this event is triggered
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugSetStateOptions',
- 'data': { 'event': 'BlkdebugEvent',
- '*state': 'int',
- 'new_state': 'int' } }
-
-##
-# @BlockdevOptionsBlkdebug
-#
-# Driver specific block device options for blkdebug.
-#
-# @image: underlying raw block device (or image file)
-#
-# @config: #optional filename of the configuration file
-#
-# @align: #optional required alignment for requests in bytes
-#
-# @inject-error: #optional array of error injection descriptions
-#
-# @set-state: #optional array of state-change descriptions
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkdebug',
- 'data': { 'image': 'BlockdevRef',
- '*config': 'str',
- '*align': 'int',
- '*inject-error': ['BlkdebugInjectErrorOptions'],
- '*set-state': ['BlkdebugSetStateOptions'] } }
-
-##
-# @BlockdevOptionsBlkverify
-#
-# Driver specific block device options for blkverify.
-#
-# @test: block device to be tested
-#
-# @raw: raw image used for verification
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkverify',
- 'data': { 'test': 'BlockdevRef',
- 'raw': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQuorum
-#
-# Driver specific block device options for Quorum
-#
-# @blkverify: #optional true if the driver must print content mismatch
-# set to false by default
-#
-# @children: the children block devices to use
-#
-# @vote-threshold: the vote limit under which a read will fail
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsQuorum',
- 'data': { '*blkverify': 'bool',
- 'children': [ 'BlockdevRef' ],
- 'vote-threshold': 'int' } }
-
-##
-# @BlockdevOptions
-#
-# Options for creating a block device.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevOptions',
- 'base': 'BlockdevOptionsBase',
- 'discriminator': 'driver',
- 'data': {
- 'file': 'BlockdevOptionsFile',
- 'host_device':'BlockdevOptionsFile',
- 'host_cdrom': 'BlockdevOptionsFile',
- 'host_floppy':'BlockdevOptionsFile',
- 'http': 'BlockdevOptionsFile',
- 'https': 'BlockdevOptionsFile',
- 'ftp': 'BlockdevOptionsFile',
- 'ftps': 'BlockdevOptionsFile',
- 'tftp': 'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
-# TODO iscsi: Wait for structured options
-# TODO nbd: Should take InetSocketAddress for 'host'?
-# TODO nfs: Wait for structured options
-# TODO rbd: Wait for structured options
-# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
- 'vvfat': 'BlockdevOptionsVVFAT',
- 'blkdebug': 'BlockdevOptionsBlkdebug',
- 'blkverify': 'BlockdevOptionsBlkverify',
- 'bochs': 'BlockdevOptionsGenericFormat',
- 'cloop': 'BlockdevOptionsGenericFormat',
- 'cow': 'BlockdevOptionsGenericCOWFormat',
- 'dmg': 'BlockdevOptionsGenericFormat',
- 'parallels': 'BlockdevOptionsGenericFormat',
- 'qcow': 'BlockdevOptionsGenericCOWFormat',
- 'qcow2': 'BlockdevOptionsQcow2',
- 'qed': 'BlockdevOptionsGenericCOWFormat',
- 'raw': 'BlockdevOptionsGenericFormat',
- 'vdi': 'BlockdevOptionsGenericFormat',
- 'vhdx': 'BlockdevOptionsGenericFormat',
- 'vmdk': 'BlockdevOptionsGenericCOWFormat',
- 'vpc': 'BlockdevOptionsGenericFormat',
- 'quorum': 'BlockdevOptionsQuorum'
- } }
-
-##
-# @BlockdevRef
-#
-# Reference to a block device.
-#
-# @definition: defines a new block device inline
-# @reference: references the ID of an existing block device. An
-# empty string means that no block device should be
-# referenced.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevRef',
- 'discriminator': {},
- 'data': { 'definition': 'BlockdevOptions',
- 'reference': 'str' } }
-
-##
-# @blockdev-add:
-#
-# Creates a new block device.
-#
-# @options: block device options for the new device
-#
-# Since: 1.7
-##
-{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
-
##
# @InputButton
#
diff --git a/qapi/block-core.json b/qapi/block-core.json
new file mode 100644
index 0000000000..7215e48130
--- /dev/null
+++ b/qapi/block-core.json
@@ -0,0 +1,1412 @@
+# -*- Mode: Python -*-
+#
+# QAPI block core definitions (vm unrelated)
+
+# QAPI common definitions
+{ 'include': 'common.json' }
+
+##
+# @SnapshotInfo
+#
+# @id: unique snapshot id
+#
+# @name: user chosen name
+#
+# @vm-state-size: size of the VM state
+#
+# @date-sec: UTC date of the snapshot in seconds
+#
+# @date-nsec: fractional part in nanoseconds to be used with date-sec
+#
+# @vm-clock-sec: VM clock relative to boot in seconds
+#
+# @vm-clock-nsec: fractional part in nanoseconds to be used with vm-clock-sec
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'SnapshotInfo',
+ 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
+ 'date-sec': 'int', 'date-nsec': 'int',
+ 'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
+
+##
+# @ImageInfoSpecificQCow2:
+#
+# @compat: compatibility level
+#
+# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificQCow2',
+ 'data': {
+ 'compat': 'str',
+ '*lazy-refcounts': 'bool'
+ } }
+
+##
+# @ImageInfoSpecificVmdk:
+#
+# @create-type: The create type of VMDK image
+#
+# @cid: Content id of image
+#
+# @parent-cid: Parent VMDK image's cid
+#
+# @extents: List of extent files
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificVmdk',
+ 'data': {
+ 'create-type': 'str',
+ 'cid': 'int',
+ 'parent-cid': 'int',
+ 'extents': ['ImageInfo']
+ } }
+
+##
+# @ImageInfoSpecific:
+#
+# A discriminated record of image format specific information structures.
+#
+# Since: 1.7
+##
+
+{ 'union': 'ImageInfoSpecific',
+ 'data': {
+ 'qcow2': 'ImageInfoSpecificQCow2',
+ 'vmdk': 'ImageInfoSpecificVmdk'
+ } }
+
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file
+#
+# @filename: name of the image file
+#
+# @format: format of the image file
+#
+# @virtual-size: maximum capacity in bytes of the image
+#
+# @actual-size: #optional actual size on disk in bytes of the image
+#
+# @dirty-flag: #optional true if image is not cleanly closed
+#
+# @cluster-size: #optional size of a cluster in bytes
+#
+# @encrypted: #optional true if the image is encrypted
+#
+# @compressed: #optional true if the image is compressed (Since 1.7)
+#
+# @backing-filename: #optional name of the backing file
+#
+# @full-backing-filename: #optional full path of the backing file
+#
+# @backing-filename-format: #optional the format of the backing file
+#
+# @snapshots: #optional list of VM snapshots
+#
+# @backing-image: #optional info of the backing image (since 1.6)
+#
+# @format-specific: #optional structure supplying additional format-specific
+# information (since 1.7)
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'ImageInfo',
+ 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
+ '*actual-size': 'int', 'virtual-size': 'int',
+ '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
+ '*backing-filename': 'str', '*full-backing-filename': 'str',
+ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
+ '*backing-image': 'ImageInfo',
+ '*format-specific': 'ImageInfoSpecific' } }
+
+##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors that occurred during the check
+#
+# @image-end-offset: #optional offset (in bytes) where the image ends, this
+# field is present if the driver for the image format
+# supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+# if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+# if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+# field is present if the driver for the image format
+# supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+# field is present if the driver for the image format
+# supports it
+#
+# @compressed-clusters: #optional total number of compressed clusters, this
+# field is present if the driver for the image format
+# supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+ 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+ '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+ '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+ '*total-clusters': 'int', '*allocated-clusters': 'int',
+ '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
+
+##
+# @BlockDeviceInfo:
+#
+# Information about the backing device for a block device.
+#
+# @file: the filename of the backing device
+#
+# @node-name: #optional the name of the block driver node (Since 2.0)
+#
+# @ro: true if the backing device was open read-only
+#
+# @drv: the name of the block format used to open the backing device. As of
+# 0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
+# 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
+#
+# @backing_file: #optional the name of the backing file (for copy-on-write)
+#
+# @backing_file_depth: number of files in the backing file chain (since: 1.2)
+#
+# @encrypted: true if the backing device is encrypted
+#
+# @encryption_key_missing: true if the backing device is encrypted but a
+# valid encryption key is missing
+#
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
+# @bps: total throughput limit in bytes per second is specified
+#
+# @bps_rd: read throughput limit in bytes per second is specified
+#
+# @bps_wr: write throughput limit in bytes per second is specified
+#
+# @iops: total I/O operations per second is specified
+#
+# @iops_rd: read I/O operations per second is specified
+#
+# @iops_wr: write I/O operations per second is specified
+#
+# @image: the info of image used (since: 1.6)
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Since: 0.14.0
+#
+##
+{ 'type': 'BlockDeviceInfo',
+ 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
+ '*backing_file': 'str', 'backing_file_depth': 'int',
+ 'encrypted': 'bool', 'encryption_key_missing': 'bool',
+ 'detect_zeroes': 'BlockdevDetectZeroesOptions',
+ 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+ 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+ 'image': 'ImageInfo',
+ '*bps_max': 'int', '*bps_rd_max': 'int',
+ '*bps_wr_max': 'int', '*iops_max': 'int',
+ '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*iops_size': 'int' } }
+
+##
+# @BlockDeviceIoStatus:
+#
+# An enumeration of block device I/O status.
+#
+# @ok: The last I/O operation has succeeded
+#
+# @failed: The last I/O operation has failed
+#
+# @nospace: The last I/O operation has failed due to a no-space condition
+#
+# Since: 1.0
+##
+{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
+
+##
+# @BlockDeviceMapEntry:
+#
+# Entry in the metadata map of the device (returned by "qemu-img map")
+#
+# @start: Offset in the image of the first byte described by this entry
+# (in bytes)
+#
+# @length: Length of the range described by this entry (in bytes)
+#
+# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
+# before reaching one for which the range is allocated. The value is
+# in the range 0 to the depth of the image chain - 1.
+#
+# @zero: the sectors in this range read as zeros
+#
+# @data: reading the image will actually read data from a file (in particular,
+# if @offset is present this means that the sectors are not simply
+# preallocated, but contain actual data in raw format)
+#
+# @offset: if present, the image file stores the data for this range in
+# raw format at the given offset.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockDeviceMapEntry',
+ 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
+ 'data': 'bool', '*offset': 'int' } }
+
+##
+# @BlockDirtyInfo:
+#
+# Block dirty bitmap information.
+#
+# @count: number of dirty bytes according to the dirty bitmap
+#
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
+# Since: 1.3
+##
+{ 'type': 'BlockDirtyInfo',
+ 'data': {'count': 'int', 'granularity': 'int'} }
+
+##
+# @BlockInfo:
+#
+# Block device information. This structure describes a virtual device and
+# the backing device associated with it.
+#
+# @device: The device name associated with the virtual device.
+#
+# @type: This field is returned only for compatibility reasons, it should
+# not be used (always returns 'unknown')
+#
+# @removable: True if the device supports removable media.
+#
+# @locked: True if the guest has locked this device from having its media
+# removed
+#
+# @tray_open: #optional True if the device has a tray and it is open
+# (only present if removable is true)
+#
+# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
+# driver has one or more dirty bitmaps) (Since 2.0)
+#
+# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
+# supports it and the VM is configured to stop on errors
+#
+# @inserted: #optional @BlockDeviceInfo describing the device if media is
+# present
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockInfo',
+ 'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
+ 'locked': 'bool', '*inserted': 'BlockDeviceInfo',
+ '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
+ '*dirty-bitmaps': ['BlockDirtyInfo'] } }
+
+##
+# @query-block:
+#
+# Get a list of BlockInfo for all virtual block devices.
+#
+# Returns: a list of @BlockInfo describing each virtual block device
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-block', 'returns': ['BlockInfo'] }
+
+##
+# @BlockDeviceStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @rd_bytes: The number of bytes read by the device.
+#
+# @wr_bytes: The number of bytes written by the device.
+#
+# @rd_operations: The number of read operations performed by the device.
+#
+# @wr_operations: The number of write operations performed by the device.
+#
+# @flush_operations: The number of cache flush operations performed by the
+# device (since 0.15.0)
+#
+# @flush_total_time_ns: Total time spent on cache flushes in nano-seconds
+# (since 0.15.0).
+#
+# @wr_total_time_ns: Total time spent on writes in nano-seconds (since 0.15.0).
+#
+# @rd_total_time_ns: Total time spent on reads in nano-seconds (since 0.15.0).
+#
+# @wr_highest_offset: The offset after the greatest byte written to the
+# device. The intended use of this information is for
+# growable sparse files (like qcow2) that are used on top
+# of a physical device.
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockDeviceStats',
+ 'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
+ 'wr_operations': 'int', 'flush_operations': 'int',
+ 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
+ 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
+
+##
+# @BlockStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @device: #optional If the stats are for a virtual block device, the name
+# corresponding to the virtual block device.
+#
+# @stats: A @BlockDeviceStats for the device.
+#
+# @parent: #optional This describes the file block device if it has one.
+#
+# @backing: #optional This describes the backing block device if it has one.
+# (Since 2.0)
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockStats',
+ 'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
+ '*parent': 'BlockStats',
+ '*backing': 'BlockStats'} }
+
+##
+# @query-blockstats:
+#
+# Query the @BlockStats for all virtual block devices.
+#
+# Returns: A list of @BlockStats for each virtual block device.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
+
+##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+# for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+# or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+# for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+ 'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
+# @MirrorSyncMode:
+#
+# An enumeration of possible behaviors for the initial synchronization
+# phase of storage mirroring.
+#
+# @top: copies data in the topmost image to the destination
+#
+# @full: copies data from all images to the destination
+#
+# @none: only copy data written from now on
+#
+# Since: 1.3
+##
+{ 'enum': 'MirrorSyncMode',
+ 'data': ['top', 'full', 'none'] }
+
+##
+# @BlockJobType:
+#
+# Type of a block job.
+#
+# @commit: block commit job type, see "block-commit"
+#
+# @stream: block stream job type, see "block-stream"
+#
+# @mirror: drive mirror job type, see "drive-mirror"
+#
+# @backup: drive backup job type, see "drive-backup"
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockJobType',
+ 'data': ['commit', 'stream', 'mirror', 'backup'] }
+
+##
+# @BlockJobInfo:
+#
+# Information about a long-running block device operation.
+#
+# @type: the job type ('stream' for image streaming)
+#
+# @device: the block device name
+#
+# @len: the maximum progress value
+#
+# @busy: false if the job is known to be in a quiescent state, with
+# no pending I/O. Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+# pause itself as soon as possible. Since 1.3.
+#
+# @offset: the current progress value
+#
+# @speed: the rate limit, bytes per second
+#
+# @io-status: the status of the job (since 1.3)
+#
+# Since: 1.1
+##
+{ 'type': 'BlockJobInfo',
+ 'data': {'type': 'str', 'device': 'str', 'len': 'int',
+ 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+ 'io-status': 'BlockDeviceIoStatus'} }
+
+##
+# @query-block-jobs:
+#
+# Return information about long-running block device operations.
+#
+# Returns: a list of @BlockJobInfo for each active block job
+#
+# Since: 1.1
+##
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+
+##
+# @block_passwd:
+#
+# This command sets the password of a block device that has not been opened
+# with a password and requires one.
+#
+# The two cases where this can happen are when a block device is created
+# through QEMU's initial command line or when a block device is changed
+# through the legacy @change interface.
+#
+# In the event that the block device is created through the initial command
+# line, the VM will start in the stopped state regardless of whether '-S' is
+# used. The intention is for a management tool to query the block devices to
+# determine which ones are encrypted, set the passwords with this command, and
+# then start the guest with the @cont command.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the block backend device to set the password on
+#
+# @node-name: #optional graph node name to set the password on (Since 2.0)
+#
+# @password: the password to use for the device
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+# If @device is not encrypted, DeviceNotEncrypted
+#
+# Notes: Not all block formats support encryption and some that do are not
+# able to validate that a password is correct. Disk corruption may
+# occur if an invalid password is specified.
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_passwd', 'data': {'*device': 'str',
+ '*node-name': 'str', 'password': 'str'} }
+
+##
+# @block_resize
+#
+# Resize a block image while a guest is running.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to get the image resized
+#
+# @node-name: #optional graph node name to get the image resized (Since 2.0)
+#
+# @size: new image size in bytes
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_resize', 'data': { '*device': 'str',
+ '*node-name': 'str',
+ 'size': 'int' }}
+
+##
+# @NewImageMode
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+# for the backing file. If there is no backing file available, the new
+# image will not be backed either.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode',
+ 'data': [ 'existing', 'absolute-paths' ] }
+
+##
+# @BlockdevSnapshot
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to generate the snapshot from.
+#
+# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
+#
+# @snapshot-file: the target of the new image. A new file will be created.
+#
+# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
+#
+# @format: #optional the format of the snapshot image, default is 'qcow2'.
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+##
+{ 'type': 'BlockdevSnapshot',
+ 'data': { '*device': 'str', '*node-name': 'str',
+ 'snapshot-file': 'str', '*snapshot-node-name': 'str',
+ '*format': 'str', '*mode': 'NewImageMode' } }
+
+##
+# @DriveBackup
+#
+# @device: the name of the device which should be copied.
+#
+# @target: the target of the new image. If the file exists, or if it
+# is a device, the existing file/device will be used as the new
+# destination. If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+# probe if @mode is 'existing', else the format of the source
+#
+# @sync: what parts of the disk image should be copied to the destination
+# (all the disk, only the sectors allocated in the topmost image, or
+# only new I/O).
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# @on-source-error: #optional the action to take on an error on the source,
+# default 'report'. 'stop' and 'enospc' can only be used
+# if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+# default 'report' (no limitations, since this applies to
+# a different block device than @device).
+#
+# Note that @on-source-error and @on-target-error only affect background I/O.
+# If an error occurs during a guest write request, the device's rerror/werror
+# actions will be used.
+#
+# Since: 1.6
+##
+{ 'type': 'DriveBackup',
+ 'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+ 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+ '*speed': 'int',
+ '*on-source-error': 'BlockdevOnError',
+ '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @blockdev-snapshot-sync
+#
+# Generates a synchronous snapshot of a block device.
+#
+# For the arguments, see the documentation of BlockdevSnapshot.
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+##
+{ 'command': 'blockdev-snapshot-sync',
+ 'data': 'BlockdevSnapshot' }
+
+##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device: the name of the device
+#
+# @base: #optional The file name of the backing image to write data into.
+# If not specified, this is the deepest backing image
+#
+# @top: The file name of the backing image within the image chain,
+# which contains the topmost data to be committed down.
+#
+# If top == base, that is an error.
+# If top == active, the job will not be completed by itself,
+# user needs to complete the job with the block-job-complete
+# command after getting the ready event. (Since 2.0)
+#
+# If the base image is smaller than top, then the base image
+# will be resized to be the same size as top. If top is
+# smaller than the base image, the base will not be
+# truncated. If you want the base image size to match the
+# size of the smaller top, you can safely truncate it
+# yourself once the commit operation successfully completes.
+#
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+# If commit or stream is already active on this device, DeviceInUse
+# If @device does not exist, DeviceNotFound
+# If image commit is not supported by this device, NotSupported
+# If @base or @top is invalid, a generic error is returned
+# If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+ 'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+ '*speed': 'int' } }
+
+##
+# @drive-backup
+#
+# Start a point-in-time copy of a block device to a new destination. The
+# status of ongoing drive-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# For the arguments, see the documentation of DriveBackup.
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.6
+##
+{ 'command': 'drive-backup', 'data': 'DriveBackup' }
+
+##
+# @query-named-block-nodes
+#
+# Get the named block driver list
+#
+# Returns: the list of BlockDeviceInfo
+#
+# Since: 2.0
+##
+{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
+
+##
+# @drive-mirror
+#
+# Start mirroring a block device's writes to a new destination.
+#
+# @device: the name of the device whose writes should be mirrored.
+#
+# @target: the target of the new image. If the file exists, or if it
+# is a device, the existing file/device will be used as the new
+# destination. If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+# probe if @mode is 'existing', else the format of the source
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+# (all the disk, only the sectors allocated in the topmost image, or
+# only new I/O).
+#
+# @granularity: #optional granularity of the dirty bitmap, default is 64K
+# if the image format doesn't have clusters, 4K if the clusters
+# are smaller than that, else the cluster size. Must be a
+# power of 2 between 512 and 64M (since 1.4).
+#
+# @buf-size: #optional maximum amount of data in flight from source to
+# target (since 1.4).
+#
+# @on-source-error: #optional the action to take on an error on the source,
+# default 'report'. 'stop' and 'enospc' can only be used
+# if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+# default 'report' (no limitations, since this applies to
+# a different block device than @device).
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.3
+##
+{ 'command': 'drive-mirror',
+ 'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+ 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+ '*speed': 'int', '*granularity': 'uint32',
+ '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
+ '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @block_set_io_throttle:
+#
+# Change I/O throttle limits for a block drive.
+#
+# @device: The name of the device
+#
+# @bps: total throughput limit in bytes per second
+#
+# @bps_rd: read throughput limit in bytes per second
+#
+# @bps_wr: write throughput limit in bytes per second
+#
+# @iops: total I/O operations per second
+#
+# @iops_rd: read I/O operations per second
+#
+# @iops_wr: write I/O operations per second
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Returns: Nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block_set_io_throttle',
+ 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+ 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+ '*bps_max': 'int', '*bps_rd_max': 'int',
+ '*bps_wr_max': 'int', '*iops_max': 'int',
+ '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*iops_size': 'int' } }
+
+##
+# @block-stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied. This command returns immediately once streaming
+# has started. The status of ongoing block streaming operations can be checked
+# with query-block-jobs. The operation can be stopped before it has completed
+# using the block-job-cancel command.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain. When streaming completes the image file will have the base
+# file as its backing file. This can be used to stream a subset of the backing
+# file chain instead of flattening the entire image.
+#
+# On successful completion the image file is updated to drop the backing file
+# and the BLOCK_JOB_COMPLETED event is emitted.
+#
+# @device: the device name
+#
+# @base: #optional the common backing file name
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# @on-error: #optional the action to take on an error (default report).
+# 'stop' and 'enospc' can only be used if the block device
+# supports io-status (see BlockInfo). Since 1.3.
+#
+# Returns: Nothing on success
+# If @device does not exist, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block-stream',
+ 'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+ '*on-error': 'BlockdevOnError' } }
+
+##
+# @block-job-set-speed:
+#
+# Set maximum speed for a background block operation.
+#
+# This command can only be issued when there is an active block job.
+#
+# Throttling can be disabled by setting the speed to 0.
+#
+# @device: the device name
+#
+# @speed: the maximum speed, in bytes per second, or 0 for unlimited.
+# Defaults to 0.
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-set-speed',
+ 'data': { 'device': 'str', 'speed': 'int' } }
+
+##
+# @block-job-cancel:
+#
+# Stop an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for cancellation. It is an error to call this command if no
+# operation is in progress.
+#
+# The operation will cancel as soon as possible and then emit the
+# BLOCK_JOB_CANCELLED event. Before that happens the job is still visible when
+# enumerated using query-block-jobs.
+#
+# For streaming, the image file retains its backing file unless the streaming
+# operation happens to complete just as it is being cancelled. A new streaming
+# operation can be started at a later time to finish copying all data from the
+# backing file.
+#
+# @device: the device name
+#
+# @force: #optional whether to allow cancellation of a paused job (default
+# false). Since 1.3.
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing. It is an error to call this command if no
+# operation is in progress. Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible. No event is emitted when
+# the operation is actually paused. Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation. It is an error to call this command if no operation is in
+# progress. Resuming an already running job is not an error.
+#
+# This command also clears the error status of the job.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
+
+##
+# @block-job-complete:
+#
+# Manually trigger completion of an active background block operation. This
+# is supported for drive mirroring, where it also switches the device to
+# write to the target path only. The ability to complete is signaled with
+# a BLOCK_JOB_READY event.
+#
+# This command completes an active background block operation synchronously.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined. Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused job cannot be completed.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+
+##
+# @BlockdevDiscardOptions
+#
+# Determines how to handle discard requests.
+#
+# @ignore: Ignore the request
+# @unmap: Forward as an unmap request
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevDiscardOptions',
+ 'data': [ 'ignore', 'unmap' ] }
+
+##
+# @BlockdevDetectZeroesOptions
+#
+# Describes the operation mode for the automatic conversion of plain
+# zero writes by the OS to driver specific optimized zero write commands.
+#
+# @off: Disabled (default)
+# @on: Enabled
+# @unmap: Enabled and even try to unmap blocks if possible. This also
+# requires that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+ 'data': [ 'off', 'on', 'unmap' ] }
+
+##
+# @BlockdevAioOptions
+#
+# Selects the AIO backend to handle I/O requests
+#
+# @threads: Use qemu's thread pool
+# @native: Use native AIO backend (only Linux and Windows)
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevAioOptions',
+ 'data': [ 'threads', 'native' ] }
+
+##
+# @BlockdevCacheOptions
+#
+# Includes cache-related options for block devices
+#
+# @writeback: #optional enables writeback mode for any caches (default: true)
+# @direct: #optional enables use of O_DIRECT (bypass the host page cache;
+# default: false)
+# @no-flush: #optional ignore any flush requests for the device (default:
+# false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevCacheOptions',
+ 'data': { '*writeback': 'bool',
+ '*direct': 'bool',
+ '*no-flush': 'bool' } }
+
+##
+# @BlockdevDriver
+#
+# Drivers that are supported in block device operations.
+#
+# @host_device, @host_cdrom, @host_floppy: Since 2.1
+#
+# Since: 2.0
+##
+{ 'enum': 'BlockdevDriver',
+ 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
+ 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
+ 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
+ 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
+
+##
+# @BlockdevOptionsBase
+#
+# Options that are available for all block devices, independent of the block
+# driver.
+#
+# @driver: block driver name
+# @id: #optional id by which the new block device can be referred to.
+# This is a required option on the top level of blockdev-add, and
+# currently not allowed on any other level.
+# @node-name: #optional the name of a block driver state node (Since 2.0)
+# @discard: #optional discard-related options (default: ignore)
+# @cache: #optional cache-related options
+# @aio: #optional AIO backend (default: threads)
+# @rerror: #optional how to handle read errors on the device
+# (default: report)
+# @werror: #optional how to handle write errors on the device
+# (default: enospc)
+# @read-only: #optional whether the block device should be read-only
+# (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+# (default: off)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsBase',
+ 'data': { 'driver': 'BlockdevDriver',
+ '*id': 'str',
+ '*node-name': 'str',
+ '*discard': 'BlockdevDiscardOptions',
+ '*cache': 'BlockdevCacheOptions',
+ '*aio': 'BlockdevAioOptions',
+ '*rerror': 'BlockdevOnError',
+ '*werror': 'BlockdevOnError',
+ '*read-only': 'bool',
+ '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
+
+##
+# @BlockdevOptionsFile
+#
+# Driver specific block device options for the file backend and similar
+# protocols.
+#
+# @filename: path to the image file
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsFile',
+ 'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVVFAT
+#
+# Driver specific block device options for the vvfat protocol.
+#
+# @dir: directory to be exported as FAT image
+# @fat-type: #optional FAT type: 12, 16 or 32
+# @floppy: #optional whether to export a floppy image (true) or
+# partitioned hard disk (false; default)
+# @rw: #optional whether to allow write operations (default: false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsVVFAT',
+ 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
+ '*rw': 'bool' } }
+
+##
+# @BlockdevOptionsGenericFormat
+#
+# Driver specific block device options for image formats that have no options
+# besides their data source.
+#
+# @file: reference to or definition of the data source block device
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericFormat',
+ 'data': { 'file': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsGenericCOWFormat
+#
+# Driver specific block device options for image formats that have no options
+# besides their data source and an optional backing file.
+#
+# @backing: #optional reference to or definition of the backing file block
+# device (if missing, taken from the image file content). It is
+# allowed to pass an empty string here in order to disable the
+# default backing file.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericCOWFormat',
+ 'base': 'BlockdevOptionsGenericFormat',
+ 'data': { '*backing': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQcow2
+#
+# Driver specific block device options for qcow2.
+#
+# @lazy-refcounts: #optional whether to enable the lazy refcounts
+# feature (default is taken from the image file)
+#
+# @pass-discard-request: #optional whether discard requests to the qcow2
+# device should be forwarded to the data source
+#
+# @pass-discard-snapshot: #optional whether discard requests for the data source
+# should be issued when a snapshot operation (e.g.
+# deleting a snapshot) frees clusters in the qcow2 file
+#
+# @pass-discard-other: #optional whether discard requests for the data source
+# should be issued on other occasions where a cluster
+# gets freed
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsQcow2',
+ 'base': 'BlockdevOptionsGenericCOWFormat',
+ 'data': { '*lazy-refcounts': 'bool',
+ '*pass-discard-request': 'bool',
+ '*pass-discard-snapshot': 'bool',
+ '*pass-discard-other': 'bool' } }
+
+##
+# @BlkdebugEvent
+#
+# Trigger events supported by blkdebug.
+##
+{ 'enum': 'BlkdebugEvent',
+ 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
+ 'l1_grow.activate_table', 'l2_load', 'l2_update',
+ 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
+ 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
+ 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
+ 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
+ 'refblock_load', 'refblock_update', 'refblock_update_part',
+ 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
+ 'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
+ 'refblock_alloc.switch_table', 'cluster_alloc',
+ 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
+ 'flush_to_disk' ] }
+
+##
+# @BlkdebugInjectErrorOptions
+#
+# Describes a single error injection for blkdebug.
+#
+# @event: trigger event
+#
+# @state: #optional the state identifier blkdebug needs to be in to
+# actually trigger the event; defaults to "any"
+#
+# @errno: #optional error identifier (errno) to be returned; defaults to
+# EIO
+#
+# @sector: #optional specifies the sector index which has to be affected
+# in order to actually trigger the event; defaults to "any
+# sector"
+#
+# @once: #optional disables further events after this one has been
+# triggered; defaults to false
+#
+# @immediately: #optional fail immediately; defaults to false
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugInjectErrorOptions',
+ 'data': { 'event': 'BlkdebugEvent',
+ '*state': 'int',
+ '*errno': 'int',
+ '*sector': 'int',
+ '*once': 'bool',
+ '*immediately': 'bool' } }
+
+##
+# @BlkdebugSetStateOptions
+#
+# Describes a single state-change event for blkdebug.
+#
+# @event: trigger event
+#
+# @state: #optional the current state identifier blkdebug needs to be in;
+# defaults to "any"
+#
+# @new_state: the state identifier blkdebug is supposed to assume if
+# this event is triggered
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugSetStateOptions',
+ 'data': { 'event': 'BlkdebugEvent',
+ '*state': 'int',
+ 'new_state': 'int' } }
+
+##
+# @BlockdevOptionsBlkdebug
+#
+# Driver specific block device options for blkdebug.
+#
+# @image: underlying raw block device (or image file)
+#
+# @config: #optional filename of the configuration file
+#
+# @align: #optional required alignment for requests in bytes
+#
+# @inject-error: #optional array of error injection descriptions
+#
+# @set-state: #optional array of state-change descriptions
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkdebug',
+ 'data': { 'image': 'BlockdevRef',
+ '*config': 'str',
+ '*align': 'int',
+ '*inject-error': ['BlkdebugInjectErrorOptions'],
+ '*set-state': ['BlkdebugSetStateOptions'] } }
+
+##
+# @BlockdevOptionsBlkverify
+#
+# Driver specific block device options for blkverify.
+#
+# @test: block device to be tested
+#
+# @raw: raw image used for verification
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkverify',
+ 'data': { 'test': 'BlockdevRef',
+ 'raw': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQuorum
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify: #optional true if the driver must print content mismatch
+# set to false by default
+#
+# @children: the children block devices to use
+#
+# @vote-threshold: the vote limit under which a read will fail
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsQuorum',
+ 'data': { '*blkverify': 'bool',
+ 'children': [ 'BlockdevRef' ],
+ 'vote-threshold': 'int' } }
+
+##
+# @BlockdevOptions
+#
+# Options for creating a block device.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevOptions',
+ 'base': 'BlockdevOptionsBase',
+ 'discriminator': 'driver',
+ 'data': {
+ 'file': 'BlockdevOptionsFile',
+ 'host_device':'BlockdevOptionsFile',
+ 'host_cdrom': 'BlockdevOptionsFile',
+ 'host_floppy':'BlockdevOptionsFile',
+ 'http': 'BlockdevOptionsFile',
+ 'https': 'BlockdevOptionsFile',
+ 'ftp': 'BlockdevOptionsFile',
+ 'ftps': 'BlockdevOptionsFile',
+ 'tftp': 'BlockdevOptionsFile',
+# TODO gluster: Wait for structured options
+# TODO iscsi: Wait for structured options
+# TODO nbd: Should take InetSocketAddress for 'host'?
+# TODO nfs: Wait for structured options
+# TODO rbd: Wait for structured options
+# TODO sheepdog: Wait for structured options
+# TODO ssh: Should take InetSocketAddress for 'host'?
+ 'vvfat': 'BlockdevOptionsVVFAT',
+ 'blkdebug': 'BlockdevOptionsBlkdebug',
+ 'blkverify': 'BlockdevOptionsBlkverify',
+ 'bochs': 'BlockdevOptionsGenericFormat',
+ 'cloop': 'BlockdevOptionsGenericFormat',
+ 'cow': 'BlockdevOptionsGenericCOWFormat',
+ 'dmg': 'BlockdevOptionsGenericFormat',
+ 'parallels': 'BlockdevOptionsGenericFormat',
+ 'qcow': 'BlockdevOptionsGenericCOWFormat',
+ 'qcow2': 'BlockdevOptionsQcow2',
+ 'qed': 'BlockdevOptionsGenericCOWFormat',
+ 'raw': 'BlockdevOptionsGenericFormat',
+ 'vdi': 'BlockdevOptionsGenericFormat',
+ 'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vmdk': 'BlockdevOptionsGenericCOWFormat',
+ 'vpc': 'BlockdevOptionsGenericFormat',
+ 'quorum': 'BlockdevOptionsQuorum'
+ } }
+
+##
+# @BlockdevRef
+#
+# Reference to a block device.
+#
+# @definition: defines a new block device inline
+# @reference: references the ID of an existing block device. An
+# empty string means that no block device should be
+# referenced.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevRef',
+ 'discriminator': {},
+ 'data': { 'definition': 'BlockdevOptions',
+ 'reference': 'str' } }
+
+##
+# @blockdev-add:
+#
+# Creates a new block device.
+#
+# @options: block device options for the new device
+#
+# Since: 1.7
+##
+{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
+
diff --git a/qapi/block.json b/qapi/block.json
new file mode 100644
index 0000000000..61c463ab05
--- /dev/null
+++ b/qapi/block.json
@@ -0,0 +1,166 @@
+# -*- Mode: Python -*-
+#
+# QAPI block definitions (vm related)
+
+# QAPI block core definitions
+{ 'include': 'block-core.json' }
+
+##
+# @BiosAtaTranslation:
+#
+# Policy that BIOS should use to interpret cylinder/head/sector
+# addresses. Note that Bochs BIOS and SeaBIOS will not actually
+# translate logical CHS to physical; instead, they will use logical
+# block addressing.
+#
+# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
+# depending on the size of the disk. If they are not passed,
+# choose none if QEMU can guess that the disk had 16 or fewer
+# heads, large if QEMU can guess that the disk had 131072 or
+# fewer tracks across all heads (i.e. cylinders*heads<131072),
+# otherwise LBA.
+#
+# @none: The physical disk geometry is equal to the logical geometry.
+#
+# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
+# heads (if fewer than 255 are enough to cover the whole disk
+# with 1024 cylinders/head). The number of cylinders/head is
+# then computed based on the number of sectors and heads.
+#
+# @large: The number of cylinders per head is scaled down to 1024
+# by correspondingly scaling up the number of heads.
+#
+# @rechs: Same as @large, but first convert a 16-head geometry to
+# 15-head, by proportionally scaling up the number of
+# cylinders/head.
+#
+# Since: 2.0
+##
+{ 'enum': 'BiosAtaTranslation',
+ 'data': ['auto', 'none', 'lba', 'large', 'rechs']}
+
+##
+# @BlockdevSnapshotInternal
+#
+# @device: the name of the device to generate the snapshot from
+#
+# @name: the name of the internal snapshot to be created
+#
+# Notes: In transaction, if @name is empty, or any snapshot matching @name
+# exists, the operation will fail. Only some image formats support it,
+# for example, qcow2, rbd, and sheepdog.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevSnapshotInternal',
+ 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @blockdev-snapshot-internal-sync
+#
+# Synchronously take an internal snapshot of a block device, when the format
+# of the image used supports it.
+#
+# For the arguments, see the documentation of BlockdevSnapshotInternal.
+#
+# Returns: nothing on success
+# If @device is not a valid block device, DeviceNotFound
+# If any snapshot matching @name exists, or @name is empty,
+# GenericError
+# If the format of the image used does not support it,
+# BlockFormatFeatureNotSupported
+#
+# Since: 1.7
+##
+{ 'command': 'blockdev-snapshot-internal-sync',
+ 'data': 'BlockdevSnapshotInternal' }
+
+##
+# @blockdev-snapshot-delete-internal-sync
+#
+# Synchronously delete an internal snapshot of a block device, when the format
+# of the image used supports it. The snapshot is identified by name or id or
+# both. One of the name or id is required. Return SnapshotInfo for the
+# successfully deleted snapshot.
+#
+# @device: the name of the device to delete the snapshot from
+#
+# @id: #optional the snapshot's ID to be deleted
+#
+# @name: #optional the snapshot's name to be deleted
+#
+# Returns: SnapshotInfo on success
+# If @device is not a valid block device, DeviceNotFound
+# If snapshot not found, GenericError
+# If the format of the image used does not support it,
+# BlockFormatFeatureNotSupported
+# If @id and @name are both not specified, GenericError
+#
+# Since: 1.7
+##
+{ 'command': 'blockdev-snapshot-delete-internal-sync',
+ 'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
+ 'returns': 'SnapshotInfo' }
+
+##
+# @eject:
+#
+# Ejects a device from a removable drive.
+#
+# @device: The name of the device
+#
+# @force: #optional If true, eject regardless of whether the drive is locked.
+# If not specified, the default value is false.
+#
+# Returns: Nothing on success
+# If @device is not a valid block device, DeviceNotFound
+#
+# Notes: Ejecting a device with no media results in success
+#
+# Since: 0.14.0
+##
+{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
+
+##
+# @nbd-server-start:
+#
+# Start an NBD server listening on the given host and port. Block
+# devices can then be exported using @nbd-server-add. The NBD
+# server will present them as named exports; for example, another
+# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+#
+# @addr: Address on which to listen.
+#
+# Returns: error if the server is already running.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-start',
+ 'data': { 'addr': 'SocketAddress' } }
+
+##
+# @nbd-server-add:
+#
+# Export a device to QEMU's embedded NBD server.
+#
+# @device: Block device to be exported
+#
+# @writable: Whether clients should be able to write to the device via the
+# NBD connection (default false). #optional
+#
+# Returns: error if the device is already marked for export.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
+
+##
+# @nbd-server-stop:
+#
+# Stop QEMU's embedded NBD server, and unregister all devices previously
+# added via @nbd-server-add.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-stop' }
+
diff --git a/qapi/common.json b/qapi/common.json
new file mode 100644
index 0000000000..4e9a21f2f6
--- /dev/null
+++ b/qapi/common.json
@@ -0,0 +1,89 @@
+# -*- Mode: Python -*-
+#
+# QAPI common definitions
+
+##
+# @ErrorClass
+#
+# QEMU error classes
+#
+# @GenericError: this is used for errors that don't require a specific error
+# class. This should be the default case for most errors
+#
+# @CommandNotFound: the requested command has not been found
+#
+# @DeviceEncrypted: the requested operation can't be fulfilled because the
+# selected device is encrypted
+#
+# @DeviceNotActive: a device has failed to become active
+#
+# @DeviceNotFound: the requested device has not been found
+#
+# @KVMMissingCap: the requested operation can't be fulfilled because a
+# required KVM capability is missing
+#
+# Since: 1.2
+##
+{ 'enum': 'ErrorClass',
+ 'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
+ 'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
+
+##
+# @VersionInfo:
+#
+# A description of QEMU's version.
+#
+# @qemu.major: The major version of QEMU
+#
+# @qemu.minor: The minor version of QEMU
+#
+# @qemu.micro: The micro version of QEMU. By current convention, a micro
+# version of 50 signifies a development branch. A micro version
+# greater than or equal to 90 signifies a release candidate for
+# the next minor version. A micro version of less than 50
+# signifies a stable release.
+#
+# @package: QEMU will always set this field to an empty string. Downstream
+# versions of QEMU should set this to a non-empty string. The
+# exact format depends on the downstream however it is highly
+# recommended that a unique name is used.
+#
+# Since: 0.14.0
+##
+{ 'type': 'VersionInfo',
+ 'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
+ 'package': 'str'} }
+
+##
+# @query-version:
+#
+# Returns the current version of QEMU.
+#
+# Returns: A @VersionInfo object describing the current version of QEMU.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-version', 'returns': 'VersionInfo' }
+
+##
+# @CommandInfo:
+#
+# Information about a QMP command
+#
+# @name: The command name
+#
+# Since: 0.14.0
+##
+{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
+
+##
+# @query-commands:
+#
+# Return a list of supported QMP commands by this server
+#
+# Returns: A list of @CommandInfo for all supported commands
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
+
diff --git a/qemu-img.c b/qemu-img.c
index b3d2bc6f02..aa89ba21fd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -580,10 +580,11 @@ static int collect_image_check(BlockDriverState *bs,
/*
* Checks an image for consistency. Exit codes:
*
- * 0 - Check completed, image is good
- * 1 - Check not completed because of internal errors
- * 2 - Check completed, image is corrupted
- * 3 - Check completed, image has leaked clusters, but is good otherwise
+ * 0 - Check completed, image is good
+ * 1 - Check not completed because of internal errors
+ * 2 - Check completed, image is corrupted
+ * 3 - Check completed, image has leaked clusters, but is good otherwise
+ * 63 - Checks are not supported by the image format
*/
static int img_check(int argc, char **argv)
{
diff --git a/qemu-img.texi b/qemu-img.texi
index f84590ebf0..c68b54148a 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -126,6 +126,29 @@ wrong fix or hiding corruption that has already occurred.
Only the formats @code{qcow2}, @code{qed} and @code{vdi} support
consistency checks.
+In case the image does not have any inconsistencies, check exits with @code{0}.
+Other exit codes indicate the kind of inconsistency found or if another error
+occurred. The following table summarizes all exit codes of the check subcommand:
+
+@table @option
+
+@item 0
+Check completed, the image is (now) consistent
+@item 1
+Check not completed because of internal errors
+@item 2
+Check completed, image is corrupted
+@item 3
+Check completed, image has leaked clusters, but is not corrupted
+@item 63
+Checks are not supported by the image format
+
+@end table
+
+If @code{-r} is specified, exit codes representing the image state refer to the
+state after (the attempt at) repairing it. That is, a successful @code{-r all}
+will yield the exit code 0, independently of the image state before.
+
@item create [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
Create the new disk image @var{filename} of size @var{size} and format
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 1d4ffd3603..3de6ab80e0 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -12,8 +12,10 @@
#include <glib.h>
#include <math.h>
+#include "block/aio.h"
#include "qemu/throttle.h"
+AioContext *ctx;
LeakyBucket bkt;
ThrottleConfig cfg;
ThrottleState ts;
@@ -104,7 +106,8 @@ static void test_init(void)
memset(&ts, 1, sizeof(ts));
/* init the structure */
- throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
/* check initialized fields */
g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
@@ -126,7 +129,8 @@ static void test_init(void)
static void test_destroy(void)
{
int i;
- throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
throttle_destroy(&ts);
for (i = 0; i < 2; i++) {
g_assert(!ts.timers[i]);
@@ -165,7 +169,8 @@ static void test_config_functions(void)
orig_cfg.op_size = 1;
- throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
/* structure reset by throttle_init previous_leak should be null */
g_assert(!ts.previous_leak);
throttle_config(&ts, &orig_cfg);
@@ -324,7 +329,8 @@ static void test_have_timer(void)
g_assert(!throttle_have_timer(&ts));
/* init the structure */
- throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
/* timer set by init should return true */
g_assert(throttle_have_timer(&ts));
@@ -332,6 +338,29 @@ static void test_have_timer(void)
throttle_destroy(&ts);
}
+static void test_detach_attach(void)
+{
+ /* zero the structure */
+ memset(&ts, 0, sizeof(ts));
+
+ /* init the structure */
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
+
+ /* timer set by init should return true */
+ g_assert(throttle_have_timer(&ts));
+
+ /* timer should no longer exist after detaching */
+ throttle_detach_aio_context(&ts);
+ g_assert(!throttle_have_timer(&ts));
+
+ /* timer should exist again after attaching */
+ throttle_attach_aio_context(&ts, ctx);
+ g_assert(throttle_have_timer(&ts));
+
+ throttle_destroy(&ts);
+}
+
static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
int size, /* size of the operation to do */
double avg, /* io limit */
@@ -357,7 +386,8 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
cfg.op_size = op_size;
- throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, write_timer_cb, &ts);
throttle_config(&ts, &cfg);
/* account a read */
@@ -461,7 +491,15 @@ static void test_accounting(void)
int main(int argc, char **argv)
{
+ GSource *src;
+
init_clocks();
+
+ ctx = aio_context_new();
+ src = aio_get_g_source(ctx);
+ g_source_attach(src, NULL);
+ g_source_unref(src);
+
do {} while (g_main_context_iteration(NULL, false));
/* tests in the same order as the header function declarations */
@@ -471,6 +509,7 @@ int main(int argc, char **argv)
g_test_add_func("/throttle/init", test_init);
g_test_add_func("/throttle/destroy", test_destroy);
g_test_add_func("/throttle/have_timer", test_have_timer);
+ g_test_add_func("/throttle/detach_attach", test_detach_attach);
g_test_add_func("/throttle/config/enabled", test_enabled);
g_test_add_func("/throttle/config/conflicting", test_conflicting_config);
g_test_add_func("/throttle/config/is_valid", test_is_valid);
diff --git a/util/throttle.c b/util/throttle.c
index 02e6f15587..f976ac7de5 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -22,6 +22,7 @@
#include "qemu/throttle.h"
#include "qemu/timer.h"
+#include "block/aio.h"
/* This function make a bucket leak
*
@@ -157,8 +158,18 @@ bool throttle_compute_timer(ThrottleState *ts,
return false;
}
+/* Add timers to event loop */
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context)
+{
+ ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+ ts->read_timer_cb, ts->timer_opaque);
+ ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+ ts->write_timer_cb, ts->timer_opaque);
+}
+
/* To be called first on the ThrottleState */
void throttle_init(ThrottleState *ts,
+ AioContext *aio_context,
QEMUClockType clock_type,
QEMUTimerCB *read_timer_cb,
QEMUTimerCB *write_timer_cb,
@@ -167,8 +178,10 @@ void throttle_init(ThrottleState *ts,
memset(ts, 0, sizeof(ThrottleState));
ts->clock_type = clock_type;
- ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque);
- ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque);
+ ts->read_timer_cb = read_timer_cb;
+ ts->write_timer_cb = write_timer_cb;
+ ts->timer_opaque = timer_opaque;
+ throttle_attach_aio_context(ts, aio_context);
}
/* destroy a timer */
@@ -181,8 +194,8 @@ static void throttle_timer_destroy(QEMUTimer **timer)
*timer = NULL;
}
-/* To be called last on the ThrottleState */
-void throttle_destroy(ThrottleState *ts)
+/* Remove timers from event loop */
+void throttle_detach_aio_context(ThrottleState *ts)
{
int i;
@@ -191,6 +204,12 @@ void throttle_destroy(ThrottleState *ts)
}
}
+/* To be called last on the ThrottleState */
+void throttle_destroy(ThrottleState *ts)
+{
+ throttle_detach_aio_context(ts);
+}
+
/* is any throttling timer configured */
bool throttle_have_timer(ThrottleState *ts)
{