From 1cc8e54ada97f7ac479554e15ca9e426c895b158 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 22 Mar 2018 10:57:14 +0100 Subject: block: Avoid unnecessary aio_poll() in AIO_WAIT_WHILE() Commit 91af091f923 added an additional aio_poll() to BDRV_POLL_WHILE() in order to make sure that all pending BHs are executed on drain. This was the wrong place to make the fix, as it is useless overhead for all other users of the macro and unnecessarily complicates the mechanism. This patch effectively reverts said commit (the context has changed a bit and the code has moved to AIO_WAIT_WHILE()) and instead polls in the loop condition for drain. The effect is probably hard to measure in any real-world use case because actual I/O will dominate, but if I run only the initialisation part of 'qemu-img convert' where it calls bdrv_block_status() for the whole image to find out how much data there is copy, this phase actually needs only roughly half the time after this patch. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- include/block/aio-wait.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 8c90a2e66e..783d3678dd 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -73,29 +73,23 @@ typedef struct { */ #define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ bool waited_ = false; \ - bool busy_ = true; \ AioWait *wait_ = (wait); \ AioContext *ctx_ = (ctx); \ if (in_aio_context_home_thread(ctx_)) { \ - while ((cond) || busy_) { \ - busy_ = aio_poll(ctx_, (cond)); \ - waited_ |= !!(cond) | busy_; \ + while ((cond)) { \ + aio_poll(ctx_, true); \ + waited_ = true; \ } \ } else { \ assert(qemu_get_current_aio_context() == \ qemu_get_aio_context()); \ /* Increment wait_->num_waiters before evaluating cond. */ \ atomic_inc(&wait_->num_waiters); \ - while (busy_) { \ - if ((cond)) { \ - waited_ = busy_ = true; \ - aio_context_release(ctx_); \ - aio_poll(qemu_get_aio_context(), true); \ - aio_context_acquire(ctx_); \ - } else { \ - busy_ = aio_poll(ctx_, false); \ - waited_ |= busy_; \ - } \ + while ((cond)) { \ + aio_context_release(ctx_); \ + aio_poll(qemu_get_aio_context(), true); \ + aio_context_acquire(ctx_); \ + waited_ = true; \ } \ atomic_dec(&wait_->num_waiters); \ } \ -- cgit v1.2.3-55-g7522 From 89bd030533e3592ca0a995450dcfc5d53e459e20 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 22 Mar 2018 14:11:20 +0100 Subject: block: Really pause block jobs on drain We already requested that block jobs be paused in .bdrv_drained_begin, but no guarantee was made that the job was actually inactive at the point where bdrv_drained_begin() returned. This introduces a new callback BdrvChildRole.bdrv_drained_poll() and uses it to make bdrv_drain_poll() consider block jobs using the node to be drained. For the test case to work as expected, we have to switch from block_job_sleep_ns() to qemu_co_sleep_ns() so that the test job is even considered active and must be waited for when draining the node. Signed-off-by: Kevin Wolf --- block.c | 9 +++++++++ block/io.c | 40 ++++++++++++++++++++++++++++++++++------ block/mirror.c | 8 ++++++++ blockjob.c | 23 +++++++++++++++++++++++ include/block/block.h | 8 ++++++++ include/block/block_int.h | 7 +++++++ include/block/blockjob_int.h | 8 ++++++++ tests/test-bdrv-drain.c | 18 ++++++++++-------- 8 files changed, 107 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index afe30caac3..8cf9cd8855 100644 --- a/block.c +++ b/block.c @@ -821,6 +821,12 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) bdrv_drained_begin(bs); } +static bool bdrv_child_cb_drained_poll(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + return bdrv_drain_poll(bs, NULL); +} + static void bdrv_child_cb_drained_end(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -905,6 +911,7 @@ const BdrvChildRole child_file = { .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, @@ -929,6 +936,7 @@ const BdrvChildRole child_format = { .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, @@ -1048,6 +1056,7 @@ const BdrvChildRole child_backing = { .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, + .drained_poll = bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, diff --git a/block/io.c b/block/io.c index bc7a2d78b8..5820e73bb2 100644 --- a/block/io.c +++ b/block/io.c @@ -69,6 +69,23 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } +static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore) +{ + BdrvChild *c, *next; + bool busy = false; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore) { + continue; + } + if (c->role->drained_poll) { + busy |= c->role->drained_poll(c); + } + } + + return busy; +} + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) { dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); @@ -183,21 +200,32 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) } /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -static bool bdrv_drain_poll(BlockDriverState *bs) +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent) +{ + if (bdrv_parent_drained_poll(bs, ignore_parent)) { + return true; + } + + return atomic_read(&bs->in_flight); +} + +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + BdrvChild *ignore_parent) { /* Execute pending BHs first and check everything else only after the BHs * have executed. */ while (aio_poll(bs->aio_context, false)); - return atomic_read(&bs->in_flight); + + return bdrv_drain_poll(bs, ignore_parent); } -static bool bdrv_drain_recurse(BlockDriverState *bs) +static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent) { BdrvChild *child, *tmp; bool waited; /* Wait for drained requests to finish */ - waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs)); + waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { BlockDriverState *bs = child->bs; @@ -214,7 +242,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs) */ bdrv_ref(bs); } - waited |= bdrv_drain_recurse(bs); + waited |= bdrv_drain_recurse(bs, child); if (in_main_loop) { bdrv_unref(bs); } @@ -290,7 +318,7 @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, bdrv_parent_drained_begin(bs, parent); bdrv_drain_invoke(bs, true); - bdrv_drain_recurse(bs); + bdrv_drain_recurse(bs, parent); if (recursive) { bs->recursive_quiesce_counter++; diff --git a/block/mirror.c b/block/mirror.c index 435268bbbf..c2146c1ab3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -964,6 +964,12 @@ static void mirror_pause(Job *job) mirror_wait_for_all_io(s); } +static bool mirror_drained_poll(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + return !!s->in_flight; +} + static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -997,6 +1003,7 @@ static const BlockJobDriver mirror_job_driver = { .pause = mirror_pause, .complete = mirror_complete, }, + .drained_poll = mirror_drained_poll, .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; @@ -1012,6 +1019,7 @@ static const BlockJobDriver commit_active_job_driver = { .pause = mirror_pause, .complete = mirror_complete, }, + .drained_poll = mirror_drained_poll, .attached_aio_context = mirror_attached_aio_context, .drain = mirror_drain, }; diff --git a/blockjob.c b/blockjob.c index 0306533a2e..be5903aa96 100644 --- a/blockjob.c +++ b/blockjob.c @@ -155,6 +155,28 @@ static void child_job_drained_begin(BdrvChild *c) job_pause(&job->job); } +static bool child_job_drained_poll(BdrvChild *c) +{ + BlockJob *bjob = c->opaque; + Job *job = &bjob->job; + const BlockJobDriver *drv = block_job_driver(bjob); + + /* An inactive or completed job doesn't have any pending requests. Jobs + * with !job->busy are either already paused or have a pause point after + * being reentered, so no job driver code will run before they pause. */ + if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) { + return false; + } + + /* Otherwise, assume that it isn't fully stopped yet, but allow the job to + * override this assumption. */ + if (drv->drained_poll) { + return drv->drained_poll(bjob); + } else { + return true; + } +} + static void child_job_drained_end(BdrvChild *c) { BlockJob *job = c->opaque; @@ -164,6 +186,7 @@ static void child_job_drained_end(BdrvChild *c) static const BdrvChildRole child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, + .drained_poll = child_job_drained_poll, .drained_end = child_job_drained_end, .stay_at_node = true, }; diff --git a/include/block/block.h b/include/block/block.h index e677080c4e..cebbb39c6c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -567,6 +567,14 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); */ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); +/** + * bdrv_drain_poll: + * + * Poll for pending requests in @bs and its parents (except for + * @ignore_parent). This is part of bdrv_drained_begin. + */ +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent); + /** * bdrv_drained_begin: * diff --git a/include/block/block_int.h b/include/block/block_int.h index 327e478a73..1b811db8ec 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -605,6 +605,13 @@ struct BdrvChildRole { void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + /* + * Returns whether the parent has pending requests for the child. This + * callback is polled after .drained_begin() has been called until all + * activity on the child has stopped. + */ + bool (*drained_poll)(BdrvChild *child); + /* Notifies the parent that the child has been activated/inactivated (e.g. * when migration is completing) and it can start/stop requesting * permissions and doing I/O on it. */ diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 5cd50c6639..e4a318dd15 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -38,6 +38,14 @@ struct BlockJobDriver { /** Generic JobDriver callbacks and settings */ JobDriver job_driver; + /* + * Returns whether the job has pending requests for the child or will + * submit new requests before the next pause point. This callback is polled + * in the context of draining a job node after requesting that the job be + * paused, until all activity on the child has stopped. + */ + bool (*drained_poll)(BlockJob *job); + /* * If the callback is not NULL, it will be invoked before the job is * resumed in a new AioContext. This is the place to move any resources diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index cc03bc171b..22d31c953e 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -686,7 +686,11 @@ static void coroutine_fn test_job_start(void *opaque) job_transition_to_ready(&s->common.job); while (!s->should_complete) { - job_sleep_ns(&s->common.job, 100000); + /* Avoid block_job_sleep_ns() because it marks the job as !busy. We + * want to emulate some actual activity (probably some I/O) here so + * that drain has to wait for this acitivity to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + job_pause_point(&s->common.job); } job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); @@ -733,7 +737,7 @@ static void test_blockjob_common(enum drain_type drain_type) g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ do_drain_begin(drain_type, src); @@ -743,15 +747,14 @@ static void test_blockjob_common(enum drain_type drain_type) } else { g_assert_cmpint(job->job.pause_count, ==, 1); } - /* XXX We don't wait until the job is actually paused. Is this okay? */ - /* g_assert_true(job->job.paused); */ + g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ do_drain_end(drain_type, src); g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ do_drain_begin(drain_type, target); @@ -761,15 +764,14 @@ static void test_blockjob_common(enum drain_type drain_type) } else { g_assert_cmpint(job->job.pause_count, ==, 1); } - /* XXX We don't wait until the job is actually paused. Is this okay? */ - /* g_assert_true(job->job.paused); */ + g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ do_drain_end(drain_type, target); g_assert_cmpint(job->job.pause_count, ==, 0); g_assert_false(job->job.paused); - g_assert_false(job->job.busy); /* We're in job_sleep_ns() */ + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ ret = job_complete_sync(&job->job, &error_abort); g_assert_cmpint(ret, ==, 0); -- cgit v1.2.3-55-g7522 From fe4f0614ef9e361dae12012d3c400657444836cf Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 23 Mar 2018 12:40:41 +0100 Subject: block: Drain recursively with a single BDRV_POLL_WHILE() Anything can happen inside BDRV_POLL_WHILE(), including graph changes that may interfere with its callers (e.g. child list iteration in recursive callers of bdrv_do_drained_begin). Switch to a single BDRV_POLL_WHILE() call for the whole subtree at the end of bdrv_do_drained_begin() to avoid such effects. The recursion happens now inside the loop condition. As the graph can only change between bdrv_drain_poll() calls, but not inside of it, doing the recursion here is safe. Signed-off-by: Kevin Wolf --- block.c | 2 +- block/io.c | 63 ++++++++++++++++++++++++++++++++++++--------------- include/block/block.h | 9 +++++--- 3 files changed, 52 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 8cf9cd8855..80abd3c2ae 100644 --- a/block.c +++ b/block.c @@ -824,7 +824,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; - return bdrv_drain_poll(bs, NULL); + return bdrv_drain_poll(bs, false, NULL); } static void bdrv_child_cb_drained_end(BdrvChild *child) diff --git a/block/io.c b/block/io.c index 5f6d5eed52..a413841bfc 100644 --- a/block/io.c +++ b/block/io.c @@ -165,6 +165,7 @@ typedef struct { bool done; bool begin; bool recursive; + bool poll; BdrvChild *parent; } BdrvCoDrainData; @@ -200,27 +201,42 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) } /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent) +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent) { + BdrvChild *child, *next; + if (bdrv_parent_drained_poll(bs, ignore_parent)) { return true; } - return atomic_read(&bs->in_flight); + if (atomic_read(&bs->in_flight)) { + return true; + } + + if (recursive) { + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + if (bdrv_drain_poll(child->bs, recursive, child)) { + return true; + } + } + } + + return false; } -static bool bdrv_drain_poll_top_level(BlockDriverState *bs, +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, BdrvChild *ignore_parent) { /* Execute pending BHs first and check everything else only after the BHs * have executed. */ while (aio_poll(bs->aio_context, false)); - return bdrv_drain_poll(bs, ignore_parent); + return bdrv_drain_poll(bs, recursive, ignore_parent); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent); + BdrvChild *parent, bool poll); static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, BdrvChild *parent); @@ -232,7 +248,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent); + bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); } else { bdrv_do_drained_end(bs, data->recursive, data->parent); } @@ -243,7 +259,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, bool recursive, - BdrvChild *parent) + BdrvChild *parent, bool poll) { BdrvCoDrainData data; @@ -258,6 +274,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .begin = begin, .recursive = recursive, .parent = parent, + .poll = poll, }; bdrv_inc_in_flight(bs); aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), @@ -270,12 +287,12 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent) + BdrvChild *parent, bool poll) { BdrvChild *child, *next; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent); + bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); return; } @@ -287,25 +304,35 @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, bdrv_parent_drained_begin(bs, parent); bdrv_drain_invoke(bs, true); - /* Wait for drained requests to finish */ - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - if (recursive) { bs->recursive_quiesce_counter++; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_begin(child->bs, true, child); + bdrv_do_drained_begin(child->bs, true, child, false); } } + + /* + * Wait for drained requests to finish. + * + * Calling BDRV_POLL_WHILE() only once for the top-level node is okay: The + * call is needed so things in this AioContext can make progress even + * though we don't return to the main AioContext loop - this automatically + * includes other nodes in the same AioContext and therefore all child + * nodes. + */ + if (poll) { + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); + } } void bdrv_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, false, NULL); + bdrv_do_drained_begin(bs, false, NULL, true); } void bdrv_subtree_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, true, NULL); + bdrv_do_drained_begin(bs, true, NULL, true); } void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, @@ -315,7 +342,7 @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent); + bdrv_co_yield_to_drain(bs, false, recursive, parent, false); return; } assert(bs->quiesce_counter > 0); @@ -351,7 +378,7 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) int i; for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_begin(child->bs, true, child); + bdrv_do_drained_begin(child->bs, true, child, true); } } @@ -421,7 +448,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, true, NULL); + bdrv_do_drained_begin(bs, true, NULL, true); aio_context_release(aio_context); } diff --git a/include/block/block.h b/include/block/block.h index cebbb39c6c..254ed2e4c9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -570,10 +570,13 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); /** * bdrv_drain_poll: * - * Poll for pending requests in @bs and its parents (except for - * @ignore_parent). This is part of bdrv_drained_begin. + * Poll for pending requests in @bs, its parents (except for @ignore_parent), + * and if @recursive is true its children as well. + * + * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent); +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent); /** * bdrv_drained_begin: -- cgit v1.2.3-55-g7522 From dcf94a23b1add0f856db51e9ff5ba0774e096076 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 23 Mar 2018 15:57:20 +0100 Subject: block: Don't poll in parent drain callbacks bdrv_do_drained_begin() is only safe if we have a single BDRV_POLL_WHILE() after quiescing all affected nodes. We cannot allow that parent callbacks introduce a nested polling loop that could cause graph changes while we're traversing the graph. Split off bdrv_do_drained_begin_quiesce(), which only quiesces a single node without waiting for its requests to complete. These requests will be waited for in the BDRV_POLL_WHILE() call down the call chain. Signed-off-by: Kevin Wolf --- block.c | 2 +- block/io.c | 24 ++++++++++++++++-------- include/block/block.h | 9 +++++++++ 3 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 80abd3c2ae..50f8e3dc3b 100644 --- a/block.c +++ b/block.c @@ -818,7 +818,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_drained_begin(bs); + bdrv_do_drained_begin_quiesce(bs, NULL); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) diff --git a/block/io.c b/block/io.c index a413841bfc..ffb273708a 100644 --- a/block/io.c +++ b/block/io.c @@ -286,15 +286,10 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, assert(data.done); } -void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool poll) +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent) { - BdrvChild *child, *next; - - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); - return; - } + assert(!qemu_in_coroutine()); /* Stop things in parent-to-child order */ if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { @@ -303,6 +298,19 @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, bdrv_parent_drained_begin(bs, parent); bdrv_drain_invoke(bs, true); +} + +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool poll) +{ + BdrvChild *child, *next; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent); if (recursive) { bs->recursive_quiesce_counter++; diff --git a/include/block/block.h b/include/block/block.h index 254ed2e4c9..067d24cc4a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -590,6 +590,15 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, */ void bdrv_drained_begin(BlockDriverState *bs); +/** + * bdrv_do_drained_begin_quiesce: + * + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent); + /** * Like bdrv_drained_begin, but recursively begins a quiesced section for * exclusive access to all child nodes as well. -- cgit v1.2.3-55-g7522 From 4d22bbf4ef72583eefdf44db6bf9fc7683fbc4c2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 10 Apr 2018 15:51:52 +0200 Subject: block: Allow AIO_WAIT_WHILE with NULL ctx bdrv_drain_all() wants to have a single polling loop for draining the in-flight requests of all nodes. This means that the AIO_WAIT_WHILE() condition relies on activity in multiple AioContexts, which is polled from the mainloop context. We must therefore call AIO_WAIT_WHILE() from the mainloop thread and use the AioWait notification mechanism. Just randomly picking the AioContext of any non-mainloop thread would work, but instead of bothering to find such a context in the caller, we can just as well accept NULL for ctx. Signed-off-by: Kevin Wolf --- include/block/aio-wait.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 783d3678dd..c85a62f798 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -57,7 +57,8 @@ typedef struct { /** * AIO_WAIT_WHILE: * @wait: the aio wait object - * @ctx: the aio context + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. * @cond: wait while this conditional expression is true * * Wait while a condition is true. Use this to implement synchronous @@ -75,7 +76,7 @@ typedef struct { bool waited_ = false; \ AioWait *wait_ = (wait); \ AioContext *ctx_ = (ctx); \ - if (in_aio_context_home_thread(ctx_)) { \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ while ((cond)) { \ aio_poll(ctx_, true); \ waited_ = true; \ @@ -86,9 +87,13 @@ typedef struct { /* Increment wait_->num_waiters before evaluating cond. */ \ atomic_inc(&wait_->num_waiters); \ while ((cond)) { \ - aio_context_release(ctx_); \ + if (ctx_) { \ + aio_context_release(ctx_); \ + } \ aio_poll(qemu_get_aio_context(), true); \ - aio_context_acquire(ctx_); \ + if (ctx_) { \ + aio_context_acquire(ctx_); \ + } \ waited_ = true; \ } \ atomic_dec(&wait_->num_waiters); \ -- cgit v1.2.3-55-g7522 From 6cd5c9d7b2df93ef54144f170d4c908934a4767f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 May 2018 17:17:45 +0200 Subject: block: ignore_bds_parents parameter for drain functions In the future, bdrv_drained_all_begin/end() will drain all invidiual nodes separately rather than whole subtrees. This means that we don't want to propagate the drain to all parents any more: If the parent is a BDS, it will already be drained separately. Recursing to all parents is unnecessary work and would make it an O(n²) operation. Prepare the drain function for the changed drain_all by adding an ignore_bds_parents parameter to the internal implementation that prevents the propagation of the drain to BDS parents. We still (have to) propagate it to non-BDS parents like BlockBackends or Jobs because those are not drained separately. Signed-off-by: Kevin Wolf --- block.c | 11 +++--- block/io.c | 88 ++++++++++++++++++++++++++++------------------- block/vvfat.c | 1 + include/block/block.h | 16 ++++++--- include/block/block_int.h | 6 ++++ 5 files changed, 78 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 50f8e3dc3b..c8586f41ba 100644 --- a/block.c +++ b/block.c @@ -818,13 +818,13 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_do_drained_begin_quiesce(bs, NULL); + bdrv_do_drained_begin_quiesce(bs, NULL, false); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; - return bdrv_drain_poll(bs, false, NULL); + return bdrv_drain_poll(bs, false, NULL, false); } static void bdrv_child_cb_drained_end(BdrvChild *child) @@ -908,6 +908,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, @@ -933,6 +934,7 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, @@ -1051,6 +1053,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, } const BdrvChildRole child_backing = { + .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, .attach = bdrv_backing_attach, .detach = bdrv_backing_detach, @@ -4957,7 +4960,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) AioContext *ctx = bdrv_get_aio_context(bs); aio_disable_external(ctx); - bdrv_parent_drained_begin(bs, NULL); + bdrv_parent_drained_begin(bs, NULL, false); bdrv_drain(bs); /* ensure there are no in-flight requests */ while (aio_poll(ctx, false)) { @@ -4971,7 +4974,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) */ aio_context_acquire(new_context); bdrv_attach_aio_context(bs, new_context); - bdrv_parent_drained_end(bs, NULL); + bdrv_parent_drained_end(bs, NULL, false); aio_enable_external(ctx); aio_context_release(new_context); } diff --git a/block/io.c b/block/io.c index 1a9974c136..1834a14aa6 100644 --- a/block/io.c +++ b/block/io.c @@ -41,12 +41,13 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } if (c->role->drained_begin) { @@ -55,12 +56,13 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) } } -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } if (c->role->drained_end) { @@ -69,13 +71,14 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } -static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore) +static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents) { BdrvChild *c, *next; bool busy = false; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore) { + if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } if (c->role->drained_poll) { @@ -167,6 +170,7 @@ typedef struct { bool recursive; bool poll; BdrvChild *parent; + bool ignore_bds_parents; } BdrvCoDrainData; static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) @@ -220,11 +224,11 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent) + BdrvChild *ignore_parent, bool ignore_bds_parents) { BdrvChild *child, *next; - if (bdrv_parent_drained_poll(bs, ignore_parent)) { + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { return true; } @@ -233,8 +237,9 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, } if (recursive) { + assert(!ignore_bds_parents); QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - if (bdrv_drain_poll(child->bs, recursive, child)) { + if (bdrv_drain_poll(child->bs, recursive, child, false)) { return true; } } @@ -250,13 +255,14 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, * have executed. */ while (aio_poll(bs->aio_context, false)); - return bdrv_drain_poll(bs, recursive, ignore_parent); + return bdrv_drain_poll(bs, recursive, ignore_parent, false); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool poll); + BdrvChild *parent, bool ignore_bds_parents, + bool poll); static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent); + BdrvChild *parent, bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -267,9 +273,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) if (bs) { bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll); + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); } else { - bdrv_do_drained_end(bs, data->recursive, data->parent); + bdrv_do_drained_end(bs, data->recursive, data->parent, + data->ignore_bds_parents); } } else { assert(data->begin); @@ -282,7 +290,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, bool recursive, - BdrvChild *parent, bool poll) + BdrvChild *parent, + bool ignore_bds_parents, + bool poll) { BdrvCoDrainData data; @@ -297,6 +307,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .begin = begin, .recursive = recursive, .parent = parent, + .ignore_bds_parents = ignore_bds_parents, .poll = poll, }; if (bs) { @@ -312,7 +323,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent) + BdrvChild *parent, bool ignore_bds_parents) { assert(!qemu_in_coroutine()); @@ -321,26 +332,30 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, aio_disable_external(bdrv_get_aio_context(bs)); } - bdrv_parent_drained_begin(bs, parent); + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); bdrv_drain_invoke(bs, true); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool poll) + BdrvChild *parent, bool ignore_bds_parents, + bool poll) { BdrvChild *child, *next; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, poll); + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, + poll); return; } - bdrv_do_drained_begin_quiesce(bs, parent); + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); if (recursive) { + assert(!ignore_bds_parents); bs->recursive_quiesce_counter++; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_begin(child->bs, true, child, false); + bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, + false); } } @@ -354,28 +369,30 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, * nodes. */ if (poll) { + assert(!ignore_bds_parents); BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); } } void bdrv_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, false, NULL, true); + bdrv_do_drained_begin(bs, false, NULL, false, true); } void bdrv_subtree_drained_begin(BlockDriverState *bs) { - bdrv_do_drained_begin(bs, true, NULL, true); + bdrv_do_drained_begin(bs, true, NULL, false, true); } -void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent) +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents) { BdrvChild *child, *next; int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, false); + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, + false); return; } assert(bs->quiesce_counter > 0); @@ -383,27 +400,28 @@ void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, /* Re-enable things in child-to-parent order */ bdrv_drain_invoke(bs, false); - bdrv_parent_drained_end(bs, parent); + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); if (old_quiesce_counter == 1) { aio_enable_external(bdrv_get_aio_context(bs)); } if (recursive) { + assert(!ignore_bds_parents); bs->recursive_quiesce_counter--; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_end(child->bs, true, child); + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); } } } void bdrv_drained_end(BlockDriverState *bs) { - bdrv_do_drained_end(bs, false, NULL); + bdrv_do_drained_end(bs, false, NULL, false); } void bdrv_subtree_drained_end(BlockDriverState *bs) { - bdrv_do_drained_end(bs, true, NULL); + bdrv_do_drained_end(bs, true, NULL, false); } void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) @@ -411,7 +429,7 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) int i; for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_begin(child->bs, true, child, true); + bdrv_do_drained_begin(child->bs, true, child, false, true); } } @@ -420,7 +438,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) int i; for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child); + bdrv_do_drained_end(child->bs, true, child, false); } } @@ -472,7 +490,7 @@ void bdrv_drain_all_begin(void) BdrvNextIterator it; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, true); + bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true); return; } @@ -486,7 +504,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, true, NULL, true); + bdrv_do_drained_begin(bs, true, NULL, false, true); aio_context_release(aio_context); } @@ -504,7 +522,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, true, NULL); + bdrv_do_drained_end(bs, true, NULL, false); aio_context_release(aio_context); } } diff --git a/block/vvfat.c b/block/vvfat.c index 4595f335b8..c7d2ed2d96 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3134,6 +3134,7 @@ static void vvfat_qcow_options(int *child_flags, QDict *child_options, } static const BdrvChildRole child_vvfat_qcow = { + .parent_is_bds = true, .inherit_options = vvfat_qcow_options, }; diff --git a/include/block/block.h b/include/block/block.h index 067d24cc4a..836746e4e1 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -557,7 +557,8 @@ void bdrv_io_unplug(BlockDriverState *bs); * Begin a quiesced section of all users of @bs. This is part of * bdrv_drained_begin. */ -void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents); /** * bdrv_parent_drained_end: @@ -565,18 +566,23 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); * End a quiesced section of all users of @bs. This is part of * bdrv_drained_end. */ -void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + bool ignore_bds_parents); /** * bdrv_drain_poll: * * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well. + * and if @recursive is true its children as well (used for subtree drain). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for + * drain_all). * * This is part of bdrv_drained_begin. */ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent); + BdrvChild *ignore_parent, bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -597,7 +603,7 @@ void bdrv_drained_begin(BlockDriverState *bs); * running requests to complete. */ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent); + BdrvChild *parent, bool ignore_bds_parents); /** * Like bdrv_drained_begin, but recursively begins a quiesced section for diff --git a/include/block/block_int.h b/include/block/block_int.h index 1b811db8ec..1abfc26d76 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -577,6 +577,12 @@ struct BdrvChildRole { * points to. */ bool stay_at_node; + /* If true, the parent is a BlockDriverState and bdrv_next_all_states() + * will return it. This information is used for drain_all, where every node + * will be drained separately, so the drain only needs to be propagated to + * non-BDS parents. */ + bool parent_is_bds; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); -- cgit v1.2.3-55-g7522 From 0f12264e7a41458179ad10276a7c33c72024861a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 28 Mar 2018 18:29:18 +0200 Subject: block: Allow graph changes in bdrv_drain_all_begin/end sections bdrv_drain_all_*() used bdrv_next() to iterate over all root nodes and did a subtree drain for each of them. This works fine as long as the graph is static, but sadly, reality looks different. If the graph changes so that root nodes are added or removed, we would have to compensate for this. bdrv_next() returns each root node only once even if it's the root node for multiple BlockBackends or for a monitor-owned block driver tree, which would only complicate things. The much easier and more obviously correct way is to fundamentally change the way the functions work: Iterate over all BlockDriverStates, no matter who owns them, and drain them individually. Compensation is only necessary when a new BDS is created inside a drain_all section. Removal of a BDS doesn't require any action because it's gone afterwards anyway. Signed-off-by: Kevin Wolf --- block.c | 34 ++++++++++++++++++++++++--- block/io.c | 60 ++++++++++++++++++++++++++++++++++++----------- include/block/block.h | 1 + include/block/block_int.h | 1 + 4 files changed, 79 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index c8586f41ba..6c128007fd 100644 --- a/block.c +++ b/block.c @@ -333,6 +333,10 @@ BlockDriverState *bdrv_new(void) qemu_co_queue_init(&bs->flush_queue); + for (i = 0; i < bdrv_drain_all_count; i++) { + bdrv_drained_begin(bs); + } + QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); return bs; @@ -1164,7 +1168,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, int open_flags, Error **errp) { Error *local_err = NULL; - int ret; + int i, ret; bdrv_assign_node_name(bs, node_name, &local_err); if (local_err) { @@ -1212,6 +1216,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, assert(bdrv_min_mem_align(bs) != 0); assert(is_power_of_2(bs->bl.request_alignment)); + for (i = 0; i < bs->quiesce_counter; i++) { + if (drv->bdrv_co_drain_begin) { + drv->bdrv_co_drain_begin(bs); + } + } + return 0; open_failed: bs->drv = NULL; @@ -2033,7 +2043,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child, child->role->detach(child); } if (old_bs->quiesce_counter && child->role->drained_end) { - for (i = 0; i < old_bs->quiesce_counter; i++) { + int num = old_bs->quiesce_counter; + if (child->role->parent_is_bds) { + num -= bdrv_drain_all_count; + } + assert(num >= 0); + for (i = 0; i < num; i++) { child->role->drained_end(child); } } @@ -2045,7 +2060,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); if (new_bs->quiesce_counter && child->role->drained_begin) { - for (i = 0; i < new_bs->quiesce_counter; i++) { + int num = new_bs->quiesce_counter; + if (child->role->parent_is_bds) { + num -= bdrv_drain_all_count; + } + assert(num >= 0); + for (i = 0; i < num; i++) { child->role->drained_begin(child); } } @@ -4049,6 +4069,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs) return QTAILQ_NEXT(bs, node_list); } +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) +{ + if (!bs) { + return QTAILQ_FIRST(&all_bdrv_states); + } + return QTAILQ_NEXT(bs, bs_list); +} + const char *bdrv_get_node_name(const BlockDriverState *bs) { return bs->node_name; diff --git a/block/io.c b/block/io.c index 1834a14aa6..ef4fedd364 100644 --- a/block/io.c +++ b/block/io.c @@ -38,6 +38,8 @@ /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +static AioWait drain_all_aio_wait; + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); @@ -472,6 +474,29 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs) } } +unsigned int bdrv_drain_all_count = 0; + +static bool bdrv_drain_all_poll(void) +{ + BlockDriverState *bs = NULL; + bool result = false; + + /* Execute pending BHs first (may modify the graph) and check everything + * else only after the BHs have executed. */ + while (aio_poll(qemu_get_aio_context(), false)); + + /* bdrv_drain_poll() can't make changes to the graph and we are holding the + * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + result |= bdrv_drain_poll(bs, false, NULL, true); + aio_context_release(aio_context); + } + + return result; +} + /* * Wait for pending requests to complete across all BlockDriverStates * @@ -486,45 +511,51 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs) */ void bdrv_drain_all_begin(void) { - BlockDriverState *bs; - BdrvNextIterator it; + BlockDriverState *bs = NULL; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true); + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); return; } - /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread - * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on - * nodes in several different AioContexts, so make sure we're in the main - * context. */ + /* AIO_WAIT_WHILE() with a NULL context can only be called from the main + * loop AioContext, so make sure we're in the main context. */ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_drain_all_count < INT_MAX); + bdrv_drain_all_count++; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Quiesce all nodes, without polling in-flight requests yet. The graph + * cannot change during this loop. */ + while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, true, NULL, false, true); + bdrv_do_drained_begin(bs, false, NULL, true, false); aio_context_release(aio_context); } - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Now poll the in-flight requests */ + AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); + + while ((bs = bdrv_next_all_states(bs))) { bdrv_drain_assert_idle(bs); } } void bdrv_drain_all_end(void) { - BlockDriverState *bs; - BdrvNextIterator it; + BlockDriverState *bs = NULL; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, true, NULL, false); + bdrv_do_drained_end(bs, false, NULL, true); aio_context_release(aio_context); } + + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; } void bdrv_drain_all(void) @@ -647,6 +678,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs) void bdrv_wakeup(BlockDriverState *bs) { aio_wait_kick(bdrv_get_aio_wait(bs)); + aio_wait_kick(&drain_all_aio_wait); } void bdrv_dec_in_flight(BlockDriverState *bs) diff --git a/include/block/block.h b/include/block/block.h index 836746e4e1..b1d6fdb97a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -421,6 +421,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device, Error **errp); bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); typedef struct BdrvNextIterator { enum { diff --git a/include/block/block_int.h b/include/block/block_int.h index 1abfc26d76..7cd7eed83b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -854,6 +854,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +extern unsigned int bdrv_drain_all_count; void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); -- cgit v1.2.3-55-g7522 From a33fbb4f8b64226becf502a123733776ce319b24 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 13 Jun 2018 20:18:16 +0200 Subject: hbitmap: Add @advance param to hbitmap_iter_next() This new parameter allows the caller to just query the next dirty position without moving the iterator. Signed-off-by: Max Reitz Reviewed-by: Fam Zheng Reviewed-by: John Snow Message-id: 20180613181823.13618-8-mreitz@redhat.com Signed-off-by: Max Reitz --- block/backup.c | 2 +- block/dirty-bitmap.c | 2 +- include/qemu/hbitmap.h | 5 ++++- tests/test-hbitmap.c | 26 +++++++++++++------------- util/hbitmap.c | 10 +++++++--- 5 files changed, 26 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/block/backup.c b/block/backup.c index 5661435675..d18be40caf 100644 --- a/block/backup.c +++ b/block/backup.c @@ -354,7 +354,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) HBitmapIter hbi; hbitmap_iter_init(&hbi, job->copy_bitmap, 0); - while ((cluster = hbitmap_iter_next(&hbi)) != -1) { + while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) { do { if (yield_and_check(job)) { return 0; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 383d742cdb..cedb971765 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -519,7 +519,7 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - return hbitmap_iter_next(&iter->hbi); + return hbitmap_iter_next(&iter->hbi, true); } /* Called within bdrv_dirty_bitmap_lock..unlock */ diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 6b6490ecad..ddca52c48e 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -324,11 +324,14 @@ void hbitmap_free_meta(HBitmap *hb); /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. + * @advance: If true, advance the iterator. Otherwise, the next call + * of this function will return the same result (if that + * position is still dirty). * * Return the next bit that is set in @hbi's associated HBitmap, * or -1 if all remaining bits are zero. */ -int64_t hbitmap_iter_next(HBitmapIter *hbi); +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance); /** * hbitmap_iter_next_word: diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index f29631f939..f2158f767d 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -46,7 +46,7 @@ static void hbitmap_test_check(TestHBitmapData *data, i = first; for (;;) { - next = hbitmap_iter_next(&hbi); + next = hbitmap_iter_next(&hbi, true); if (next < 0) { next = data->size; } @@ -435,25 +435,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data, /* Note that hbitmap_test_check has to be invoked manually in this test. */ hbitmap_test_init(data, 131072 << 7, 7); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); hbitmap_test_set(data, (131072 << 7) - 8, 8); hbitmap_iter_init(&hbi, data->hb, 0); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7); - g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7); + g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0); } static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff) @@ -893,7 +893,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, for (i = 0; i < num_positions; i++) { hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); hbitmap_iter_init(&iter, data->hb, 0); - next = hbitmap_iter_next(&iter); + next = hbitmap_iter_next(&iter, true); if (i == num_positions - 1) { g_assert_cmpint(next, ==, -1); } else { @@ -919,10 +919,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data, hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1); - hbitmap_iter_next(&hbi); + hbitmap_iter_next(&hbi, true); hbitmap_reset_all(data->hb); - hbitmap_iter_next(&hbi); + hbitmap_iter_next(&hbi, true); } static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start) diff --git a/util/hbitmap.c b/util/hbitmap.c index 58a2c93842..bcd304041a 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -141,7 +141,7 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) return cur; } -int64_t hbitmap_iter_next(HBitmapIter *hbi) +int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance) { unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] & hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos]; @@ -154,8 +154,12 @@ int64_t hbitmap_iter_next(HBitmapIter *hbi) } } - /* The next call will resume work from the next bit. */ - hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + if (advance) { + /* The next call will resume work from the next bit. */ + hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); + } else { + hbi->cur[HBITMAP_LEVELS - 1] = cur; + } item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur); return item << hbi->granularity; -- cgit v1.2.3-55-g7522 From 72d10a94213a954ad569095cb4491f2ae0853c40 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 13 Jun 2018 20:18:18 +0200 Subject: block/dirty-bitmap: Add bdrv_dirty_iter_next_area This new function allows to look for a consecutively dirty area in a dirty bitmap. Signed-off-by: Max Reitz Reviewed-by: Fam Zheng Reviewed-by: John Snow Message-id: 20180613181823.13618-10-mreitz@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 55 ++++++++++++++++++++++++++++++++++++++++++++ include/block/dirty-bitmap.h | 2 ++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index cedb971765..db1782ec1f 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -522,6 +522,61 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) return hbitmap_iter_next(&iter->hbi, true); } +/** + * Return the next consecutively dirty area in the dirty bitmap + * belonging to the given iterator @iter. + * + * @max_offset: Maximum value that may be returned for + * *offset + *bytes + * @offset: Will contain the start offset of the next dirty area + * @bytes: Will contain the length of the next dirty area + * + * Returns: True if a dirty area could be found before max_offset + * (which means that *offset and *bytes then contain valid + * values), false otherwise. + * + * Note that @iter is never advanced if false is returned. If an area + * is found (which means that true is returned), it will be advanced + * past that area. + */ +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, + uint64_t *offset, int *bytes) +{ + uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap); + uint64_t gran_max_offset; + int64_t ret; + int size; + + if (max_offset == iter->bitmap->size) { + /* If max_offset points to the image end, round it up by the + * bitmap granularity */ + gran_max_offset = ROUND_UP(max_offset, granularity); + } else { + gran_max_offset = max_offset; + } + + ret = hbitmap_iter_next(&iter->hbi, false); + if (ret < 0 || ret + granularity > gran_max_offset) { + return false; + } + + *offset = ret; + size = 0; + + assert(granularity <= INT_MAX); + + do { + /* Advance iterator */ + ret = hbitmap_iter_next(&iter->hbi, true); + size += granularity; + } while (ret + granularity <= gran_max_offset && + hbitmap_iter_next(&iter->hbi, false) == ret + granularity && + size <= INT_MAX - granularity); + + *bytes = MIN(size, max_offset - *offset); + return true; +} + /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes) diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 02e0cbabd2..288dc6adb6 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -82,6 +82,8 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset, + uint64_t *offset, int *bytes); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); -- cgit v1.2.3-55-g7522 From 62f13600593322b8e796f15fd6742064fba6ab65 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 13 Jun 2018 20:18:20 +0200 Subject: job: Add job_progress_increase_remaining() Signed-off-by: Max Reitz Message-id: 20180613181823.13618-12-mreitz@redhat.com Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- include/qemu/job.h | 15 +++++++++++++++ job.c | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/qemu/job.h b/include/qemu/job.h index 1d820530fa..18c9223e31 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -335,6 +335,21 @@ void job_progress_update(Job *job, uint64_t done); */ void job_progress_set_remaining(Job *job, uint64_t remaining); +/** + * @job: The job whose expected progress end value is updated + * @delta: Value which is to be added to the current expected end + * value + * + * Increases the expected end value of the progress counter of a job. + * This is useful for parenthesis operations: If a job has to + * conditionally perform a high-priority operation as part of its + * progress, it calls this function with the expected operation's + * length before, and job_progress_update() afterwards. + * (So the operation acts as a parenthesis in regards to the main job + * operation running in background.) + */ +void job_progress_increase_remaining(Job *job, uint64_t delta); + /** To be called when a cancelled job is finalised. */ void job_event_cancelled(Job *job); diff --git a/job.c b/job.c index 84e140238b..fa671b431a 100644 --- a/job.c +++ b/job.c @@ -385,6 +385,11 @@ void job_progress_set_remaining(Job *job, uint64_t remaining) job->progress_total = job->progress_current + remaining; } +void job_progress_increase_remaining(Job *job, uint64_t delta) +{ + job->progress_total += delta; +} + void job_event_cancelled(Job *job) { notifier_list_notify(&job->on_finalize_cancelled, job); -- cgit v1.2.3-55-g7522 From 481debaa3270fb276dcf27205aa27ad52cc34590 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 13 Jun 2018 20:18:22 +0200 Subject: block/mirror: Add copy mode QAPI interface This patch allows the user to specify whether to use active or only background mode for mirror block jobs. Currently, this setting will remain constant for the duration of the entire block job. Signed-off-by: Max Reitz Reviewed-by: Alberto Garcia Message-id: 20180613181823.13618-14-mreitz@redhat.com Signed-off-by: Max Reitz --- block/mirror.c | 12 +++++++----- blockdev.c | 9 ++++++++- include/block/block_int.h | 4 +++- qapi/block-core.json | 11 +++++++++-- 4 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/mirror.c b/block/mirror.c index 99b9b92c30..61bd9f3cf1 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1473,7 +1473,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, bool auto_complete, const char *filter_node_name, - bool is_mirror, + bool is_mirror, MirrorCopyMode copy_mode, Error **errp) { MirrorBlockJob *s; @@ -1581,7 +1581,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, s->on_target_error = on_target_error; s->is_none_mode = is_none_mode; s->backing_mode = backing_mode; - s->copy_mode = MIRROR_COPY_MODE_BACKGROUND; + s->copy_mode = copy_mode; s->base = base; s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); @@ -1648,7 +1648,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, Error **errp) + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1663,7 +1664,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, &mirror_job_driver, is_none_mode, base, false, - filter_node_name, true, errp); + filter_node_name, true, copy_mode, errp); } void commit_active_start(const char *job_id, BlockDriverState *bs, @@ -1686,7 +1687,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &commit_active_job_driver, false, base, auto_complete, - filter_node_name, false, &local_err); + filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, + &local_err); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/blockdev.c b/blockdev.c index 7f65cd7497..58d7570932 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3586,6 +3586,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_unmap, bool unmap, bool has_filter_node_name, const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, Error **errp) { @@ -3610,6 +3611,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_filter_node_name) { filter_node_name = NULL; } + if (!has_copy_mode) { + copy_mode = MIRROR_COPY_MODE_BACKGROUND; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3640,7 +3644,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, on_source_error, on_target_error, unmap, filter_node_name, - errp); + copy_mode, errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3786,6 +3790,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, false, NULL, + arg->has_copy_mode, arg->copy_mode, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3806,6 +3811,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_target_error, bool has_filter_node_name, const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, Error **errp) { BlockDriverState *bs; @@ -3838,6 +3844,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_target_error, on_target_error, true, true, has_filter_node_name, filter_node_name, + has_copy_mode, copy_mode, &local_err); error_propagate(errp, local_err); diff --git a/include/block/block_int.h b/include/block/block_int.h index 7cd7eed83b..74646ed722 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1031,6 +1031,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @filter_node_name: The node name that should be assigned to the filter * driver that the mirror job inserts into the graph above @bs. NULL means that * a node name should be autogenerated. + * @copy_mode: When to trigger writes to the target. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated @@ -1044,7 +1045,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, Error **errp); + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp); /* * backup_job_create: diff --git a/qapi/block-core.json b/qapi/block-core.json index 96f8da1322..cc3ede0630 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1710,6 +1710,9 @@ # written. Both will result in identical contents. # Default is true. (Since 2.4) # +# @copy-mode: when to copy data to the destination; defaults to 'background' +# (Since: 3.0) +# # Since: 1.3 ## { 'struct': 'DriveMirror', @@ -1719,7 +1722,7 @@ '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError', - '*unmap': 'bool' } } + '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } } ## # @BlockDirtyBitmap: @@ -1982,6 +1985,9 @@ # above @device. If this option is not given, a node name is # autogenerated. (Since: 2.9) # +# @copy-mode: when to copy data to the destination; defaults to 'background' +# (Since: 3.0) +# # Returns: nothing on success. # # Since: 2.6 @@ -2002,7 +2008,8 @@ '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError', - '*filter-node-name': 'str' } } + '*filter-node-name': 'str', + '*copy-mode': 'MirrorCopyMode' } } ## # @block_set_io_throttle: -- cgit v1.2.3-55-g7522