From 67acfd21885b70ae0cd11f12e34821be3d8b90bb Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 12 Jun 2019 17:48:11 +0200 Subject: stream: Deal with filters Because of the (not so recent anymore) changes that make the stream job independent of the base node and instead track the node above it, we have to split that "bottom" node into two cases: The bottom COW node, and the node directly above the base node (which may be an R/W filter or the bottom COW node). Signed-off-by: Max Reitz --- qapi/block-core.json | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'qapi') diff --git a/qapi/block-core.json b/qapi/block-core.json index 55b58ba892..b04df13bea 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2484,13 +2484,20 @@ # of 'device'. # # If a base file is specified then sectors are not copied from that base file and -# its backing chain. When streaming completes the image file will have the base -# file as its backing file. This can be used to stream a subset of the backing -# file chain instead of flattening the entire image. +# its backing chain. This can be used to stream a subset of the backing file +# chain instead of flattening the entire image. +# When streaming completes the image file will have the base file as its backing +# file, unless that node was changed while the job was running. In that case, +# base's parent's backing (or filtered, whichever exists) child (i.e., base at +# the beginning of the job) will be the new backing file. # # On successful completion the image file is updated to drop the backing file # and the BLOCK_JOB_COMPLETED event is emitted. # +# In case @device is a filter node, block-stream modifies the first non-filter +# overlay node below it to point to the new backing node instead of modifying +# @device itself. +# # @job-id: identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -- cgit v1.2.3-55-g7522 From 3f072a7fb747413bbf6a63fd6476888b6b671a04 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 12 Jun 2019 16:27:32 +0200 Subject: mirror: Deal with filters This includes some permission limiting (for example, we only need to take the RESIZE permission for active commits where the base is smaller than the top). base_overlay is introduced so we can query bdrv_is_allocated_above() on it - we cannot do that with base itself, because a filter's block_status is the same as its child node, so if there are filters on base, bdrv_is_allocated_above() on base would return information including base. Use this opportunity to rename qmp_drive_mirror()'s "source" BDS to "target_backing_bs", because that is what it really refers to. Signed-off-by: Max Reitz --- block/mirror.c | 118 +++++++++++++++++++++++++++++++++++++++------------ blockdev.c | 32 ++++++++++---- qapi/block-core.json | 6 ++- 3 files changed, 118 insertions(+), 38 deletions(-) (limited to 'qapi') diff --git a/block/mirror.c b/block/mirror.c index e8e8844afc..f16b0d62bc 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob { BlockBackend *target; BlockDriverState *mirror_top_bs; BlockDriverState *base; + BlockDriverState *base_overlay; /* The name of the graph node to replace */ char *replaces; @@ -677,8 +678,10 @@ static int mirror_exit_common(Job *job) &error_abort); if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target_bs) != backing) { - bdrv_set_backing_hd(target_bs, backing, &local_err); + BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); + + if (bdrv_cow_bs(unfiltered_target) != backing) { + bdrv_set_backing_hd(unfiltered_target, backing, &local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -740,7 +743,7 @@ static int mirror_exit_common(Job *job) * valid. */ block_job_remove_all_bdrv(bjob); - bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); /* We just changed the BDS the job BB refers to (with either or both of the * bdrv_replace_node() calls), so switch the BB back so the cleanup does @@ -786,7 +789,6 @@ static void coroutine_fn mirror_throttle(MirrorBlockJob *s) static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t offset; - BlockDriverState *base = s->base; BlockDriverState *bs = s->mirror_top_bs->backing->bs; BlockDriverState *target_bs = blk_bs(s->target); int ret; @@ -837,7 +839,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } - ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count); + ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes, + &count); if (ret < 0) { return ret; } @@ -936,7 +939,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) } else { s->target_cluster_size = BDRV_SECTOR_SIZE; } - if (backing_filename[0] && !target_bs->backing && + if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) && s->granularity < s->target_cluster_size) { s->buf_size = MAX(s->buf_size, s->target_cluster_size); s->cow_bitmap = bitmap_new(length); @@ -1116,8 +1119,9 @@ static void mirror_complete(Job *job, Error **errp) if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { int ret; - assert(!target->backing); - ret = bdrv_open_backing_file(target, NULL, "backing", errp); + assert(!bdrv_backing_chain_next(target)); + ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL, + "backing", errp); if (ret < 0) { return; } @@ -1555,8 +1559,8 @@ static BlockJob *mirror_start_job( MirrorBlockJob *s; MirrorBDSOpaque *bs_opaque; BlockDriverState *mirror_top_bs; - bool target_graph_mod; bool target_is_backing; + uint64_t target_perms, target_shared_perms; Error *local_err = NULL; int ret; @@ -1575,7 +1579,7 @@ static BlockJob *mirror_start_job( buf_size = DEFAULT_MIRROR_BUF_SIZE; } - if (bs == target) { + if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) { error_setg(errp, "Can't mirror node into itself"); return NULL; } @@ -1639,15 +1643,50 @@ static BlockJob *mirror_start_job( * In the case of active commit, things look a bit different, though, * because the target is an already populated backing file in active use. * We can allow anything except resize there.*/ + + target_perms = BLK_PERM_WRITE; + target_shared_perms = BLK_PERM_WRITE_UNCHANGED; + target_is_backing = bdrv_chain_contains(bs, target); - target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + if (target_is_backing) { + int64_t bs_size, target_size; + bs_size = bdrv_getlength(bs); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, + "Could not inquire top image size"); + goto fail; + } + + target_size = bdrv_getlength(target); + if (target_size < 0) { + error_setg_errno(errp, -target_size, + "Could not inquire base image size"); + goto fail; + } + + if (target_size < bs_size) { + target_perms |= BLK_PERM_RESIZE; + } + + target_shared_perms |= BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_GRAPH_MOD; + } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) { + /* + * We may want to allow this in the future, but it would + * require taking some extra care. + */ + error_setg(errp, "Cannot mirror to a filter on top of a node in the " + "source's backing chain"); + goto fail; + } + + if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) { + target_perms |= BLK_PERM_GRAPH_MOD; + } + s->target = blk_new(s->common.job.aio_context, - BLK_PERM_WRITE | BLK_PERM_RESIZE | - (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), - BLK_PERM_WRITE_UNCHANGED | - (target_is_backing ? BLK_PERM_CONSISTENT_READ | - BLK_PERM_WRITE | - BLK_PERM_GRAPH_MOD : 0)); + target_perms, target_shared_perms); ret = blk_insert_bs(s->target, target, errp); if (ret < 0) { goto fail; @@ -1672,6 +1711,7 @@ static BlockJob *mirror_start_job( s->zero_target = zero_target; s->copy_mode = copy_mode; s->base = base; + s->base_overlay = bdrv_find_overlay(bs, base); s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); s->unmap = unmap; @@ -1702,15 +1742,39 @@ static BlockJob *mirror_start_job( /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ if (target_is_backing) { - BlockDriverState *iter; - for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - /* XXX BLK_PERM_WRITE needs to be allowed so we don't block - * ourselves at s->base (if writes are blocked for a node, they are - * also blocked for its backing file). The other options would be a - * second filter driver above s->base (== target). */ + BlockDriverState *iter, *filtered_target; + uint64_t iter_shared_perms; + + /* + * The topmost node with + * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target) + */ + filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target)); + + assert(bdrv_skip_filters(filtered_target) == + bdrv_skip_filters(target)); + + /* + * XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). + */ + iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE; + + for (iter = bdrv_filter_or_cow_bs(bs); iter != target; + iter = bdrv_filter_or_cow_bs(iter)) + { + if (iter == filtered_target) { + /* + * From here on, all nodes are filters on the base. + * This allows us to share BLK_PERM_CONSISTENT_READ. + */ + iter_shared_perms |= BLK_PERM_CONSISTENT_READ; + } + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, - errp); + iter_shared_perms, errp); if (ret < 0) { goto fail; } @@ -1746,7 +1810,7 @@ fail: bs_opaque->stop = true; bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, &error_abort); - bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); bdrv_unref(mirror_top_bs); @@ -1774,7 +1838,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, return; } is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; + base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; mirror_start_job(job_id, bs, creation_flags, target, replaces, speed, granularity, buf_size, backing_mode, zero_target, on_source_error, on_target_error, unmap, NULL, NULL, diff --git a/blockdev.c b/blockdev.c index 57ee41b73e..73d96ce21c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2899,6 +2899,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_auto_dismiss, bool auto_dismiss, Error **errp) { + BlockDriverState *unfiltered_bs; int job_flags = JOB_DEFAULT; if (!has_speed) { @@ -2950,10 +2951,19 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, return; } - if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) { + if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) { sync = MIRROR_SYNC_MODE_FULL; } + if (!has_replaces) { + /* We want to mirror from @bs, but keep implicit filters on top */ + unfiltered_bs = bdrv_skip_implicit_filters(bs); + if (unfiltered_bs != bs) { + replaces = unfiltered_bs->node_name; + has_replaces = true; + } + } + if (has_replaces) { BlockDriverState *to_replace_bs; AioContext *replace_aio_context; @@ -3000,7 +3010,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, void qmp_drive_mirror(DriveMirror *arg, Error **errp) { BlockDriverState *bs; - BlockDriverState *source, *target_bs; + BlockDriverState *target_backing_bs, *target_bs; AioContext *aio_context; AioContext *old_context; BlockMirrorBackingMode backing_mode; @@ -3035,12 +3045,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) } flags = bs->open_flags | BDRV_O_RDWR; - source = backing_bs(bs); - if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) { + target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs)); + if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) { arg->sync = MIRROR_SYNC_MODE_FULL; } if (arg->sync == MIRROR_SYNC_MODE_NONE) { - source = bs; + target_backing_bs = bs; } size = bdrv_getlength(bs); @@ -3066,7 +3076,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) /* Don't open backing image in create() */ flags |= BDRV_O_NO_BACKING; - if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source) + if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs) && arg->mode != NEW_IMAGE_MODE_EXISTING) { /* create new image w/o backing file */ @@ -3074,15 +3084,19 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) bdrv_img_create(arg->target, format, NULL, NULL, NULL, size, flags, false, &local_err); } else { + /* Implicit filters should not appear in the filename */ + BlockDriverState *explicit_backing = + bdrv_skip_implicit_filters(target_backing_bs); + switch (arg->mode) { case NEW_IMAGE_MODE_EXISTING: break; case NEW_IMAGE_MODE_ABSOLUTE_PATHS: /* create new image with backing file */ - bdrv_refresh_filename(source); + bdrv_refresh_filename(explicit_backing); bdrv_img_create(arg->target, format, - source->filename, - source->drv->format_name, + explicit_backing->filename, + explicit_backing->drv->format_name, NULL, size, flags, false, &local_err); break; default: diff --git a/qapi/block-core.json b/qapi/block-core.json index b04df13bea..e34796d98f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1948,7 +1948,8 @@ # # @replaces: with sync=full graph node name to be replaced by the new # image when a whole image copy is done. This can be used to repair -# broken Quorum files. (Since 2.1) +# broken Quorum files. By default, @device is replaced, although +# implicitly created filters on it are kept. (Since 2.1) # # @mode: whether and how QEMU should create a new image, default is # 'absolute-paths'. @@ -2259,7 +2260,8 @@ # # @replaces: with sync=full graph node name to be replaced by the new # image when a whole image copy is done. This can be used to repair -# broken Quorum files. +# broken Quorum files. By default, @device is replaced, although +# implicitly created filters on it are kept. # # @speed: the maximum speed, in bytes per second # -- cgit v1.2.3-55-g7522 From 05ea385afdfe5d0886265880bfd14d17192beb03 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 11 Jun 2019 21:19:26 +0200 Subject: blockdev: Fix active commit choice We have to perform an active commit whenever the top node has a parent that has taken the WRITE permission on it. This means that block-commit's @backing-file parameter is no longer allowed for such nodes, and that users will have to issue a block-job-complete command. Neither should pose a problem in practice, because this case was basically just broken until now. (Since this commit already touches block-commit's documentation, it also moves up the chunk explaining general block-commit behavior that for some reason was situated under @backing-file.) Signed-off-by: Max Reitz --- blockdev.c | 35 ++++++++++++++++++++++++++++++----- qapi/block-core.json | 39 ++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 24 deletions(-) (limited to 'qapi') diff --git a/blockdev.c b/blockdev.c index f308adc9aa..7f2561081e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2602,6 +2602,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, AioContext *aio_context; Error *local_err = NULL; int job_flags = JOB_DEFAULT; + uint64_t top_perm, top_shared; if (!has_speed) { speed = 0; @@ -2717,14 +2718,38 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } - if (top_bs == bs) { + /* + * Active commit is required if and only if someone has taken a + * WRITE permission on the top node. Historically, we have always + * used active commit for top nodes, so continue that practice + * lest we possibly break clients that rely on this behavior, e.g. + * to later attach this node to a writing parent. + * (Active commit is never really wrong.) + */ + bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared); + if (top_perm & BLK_PERM_WRITE || + bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) + { if (has_backing_file) { - error_setg(errp, "'backing-file' specified," - " but 'top' is the active layer"); + if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) { + error_setg(errp, "'backing-file' specified," + " but 'top' is the active layer"); + } else { + error_setg(errp, "'backing-file' specified, but 'top' has a " + "writer on it"); + } goto out; } - commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, - job_flags, speed, on_error, + if (!has_job_id) { + /* + * Emulate here what block_job_create() does, because it + * is possible that @bs != @top_bs (the block job should + * be named after @bs, even if @top_bs is the actual + * source) + */ + job_id = bdrv_get_device_name(bs); + } + commit_active_start(job_id, top_bs, base_bs, job_flags, speed, on_error, filter_node_name, NULL, NULL, false, &local_err); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); diff --git a/qapi/block-core.json b/qapi/block-core.json index e34796d98f..0345f6f2d2 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1569,6 +1569,18 @@ # Live commit of data from overlay image nodes into backing nodes - i.e., # writes data between 'top' and 'base' into 'base'. # +# If top == base, that is an error. +# If top has no overlays on top of it, or if it is in use by a writer, +# the job will not be completed by itself. The user needs to complete +# the job with the block-job-complete command after getting the ready +# event. (Since 2.0) +# +# If the base image is smaller than top, then the base image will be +# resized to be the same size as top. If top is smaller than the base +# image, the base will not be truncated. If you want the base image +# size to match the size of the smaller top, you can safely truncate +# it yourself once the commit operation successfully completes. +# # @job-id: identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # @@ -1593,14 +1605,15 @@ # accepted # # @backing-file: The backing file string to write into the overlay -# image of 'top'. If 'top' is the active layer, -# specifying a backing file string is an error. This -# filename is not validated. +# image of 'top'. If 'top' does not have an overlay +# image, or if 'top' is in use by a writer, specifying +# a backing file string is an error. # -# If a pathname string is such that it cannot be -# resolved by QEMU, that means that subsequent QMP or -# HMP commands must use node-names for the image in -# question, as filename lookup methods will fail. +# This filename is not validated. If a pathname string +# is such that it cannot be resolved by QEMU, that +# means that subsequent QMP or HMP commands must use +# node-names for the image in question, as filename +# lookup methods will fail. # # If not specified, QEMU will automatically determine # the backing file string to use, or error out if @@ -1609,18 +1622,6 @@ # filename or protocol. # (Since 2.1) # -# If top == base, that is an error. -# If top == active, the job will not be completed by itself, -# user needs to complete the job with the block-job-complete -# command after getting the ready event. (Since 2.0) -# -# If the base image is smaller than top, then the base image -# will be resized to be the same size as top. If top is -# smaller than the base image, the base will not be -# truncated. If you want the base image size to match the -# size of the smaller top, you can safely truncate it -# yourself once the commit operation successfully completes. -# # @speed: the maximum speed, in bytes per second # # @on-error: the action to take on an error. 'ignore' means that the request -- cgit v1.2.3-55-g7522