From ffa244c84a1a30dff69ecc80b0137a2b6d428ecb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 Jul 2020 16:23:29 +0200 Subject: file-posix: Mitigate file fragmentation with extent size hints Especially when O_DIRECT is used with image files so that the page cache indirection can't cause a merge of allocating requests, the file will fragment on the file system layer, with a potentially very small fragment size (this depends on the requests the guest sent). On Linux, fragmentation can be reduced by setting an extent size hint when creating the file (at least on XFS, it can't be set any more after the first extent has been allocated), basically giving raw files a "cluster size" for allocation. This adds a create option to set the extent size hint, and changes the default from not setting a hint to setting it to 1 MB. The main reason why qcow2 defaults to smaller cluster sizes is that COW becomes more expensive, which is not an issue with raw files, so we can choose a larger size. The tradeoff here is only potentially wasted disk space. For qcow2 (or other image formats) over file-posix, the advantage should even be greater because they grow sequentially without leaving holes, so there won't be wasted space. Setting even larger extent size hints for such images may make sense. This can be done with the new option, but let's keep the default conservative for now. The effect is very visible with a test that intentionally creates a badly fragmented file with qemu-img bench (the time difference while creating the file is already remarkable) and then looks at the number of extents and the time a simple "qemu-img map" takes. 
Without an extent size hint: $ ./qemu-img create -f raw -o extent_size_hint=0 ~/tmp/test.raw 10G Formatting '/home/kwolf/tmp/test.raw', fmt=raw size=10737418240 extent_size_hint=0 $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 0 Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 0, step size 8192) Run completed in 25.848 seconds. $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 4096 Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 4096, step size 8192) Run completed in 19.616 seconds. $ filefrag ~/tmp/test.raw /home/kwolf/tmp/test.raw: 2000000 extents found $ time ./qemu-img map ~/tmp/test.raw Offset Length Mapped to File 0 0x1e8480000 0 /home/kwolf/tmp/test.raw real 0m1,279s user 0m0,043s sys 0m1,226s With the new default extent size hint of 1 MB: $ ./qemu-img create -f raw -o extent_size_hint=1M ~/tmp/test.raw 10G Formatting '/home/kwolf/tmp/test.raw', fmt=raw size=10737418240 extent_size_hint=1048576 $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 0 Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 0, step size 8192) Run completed in 11.833 seconds. $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 4096 Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 4096, step size 8192) Run completed in 10.155 seconds. 
$ filefrag ~/tmp/test.raw /home/kwolf/tmp/test.raw: 178 extents found $ time ./qemu-img map ~/tmp/test.raw Offset Length Mapped to File 0 0x1e8480000 0 /home/kwolf/tmp/test.raw real 0m0,061s user 0m0,040s sys 0m0,014s Signed-off-by: Kevin Wolf Message-Id: <20200707142329.48303-1-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/file-posix.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'block') diff --git a/block/file-posix.c b/block/file-posix.c index 1989eae85f..8067e238cb 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -30,6 +30,7 @@ #include "block/block_int.h" #include "qemu/module.h" #include "qemu/option.h" +#include "qemu/units.h" #include "trace.h" #include "block/thread-pool.h" #include "qemu/iov.h" @@ -2318,6 +2319,14 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) if (!file_opts->has_preallocation) { file_opts->preallocation = PREALLOC_MODE_OFF; } + if (!file_opts->has_extent_size_hint) { + file_opts->extent_size_hint = 1 * MiB; + } + if (file_opts->extent_size_hint > UINT32_MAX) { + result = -EINVAL; + error_setg(errp, "Extent size hint is too large"); + goto out; + } /* Create file */ fd = qemu_open(file_opts->filename, O_RDWR | O_CREAT | O_BINARY, 0644); @@ -2375,6 +2384,27 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) } #endif } +#ifdef FS_IOC_FSSETXATTR + /* + * Try to set the extent size hint. Failure is not fatal, and a warning is + * only printed if the option was explicitly specified. 
+ */ + { + struct fsxattr attr; + result = ioctl(fd, FS_IOC_FSGETXATTR, &attr); + if (result == 0) { + attr.fsx_xflags |= FS_XFLAG_EXTSIZE; + attr.fsx_extsize = file_opts->extent_size_hint; + result = ioctl(fd, FS_IOC_FSSETXATTR, &attr); + } + if (result < 0 && file_opts->has_extent_size_hint && + file_opts->extent_size_hint) + { + warn_report("Failed to set extent size hint: %s", + strerror(errno)); + } + } +#endif /* Resize and potentially preallocate the file to the desired * final size */ @@ -2410,6 +2440,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, { BlockdevCreateOptions options; int64_t total_size = 0; + int64_t extent_size_hint = 0; + bool has_extent_size_hint = false; bool nocow = false; PreallocMode prealloc; char *buf = NULL; @@ -2421,6 +2453,11 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); + if (qemu_opt_get(opts, BLOCK_OPT_EXTENT_SIZE_HINT)) { + has_extent_size_hint = true; + extent_size_hint = + qemu_opt_get_size_del(opts, BLOCK_OPT_EXTENT_SIZE_HINT, -1); + } nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, @@ -2440,6 +2477,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, .preallocation = prealloc, .has_nocow = true, .nocow = nocow, + .has_extent_size_hint = has_extent_size_hint, + .extent_size_hint = extent_size_hint, }, }; return raw_co_create(&options, errp); @@ -2930,6 +2969,11 @@ static QemuOptsList raw_create_opts = { #endif ", full)" }, + { + .name = BLOCK_OPT_EXTENT_SIZE_HINT, + .type = QEMU_OPT_SIZE, + .help = "Extent size hint for the image file, 0 to disable" + }, { /* end of list */ } } }; -- cgit v1.2.3-55-g7522 From 80fa43e7df5d68d60a2662036b7d3d95ca8b68f3 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 6 Jul 2020 15:39:47 -0500 Subject: sheepdog: Add 
trivial backing_fmt support Sheepdog already requires that if backing_file is present, that it be another sheepdog image (see sd_co_create). Meanwhile, we want to move towards always being explicit about the backing format for other drivers where it matters. So for convenience, make qemu-img create -F sheepdog work, while rejecting all other explicit formats (note that this is only for QemuOpts usage; there is no change to the QAPI to allow a format through -blockdev). Signed-off-by: Eric Blake Message-Id: <20200706203954.341758-4-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block/sheepdog.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/sheepdog.c b/block/sheepdog.c index 6c487c8322..cbbebc1aaf 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2151,13 +2151,21 @@ static int coroutine_fn sd_co_create_opts(BlockDriver *drv, Error **errp) { BlockdevCreateOptions *create_options = NULL; - QDict *qdict, *location_qdict; + QDict *qdict = NULL, *location_qdict; Visitor *v; - char *redundancy; + char *redundancy = NULL; Error *local_err = NULL; int ret; + char *backing_fmt = NULL; redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY); + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + + if (backing_fmt && strcmp(backing_fmt, "sheepdog") != 0) { + error_setg(errp, "backing_file must be a sheepdog image"); + ret = -EINVAL; + goto fail; + } qdict = qemu_opts_to_qdict(opts, NULL); qdict_put_str(qdict, "driver", "sheepdog"); @@ -2220,6 +2228,7 @@ fail: qapi_free_BlockdevCreateOptions(create_options); qobject_unref(qdict); g_free(redundancy); + g_free(backing_fmt); return ret; } @@ -3177,6 +3186,11 @@ static QemuOptsList sd_create_opts = { .type = QEMU_OPT_STRING, .help = "File name of a base image" }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Must be 'sheepdog' if present", + }, { .name = BLOCK_OPT_PREALLOC, .type = QEMU_OPT_STRING, -- cgit 
v1.2.3-55-g7522 From d51a814cf41033d2d29b050e04d85155ac941221 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 6 Jul 2020 15:39:48 -0500 Subject: vmdk: Add trivial backing_fmt support vmdk already requires that if backing_file is present, that it be another vmdk image (see vmdk_co_do_create). Meanwhile, we want to move towards always being explicit about the backing format for other drivers where it matters. So for convenience, make qemu-img create -F vmdk work, while rejecting all other explicit formats (note that this is only for QemuOpts usage; there is no change to the QAPI to allow a format through -blockdev). Signed-off-by: Eric Blake Message-Id: <20200706203954.341758-5-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block/vmdk.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'block') diff --git a/block/vmdk.c b/block/vmdk.c index 28cec50f38..bf9df5ce92 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2633,6 +2633,14 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, bool zeroed_grain; bool compat6; VMDKCreateOptsData data; + char *backing_fmt = NULL; + + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) { + error_setg(errp, "backing_file must be a vmdk image"); + ret = -EINVAL; + goto exit; + } if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) { ret = -EINVAL; @@ -2691,6 +2699,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, vmdk_co_create_opts_cb, &data, errp); exit: + g_free(backing_fmt); g_free(adapter_type); g_free(backing_file); g_free(hw_version); @@ -3026,6 +3035,11 @@ static QemuOptsList vmdk_create_opts = { .type = QEMU_OPT_STRING, .help = "File name of a base image" }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Must be 'vmdk' if present", + }, { .name = BLOCK_OPT_COMPAT6, .type = QEMU_OPT_BOOL, -- cgit v1.2.3-55-g7522 From 344acbd62ffdbeb7f803644ad46a8129059f6823 Mon Sep 17 
00:00:00 2001 From: Eric Blake Date: Mon, 6 Jul 2020 15:39:49 -0500 Subject: qcow: Tolerate backing_fmt= qcow has no space in the metadata to store a backing format, and there are existing qcow images backed either by raw images or by images in other formats (usually qcow), which rely on probing to tell the difference. On the bright side, because we probe every time, raw files are marked as probed and we thus forbid a commit action into the backing file where guest-controlled contents could change the result of the probe next time around (the iotest added here proves that). Still, allowing the user to specify the backing format during creation, even if we can't record it, is a good thing. This patch blindly allows any value that resolves to a known driver, even if the user's request does not match what probing finds; then the next patch will further enhance things to verify that the user's request matches what we actually probe. With this and the next patch in place, we will finally be ready to deprecate the creation of images where a backing format was not explicitly specified by the user. Note that this is only for QemuOpts usage; there is no change to the QAPI to allow a format through -blockdev. Add a new iotest 301 just for qcow, to demonstrate the latest behavior, and to make it easier to show the improvements made in the next patch. 
Signed-off-by: Eric Blake Message-Id: <20200706203954.341758-6-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow.c | 20 ++++++++++- tests/qemu-iotests/301 | 88 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/301.out | 60 +++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 4 files changed, 168 insertions(+), 1 deletion(-) create mode 100755 tests/qemu-iotests/301 create mode 100644 tests/qemu-iotests/301.out (limited to 'block') diff --git a/block/qcow.c b/block/qcow.c index 1e134f3445..e514a86fe5 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -938,10 +938,11 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, { BlockdevCreateOptions *create_options = NULL; BlockDriverState *bs = NULL; - QDict *qdict; + QDict *qdict = NULL; Visitor *v; const char *val; int ret; + char *backing_fmt; static const QDictRenames opt_renames[] = { { BLOCK_OPT_BACKING_FILE, "backing-file" }, @@ -949,6 +950,17 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, { NULL, NULL }, }; + /* + * We can't actually store a backing format, but can check that + * the user's request made sense. 
+ */ + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + if (backing_fmt && !bdrv_find_format(backing_fmt)) { + error_setg(errp, "unrecognized backing format '%s'", backing_fmt); + ret = -EINVAL; + goto fail; + } + /* Parse options and convert legacy syntax */ qdict = qemu_opts_to_qdict_filtered(opts, NULL, &qcow_create_opts, true); @@ -1012,6 +1024,7 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, ret = 0; fail: + g_free(backing_fmt); qobject_unref(qdict); bdrv_unref(bs); qapi_free_BlockdevCreateOptions(create_options); @@ -1146,6 +1159,11 @@ static QemuOptsList qcow_create_opts = { .type = QEMU_OPT_STRING, .help = "File name of a base image" }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Format of the backing image", + }, { .name = BLOCK_OPT_ENCRYPT, .type = QEMU_OPT_BOOL, diff --git a/tests/qemu-iotests/301 b/tests/qemu-iotests/301 new file mode 100755 index 0000000000..3823e95617 --- /dev/null +++ b/tests/qemu-iotests/301 @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# +# Test qcow backing file warnings +# +# Copyright (C) 2020 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + _rm_test_img "$TEST_IMG.qcow2" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. 
./common.rc +. ./common.filter + +_supported_fmt qcow +_supported_proto file +_supported_os Linux + +size=32M + +echo +echo "== qcow backed by qcow ==" + +TEST_IMG="$TEST_IMG.base" _make_test_img $size +_make_test_img -b "$TEST_IMG.base" $size +_img_info +_make_test_img -b "$TEST_IMG.base" -F $IMGFMT $size +_img_info + +echo +echo "== mismatched command line detection ==" + +_make_test_img -b "$TEST_IMG.base" -F vmdk +_make_test_img -b "$TEST_IMG.base" -F vmdk $size +echo +# Use of -u bypasses the backing format sanity check +_make_test_img -u -b "$TEST_IMG.base" -F vmdk +_make_test_img -u -b "$TEST_IMG.base" -F vmdk $size +echo +# But the format must still be recognized +_make_test_img -b "$TEST_IMG.base" -F garbage $size +_make_test_img -u -b "$TEST_IMG.base" -F garbage $size +_img_info + +echo +echo "== qcow backed by raw ==" + +rm "$TEST_IMG.base" +truncate --size=$size "$TEST_IMG.base" +_make_test_img -b "$TEST_IMG.base" $size +_img_info +_make_test_img -b "$TEST_IMG.base" -F raw $size +_img_info + +echo +echo "== commit cannot change type of raw backing file ==" +TEST_IMG="$TEST_IMG.qcow2" IMGFMT=qcow2 _make_test_img $size +truncate --size=$size "$TEST_IMG.qcow2" +$QEMU_IMG convert -n -f raw -O $IMGFMT "$TEST_IMG.qcow2" "$TEST_IMG" +$QEMU_IMG commit -f $IMGFMT "$TEST_IMG" && echo "unexpected success" +TEST_IMG="$TEST_IMG.base" _img_info + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/301.out b/tests/qemu-iotests/301.out new file mode 100644 index 0000000000..adaf11d42d --- /dev/null +++ b/tests/qemu-iotests/301.out @@ -0,0 +1,60 @@ +QA output created by 301 + +== qcow backed by qcow == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 32 MiB (33554432 bytes) +cluster_size: 512 +backing file: TEST_DIR/t.IMGFMT.base +Formatting 
'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 32 MiB (33554432 bytes) +cluster_size: 512 +backing file: TEST_DIR/t.IMGFMT.base + +== mismatched command line detection == +qemu-img: TEST_DIR/t.IMGFMT: invalid VMDK image descriptor +Could not open backing image to determine size. +qemu-img: warning: Could not verify backing image. This may become an error in future versions. +invalid VMDK image descriptor +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=vmdk + +qemu-img: TEST_DIR/t.IMGFMT: Image creation needs a size parameter +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=vmdk + +qemu-img: warning: Could not verify backing image. This may become an error in future versions. +Unknown driver 'garbage' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=garbage +qemu-img: TEST_DIR/t.IMGFMT: unrecognized backing format 'garbage' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=garbage +qemu-img: TEST_DIR/t.IMGFMT: unrecognized backing format 'garbage' +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 32 MiB (33554432 bytes) +cluster_size: 512 +backing file: TEST_DIR/t.IMGFMT.base + +== qcow backed by raw == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 32 MiB (33554432 bytes) +cluster_size: 512 +backing file: TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 32 MiB (33554432 bytes) +cluster_size: 512 +backing file: TEST_DIR/t.IMGFMT.base + +== commit cannot change type 
of raw backing file == +Formatting 'TEST_DIR/t.qcow.IMGFMT', fmt=IMGFMT size=33554432 +qemu-img: Block job failed: Operation not permitted +image: TEST_DIR/t.IMGFMT.base +file format: raw +virtual size: 32 MiB (33554432 bytes) +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 9b07a7ed03..a4f9e11e7a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -306,3 +306,4 @@ 295 rw 296 rw 297 meta +301 backing quick -- cgit v1.2.3-55-g7522 From bc5ee6da7122f6fe93ed07241a44315a331487e9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 6 Jul 2020 15:39:51 -0500 Subject: qcow2: Deprecate use of qemu-img amend to change backing file The use of 'qemu-img amend' to change qcow2 backing files is not tested very well. In particular, our implementation has a bug where if a new backing file is provided without a format, then the prior format is blindly reused, even if this results in data corruption, but this is not caught by iotests. There are also situations where amending other options needs access to the original backing file (for example, on a downgrade to a v2 image, knowing whether a v3 zero cluster must be allocated or may be left unallocated depends on knowing whether the backing file already reads as zero), but the command line does not have a nice way to tell us both the backing file to use for opening the image as well as the backing file to install after the operation is complete. Even if we do allow changing the backing file, it is redundant with the existing ability to change backing files via 'qemu-img rebase -u'. It is time to deprecate this support (leaving the existing behavior intact, even if it is buggy), and at a point in the future, require the use of only 'qemu-img rebase' for adjusting backing chain relations, saving 'qemu-img amend' for changes unrelated to the backing chain. 
Signed-off-by: Eric Blake Message-Id: <20200706203954.341758-8-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow2.c | 5 +++++ docs/system/deprecated.rst | 12 ++++++++++++ docs/tools/qemu-img.rst | 4 ++++ tests/qemu-iotests/061.out | 1 + tests/qemu-iotests/082.out | 2 ++ 5 files changed, 24 insertions(+) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index ea33673c55..f3fc2707cd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -5511,6 +5511,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (backing_file || backing_format) { + if (g_strcmp0(backing_file, s->image_backing_file) || + g_strcmp0(backing_format, s->image_backing_format)) { + warn_report("Deprecated use of amend to alter the backing file; " + "use qemu-img rebase instead"); + } ret = qcow2_change_backing_file(bs, backing_file ?: s->image_backing_file, backing_format ?: s->image_backing_format); diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst index c014e049c3..c1f019b9d2 100644 --- a/docs/system/deprecated.rst +++ b/docs/system/deprecated.rst @@ -427,6 +427,18 @@ kernel in 2018, and has also been dropped from glibc. Related binaries ---------------- +qemu-img amend to adjust backing file (since 5.1) +''''''''''''''''''''''''''''''''''''''''''''''''' + +The use of ``qemu-img amend`` to modify the name or format of a qcow2 +backing image is deprecated; this functionality was never fully +documented or tested, and interferes with other amend operations that +need access to the original backing image (such as deciding whether a +v3 zero cluster may be left unallocated when converting to a v2 +image). Rather, any changes to the backing chain should be performed +with ``qemu-img rebase -u`` either before or after the remaining +changes being performed by amend, as appropriate. 
+ Backwards compatibility ----------------------- diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index e33f5575e3..c35bd64822 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -258,6 +258,10 @@ Command description: Amends the image format specific *OPTIONS* for the image file *FILENAME*. Not all file formats support this operation. + The set of options that can be amended are dependent on the image + format, but note that amending the backing chain relationship should + instead be performed with ``qemu-img rebase``. + --force allows some unsafe operations. Currently for -f luks, it allows to erase the last encryption key, and to overwrite an active encryption key. diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index b0f8befe30..44e3c624f9 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -370,6 +370,7 @@ wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: warning: Deprecated use of amend to alter the backing file; use qemu-img rebase instead read 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index f7b3d54b28..a38a26fc57 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -783,10 +783,12 @@ Amend options for 'qcow2': size= - Virtual disk size Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 +qemu-img: warning: Deprecated use of amend to alter the backing file; use qemu-img rebase instead Testing: rebase -u -b -f qcow2 TEST_DIR/t.qcow2 Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,? 
TEST_DIR/t.qcow2 +qemu-img: warning: Deprecated use of amend to alter the backing file; use qemu-img rebase instead Testing: rebase -u -b -f qcow2 TEST_DIR/t.qcow2 -- cgit v1.2.3-55-g7522 From e54ee1b385a9d084b4052b6db7391ea2fd799fa8 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 6 Jul 2020 15:39:53 -0500 Subject: block: Add support to warn on backing file change without format For now, this is a mechanical addition; all callers pass false. But the next patch will use it to improve 'qemu-img rebase -u' when selecting a backing file with no format. Signed-off-by: Eric Blake Reviewed-by: Peter Krempa Reviewed-by: Ján Tomko Message-Id: <20200706203954.341758-10-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 13 ++++++++++--- block/qcow2.c | 2 +- block/stream.c | 2 +- blockdev.c | 3 ++- include/block/block.h | 4 ++-- qemu-img.c | 4 ++-- 6 files changed, 18 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block.c b/block.c index 6925e57d7c..4acfebf0e8 100644 --- a/block.c +++ b/block.c @@ -1206,7 +1206,8 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, } ret = bdrv_change_backing_file(parent, filename, - base->drv ? base->drv->format_name : ""); + base->drv ? 
base->drv->format_name : "", + false); if (ret < 0) { error_setg_errno(errp, -ret, "Could not update backing file link"); } @@ -4680,8 +4681,8 @@ int bdrv_check(BlockDriverState *bs, * image file header * -ENOTSUP - format driver doesn't support changing the backing file */ -int bdrv_change_backing_file(BlockDriverState *bs, - const char *backing_file, const char *backing_fmt) +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn) { BlockDriver *drv = bs->drv; int ret; @@ -4695,6 +4696,12 @@ int bdrv_change_backing_file(BlockDriverState *bs, return -EINVAL; } + if (warn && backing_file && !backing_fmt) { + warn_report("Deprecated use of backing file without explicit " + "backing format, use of this image requires " + "potentially unsafe format probing"); + } + if (drv->bdrv_change_backing_file != NULL) { ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); } else { diff --git a/block/qcow2.c b/block/qcow2.c index f3fc2707cd..fadf3422f8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3627,7 +3627,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, - backing_format); + backing_format, false); if (ret < 0) { error_setg_errno(errp, -ret, "Could not assign backing file '%s' " "with format '%s'", qcow2_opts->backing_file, diff --git a/block/stream.c b/block/stream.c index aa2e7af98e..310ccbaa4c 100644 --- a/block/stream.c +++ b/block/stream.c @@ -78,7 +78,7 @@ static int stream_prepare(Job *job) } } bdrv_set_backing_hd(bs, base, &local_err); - ret = bdrv_change_backing_file(bs, base_id, base_fmt); + ret = bdrv_change_backing_file(bs, base_id, base_fmt, false); if (local_err) { error_report_err(local_err); return -EPERM; diff --git a/blockdev.c b/blockdev.c index 59b0b8ffaf..3848a9c8ab 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3416,7 +3416,8 @@ void qmp_change_backing_file(const char *device, } ret 
= bdrv_change_backing_file(image_bs, backing_file, - image_bs->drv ? image_bs->drv->format_name : ""); + image_bs->drv ? image_bs->drv->format_name : "", + false); if (ret < 0) { error_setg_errno(errp, -ret, "Could not change backing file to '%s'", diff --git a/include/block/block.h b/include/block/block.h index bca3bb831c..6e36154061 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -405,8 +405,8 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); int bdrv_commit(BlockDriverState *bs); int bdrv_make_empty(BdrvChild *c, Error **errp); -int bdrv_change_backing_file(BlockDriverState *bs, - const char *backing_file, const char *backing_fmt); +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn); void bdrv_register(BlockDriver *bdrv); int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, const char *backing_file_str); diff --git a/qemu-img.c b/qemu-img.c index 46d2796fb2..a6df64a949 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3797,9 +3797,9 @@ static int img_rebase(int argc, char **argv) * doesn't change when we switch the backing file. 
*/ if (out_baseimg && *out_baseimg) { - ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); + ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt, false); } else { - ret = bdrv_change_backing_file(bs, NULL, NULL); + ret = bdrv_change_backing_file(bs, NULL, NULL, false); } if (ret == -ENOSPC) { -- cgit v1.2.3-55-g7522 From e6cada9231af022ffc2e351c70dfaea8530496e1 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 9 Jul 2020 15:50:45 +0200 Subject: block: Avoid stale pointer dereference in blk_get_aio_context() It is possible for blk_remove_bs() to race with blk_drain_all(), causing the latter to dereference a stale blk->root pointer: blk_remove_bs(blk) bdrv_root_unref_child(blk->root) child_bs = blk->root->bs bdrv_detach_child(blk->root) ... g_free(blk->root) <============== blk->root becomes stale bdrv_unref(child_bs) <============ yield at some point A blk_drain_all() can be triggered by some guest action in the meantime, eg. on POWER, SLOF might disable bus mastering on a virtio-scsi-pci device: virtio_write_config() virtio_pci_stop_ioeventfd() virtio_bus_stop_ioeventfd() virtio_scsi_dataplane_stop() blk_drain_all() blk_get_aio_context() bs = blk->root ? blk->root->bs : NULL ^^^^^^^^^ stale Then, depending on one's luck, QEMU either crashes with SEGV or hits the assertion in blk_get_aio_context(). blk->root is set by blk_insert_bs() which calls bdrv_root_attach_child() first. The blk_remove_bs() function should rollback the changes made by blk_insert_bs() in the opposite order (or it should be documented somewhere why this isn't the case). Clear blk->root before calling bdrv_root_unref_child() in blk_remove_bs(). 
Signed-off-by: Greg Kurz Message-Id: <159430264541.389456.11925072456012783045.stgit@bahia.lan> Signed-off-by: Kevin Wolf --- block/block-backend.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/block-backend.c b/block/block-backend.c index 6936b25c83..0bf0188133 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -808,6 +808,7 @@ void blk_remove_bs(BlockBackend *blk) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; BlockDriverState *bs; + BdrvChild *root; notifier_list_notify(&blk->remove_bs_notifiers, blk); if (tgm->throttle_state) { @@ -825,8 +826,9 @@ void blk_remove_bs(BlockBackend *blk) * to avoid that and a potential QEMU crash. */ blk_drain(blk); - bdrv_root_unref_child(blk->root); + root = blk->root; blk->root = NULL; + bdrv_root_unref_child(root); } /* -- cgit v1.2.3-55-g7522