From 74c44a59348f7fac96c32621e37ee636546f26f8 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 10 Apr 2018 18:05:03 +0200 Subject: Fix error message about compressed clusters with OFLAG_COPIED Compressed clusters are not supposed to have the COPIED bit set. "qemu-img check" detects that and prints an error message reporting the number of the affected host cluster. This doesn't make much sense because compressed clusters are not aligned to host clusters, so it would be better to report the offset instead. Plus, the calculation is wrong and it uses the raw L2 entry as if it was simply an offset. This patch fixes the error message and reports the offset of the compressed cluster. Signed-off-by: Alberto Garcia Message-id: 0f687957feb72e80c740403191a47e607c2463fe.1523376013.git.berto@igalia.com Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6b8b63514a..2dc23005b7 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters don't have QCOW_OFLAG_COPIED */ if (l2_entry & QCOW_OFLAG_COPIED) { - fprintf(stderr, "ERROR: cluster %" PRId64 ": " + fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": " "copied flag must never be set for compressed " - "clusters\n", l2_entry >> s->cluster_bits); + "clusters\n", l2_entry & s->cluster_offset_mask); l2_entry &= ~QCOW_OFLAG_COPIED; res->corruptions++; } -- cgit v1.2.3-55-g7522 From 52253998ec3e523c9e20ae81e2a6431d8ff733ba Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Tue, 17 Apr 2018 15:37:04 +0300 Subject: qcow2: Give the refcount cache the minimum possible size by default The L2 and refcount caches have default sizes that can be overridden using the l2-cache-size and refcount-cache-size (an additional parameter named cache-size sets the combined size of both caches). Unless forced by one of the aforementioned parameters, QEMU will set the unspecified sizes so that the L2 cache is 4 times larger than the refcount cache. This is based on the premise that the refcount metadata needs to be only a fourth of the L2 metadata to cover the same amount of disk space. This is incorrect for two reasons: a) The amount of disk covered by an L2 table depends solely on the cluster size, but in the case of a refcount block it depends on the cluster size *and* the width of each refcount entry. The 4/1 ratio is only valid with 16-bit entries (the default). b) When we talk about disk space and L2 tables we are talking about guest space (L2 tables map guest clusters to host clusters), whereas refcount blocks are used for host clusters (including L1/L2 tables and the refcount blocks themselves). On a fully populated (and uncompressed) qcow2 file, image size > virtual size so there are more refcount entries than L2 entries. Problem (a) could be fixed by adjusting the algorithm to take into account the refcount entry width. Problem (b) could be fixed by increasing a bit the refcount cache size to account for the clusters used for qcow2 metadata. However this patch takes a completely different approach and instead of keeping a ratio between both cache sizes it assigns as much as possible to the L2 cache and the remainder to the refcount cache. The reason is that L2 tables are used for every single I/O request from the guest and the effect of increasing the cache is significant and clearly measurable. Refcount blocks are however only used for cluster allocation and internal snapshots and in practice are accessed sequentially in most cases, so the effect of increasing the cache is negligible (even when doing random writes from the guest). So, make the refcount cache as small as possible unless the user explicitly asks for a larger one. Signed-off-by: Alberto Garcia Reviewed-by: Eric Blake Reviewed-by: Max Reitz Message-id: 9695182c2eb11b77cb319689a1ebaa4e7c9d6591.1523968389.git.berto@igalia.com Signed-off-by: Max Reitz --- block/qcow2.c | 31 +++++++++++++++++++------------ block/qcow2.h | 4 ---- tests/qemu-iotests/137.out | 2 +- 3 files changed, 20 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index 2f36e632f9..6d532470a8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } else if (refcount_cache_size_set) { *l2_cache_size = combined_cache_size - *refcount_cache_size; } else { - *refcount_cache_size = combined_cache_size - / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1); - *l2_cache_size = combined_cache_size - *refcount_cache_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); + uint64_t min_refcount_cache = + (uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + + /* Assign as much memory as possible to the L2 cache, and + * use the remainder for the refcount cache */ + if (combined_cache_size >= max_l2_cache + min_refcount_cache) { + *l2_cache_size = max_l2_cache; + *refcount_cache_size = combined_cache_size - *l2_cache_size; + } else { + *refcount_cache_size = + MIN(combined_cache_size, min_refcount_cache); + *l2_cache_size = combined_cache_size - *refcount_cache_size; + } } } else { - if (!l2_cache_size_set && !refcount_cache_size_set) { + if (!l2_cache_size_set) { *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, (uint64_t)DEFAULT_L2_CACHE_CLUSTERS * s->cluster_size); - *refcount_cache_size = *l2_cache_size - / DEFAULT_L2_REFCOUNT_SIZE_RATIO; - } else if (!l2_cache_size_set) { - *l2_cache_size = *refcount_cache_size - * DEFAULT_L2_REFCOUNT_SIZE_RATIO; - } else if (!refcount_cache_size_set) { - *refcount_cache_size = *l2_cache_size - / DEFAULT_L2_REFCOUNT_SIZE_RATIO; + } + if (!refcount_cache_size_set) { + *refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; } } diff --git a/block/qcow2.h b/block/qcow2.h index adf5c3950f..01b5250415 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -77,10 +77,6 @@ #define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ #define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ -/* The refblock cache needs only a fourth of the L2 cache size to cover as many - * clusters */ -#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4 - #define DEFAULT_CLUSTER_SIZE 65536 diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index e28e1eadba..96724a6c33 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -22,7 +22,7 @@ refcount-cache-size may not exceed cache-size L2 cache size too big L2 cache entry size must be a power of two between 512 and the cluster size (65536) L2 cache entry size must be a power of two between 512 and the cluster size (65536) -L2 cache size too big +Refcount cache size too big Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all') Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all -- cgit v1.2.3-55-g7522 From 6c6f24fd84895d03baa898bbc4324dd4ccc97071 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 21 Apr 2018 15:29:21 +0200 Subject: block: Add COR filter driver This adds a simple copy-on-read filter driver. It relies on the already existing COR functionality in the central block layer code, which may be moved here once we no longer need it there. Signed-off-by: Max Reitz Message-id: 20180421132929.21610-2-mreitz@redhat.com Reviewed-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/Makefile.objs | 2 +- block/copy-on-read.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++ qapi/block-core.json | 5 +- 3 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 block/copy-on-read.c (limited to 'block') diff --git a/block/Makefile.objs b/block/Makefile.objs index d644bac60a..899bfb5e2c 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o block-obj-y += backup.o block-obj-$(CONFIG_REPLICATION) += replication.o -block-obj-y += throttle.o +block-obj-y += throttle.o copy-on-read.o block-obj-y += crypto.o diff --git a/block/copy-on-read.c b/block/copy-on-read.c new file mode 100644 index 0000000000..823ec751c4 --- /dev/null +++ b/block/copy-on-read.c @@ -0,0 +1,171 @@ +/* + * Copy-on-read filter block driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Author: + * Max Reitz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" + + +static int cor_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, + errp); + if (!bs->file) { + return -EINVAL; + } + + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + + bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags; + + return 0; +} + + +static void cor_close(BlockDriverState *bs) +{ +} + + +#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_RESIZE) +#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH) + +static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED; + *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; + return; + } + + *nperm = (perm & PERM_PASSTHROUGH) | + (c->perm & PERM_UNCHANGED); + *nshared = (shared & PERM_PASSTHROUGH) | + (c->shared_perm & PERM_UNCHANGED); +} + + +static int64_t cor_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + + +static int cor_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + return bdrv_truncate(bs->file, offset, prealloc, errp); +} + + +static int coroutine_fn cor_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, + flags | BDRV_REQ_COPY_ON_READ); +} + + +static int coroutine_fn cor_co_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + + +static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + + +static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, + int64_t offset, int bytes) +{ + return bdrv_co_pdiscard(bs->file->bs, offset, bytes); +} + + +static void cor_eject(BlockDriverState *bs, bool eject_flag) +{ + bdrv_eject(bs->file->bs, eject_flag); +} + + +static void cor_lock_medium(BlockDriverState *bs, bool locked) +{ + bdrv_lock_medium(bs->file->bs, locked); +} + + +static bool cor_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) +{ + return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); +} + + +BlockDriver bdrv_copy_on_read = { + .format_name = "copy-on-read", + + .bdrv_open = cor_open, + .bdrv_close = cor_close, + .bdrv_child_perm = cor_child_perm, + + .bdrv_getlength = cor_getlength, + .bdrv_truncate = cor_truncate, + + .bdrv_co_preadv = cor_co_preadv, + .bdrv_co_pwritev = cor_co_pwritev, + .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, + .bdrv_co_pdiscard = cor_co_pdiscard, + + .bdrv_eject = cor_eject, + .bdrv_lock_medium = cor_lock_medium, + + .bdrv_co_block_status = bdrv_co_block_status_from_file, + + .bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter, + + .has_variable_length = true, + .is_filter = true, +}; + +static void bdrv_copy_on_read_init(void) +{ + bdrv_register(&bdrv_copy_on_read); +} + +block_init(bdrv_copy_on_read_init); diff --git a/qapi/block-core.json b/qapi/block-core.json index 17ffd44cce..55728cb823 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2510,11 +2510,12 @@ # @vxhs: Since 2.10 # @throttle: Since 2.11 # @nvme: Since 2.12 +# @copy-on-read: Since 2.13 # # Since: 2.9 ## { 'enum': 'BlockdevDriver', - 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', + 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed', @@ -3531,6 +3532,7 @@ 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', + 'copy-on-read':'BlockdevOptionsGenericFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', 'ftp': 'BlockdevOptionsCurlFtp', @@ -4058,6 +4060,7 @@ 'blkverify': 'BlockdevCreateNotSupported', 'bochs': 'BlockdevCreateNotSupported', 'cloop': 'BlockdevCreateNotSupported', + 'copy-on-read': 'BlockdevCreateNotSupported', 'dmg': 'BlockdevCreateNotSupported', 'file': 'BlockdevCreateOptionsFile', 'ftp': 'BlockdevCreateNotSupported', -- cgit v1.2.3-55-g7522 From c6035964f8316b504060618d05b5dd434f18595b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 21 Apr 2018 15:29:23 +0200 Subject: block: Add BDRV_REQ_WRITE_UNCHANGED flag This flag signifies that a write request will not change the visible disk content. With this flag set, it is sufficient to have the BLK_PERM_WRITE_UNCHANGED permission instead of BLK_PERM_WRITE. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Alberto Garcia Message-id: 20180421132929.21610-4-mreitz@redhat.com Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/io.c | 6 +++++- include/block/block.h | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/io.c b/block/io.c index 4fad5ac2fe..9e8449e795 100644 --- a/block/io.c +++ b/block/io.c @@ -1504,7 +1504,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); - assert(child->perm & BLK_PERM_WRITE); + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + } else { + assert(child->perm & BLK_PERM_WRITE); + } assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); diff --git a/include/block/block.h b/include/block/block.h index 397b5e8d44..3894edda9d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -54,8 +54,12 @@ typedef enum { BDRV_REQ_FUA = 0x10, BDRV_REQ_WRITE_COMPRESSED = 0x20, + /* Signifies that this write request will not change the visible disk + * content. */ + BDRV_REQ_WRITE_UNCHANGED = 0x40, + /* Mask of valid flags */ - BDRV_REQ_MASK = 0x3f, + BDRV_REQ_MASK = 0x7f, } BdrvRequestFlags; typedef struct BlockSizes { -- cgit v1.2.3-55-g7522 From 7adcf59fecf3c8ce9330430187350b53f9e50cf7 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 21 Apr 2018 15:29:24 +0200 Subject: block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Alberto Garcia Message-id: 20180421132929.21610-5-mreitz@redhat.com Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/io.c b/block/io.c index 9e8449e795..ca96b487eb 100644 --- a/block/io.c +++ b/block/io.c @@ -1118,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, /* FIXME: Should we (perhaps conditionally) be setting * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy * that still correctly reads as zero? */ - ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0); + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, + BDRV_REQ_WRITE_UNCHANGED); } else { /* This does not change the data on the disk, it is not * necessary to flush even in cache=writethrough mode. */ ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, - &local_qiov, 0); + &local_qiov, + BDRV_REQ_WRITE_UNCHANGED); } if (ret < 0) { -- cgit v1.2.3-55-g7522 From 1b1a920b713af6af795d49d0e3d2a8a65020bf82 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 21 Apr 2018 15:29:25 +0200 Subject: block/quorum: Support BDRV_REQ_WRITE_UNCHANGED We just need to forward it to quorum's children (except in case of a rewrite because of corruption), but for that we first have to support flags in child requests at all. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Alberto Garcia Message-id: 20180421132929.21610-6-mreitz@redhat.com Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/quorum.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/quorum.c b/block/quorum.c index a5051da56e..e448d7e384 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -115,6 +115,7 @@ struct QuorumAIOCB { /* Request metadata */ uint64_t offset; uint64_t bytes; + int flags; QEMUIOVector *qiov; /* calling IOV */ @@ -157,7 +158,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, QEMUIOVector *qiov, uint64_t offset, - uint64_t bytes) + uint64_t bytes, + int flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); @@ -168,6 +170,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, .bs = bs, .offset = offset, .bytes = bytes, + .flags = flags, .qiov = qiov, .votes.compare = quorum_sha256_compare, .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list), @@ -271,9 +274,11 @@ static void quorum_rewrite_entry(void *opaque) BDRVQuorumState *s = acb->bs->opaque; /* Ignore any errors, it's just a correction attempt for already - * corrupted data. */ + * corrupted data. + * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the + * area with different data from the other children. */ bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes, - acb->qiov, 0); + acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED); /* Wake up the caller after the last rewrite */ acb->rewrite_count--; @@ -673,7 +678,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); int ret; acb->is_read = true; @@ -699,7 +704,7 @@ static void write_quorum_entry(void *opaque) sacb->bs = s->children[i]->bs; sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, - acb->qiov, 0); + acb->qiov, acb->flags); if (sacb->ret == 0) { acb->success_count++; } else { @@ -719,7 +724,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); int i, ret; for (i = 0; i < s->num_children; i++) { @@ -961,6 +966,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, } s->next_child_index = s->num_children; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + g_free(opened); goto exit; -- cgit v1.2.3-55-g7522 From 228345bf5db8bc97d1c64f062e138d389065d1ab Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 21 Apr 2018 15:29:26 +0200 Subject: block: Support BDRV_REQ_WRITE_UNCHANGED in filters Update the rest of the filter drivers to support BDRV_REQ_WRITE_UNCHANGED. They already forward write request flags to their children, so we just have to announce support for it. This patch does not cover the replication driver because that currently does not support flags at all, and because it just grabs the WRITE permission for its children when it can, so we should be fine just submitting the incoming WRITE_UNCHANGED requests as normal writes. It also does not cover format drivers for similar reasons. They all use bdrv_format_default_perms() as their .bdrv_child_perm() implementation so they just always grab the WRITE permission for their file children whenever possible. In addition, it often would be difficult to ascertain whether incoming unchanging writes end up as unchanging writes in their files. So we just leave them as normal potentially changing writes. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Alberto Garcia Message-id: 20180421132929.21610-7-mreitz@redhat.com Reviewed-by: Kevin Wolf Signed-off-by: Max Reitz --- block/blkdebug.c | 9 +++++---- block/blkreplay.c | 3 +++ block/blkverify.c | 3 +++ block/copy-on-read.c | 10 ++++++---- block/mirror.c | 2 ++ block/raw-format.c | 9 +++++---- block/throttle.c | 6 ++++-- 7 files changed, 28 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/blkdebug.c b/block/blkdebug.c index 053372c22e..526af2a808 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto out; } - bs->supported_write_flags = BDRV_REQ_FUA & - bs->file->bs->supported_write_flags; - bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & - bs->file->bs->supported_zero_flags; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags); ret = -EINVAL; /* Set alignment overrides */ diff --git a/block/blkreplay.c b/block/blkreplay.c index fe5a9b4a98..b016dbeee7 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + ret = 0; fail: return ret; diff --git a/block/blkverify.c b/block/blkverify.c index 754cc9e857..da97ee5927 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + ret = 0; fail: qemu_opts_del(opts); diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 823ec751c4..6a97208888 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -33,11 +33,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - bs->supported_write_flags = BDRV_REQ_FUA & - bs->file->bs->supported_write_flags; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & + bs->file->bs->supported_write_flags); - bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & - bs->file->bs->supported_zero_flags; + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags); return 0; } diff --git a/block/mirror.c b/block/mirror.c index 6aa38db114..a4197bb975 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1134,6 +1134,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, mirror_top_bs->implicit = true; } mirror_top_bs->total_sectors = bs->total_sectors; + mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; + mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep diff --git a/block/raw-format.c b/block/raw-format.c index a378547c99..fe33693a2d 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -415,10 +415,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } bs->sg = bs->file->bs->sg; - bs->supported_write_flags = BDRV_REQ_FUA & - bs->file->bs->supported_write_flags; - bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & - bs->file->bs->supported_zero_flags; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags); if (bs->probed && !bdrv_is_read_only(bs)) { fprintf(stderr, diff --git a/block/throttle.c b/block/throttle.c index 95ed06acd8..e298827f95 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -81,8 +81,10 @@ static int throttle_open(BlockDriverState *bs, QDict *options, if (!bs->file) { return -EINVAL; } - bs->supported_write_flags = bs->file->bs->supported_write_flags; - bs->supported_zero_flags = bs->file->bs->supported_zero_flags; + bs->supported_write_flags = bs->file->bs->supported_write_flags | + BDRV_REQ_WRITE_UNCHANGED; + bs->supported_zero_flags = bs->file->bs->supported_zero_flags | + BDRV_REQ_WRITE_UNCHANGED; return throttle_configure_tgm(bs, tgm, options, errp); } -- cgit v1.2.3-55-g7522