summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorKevin Wolf2018-05-15 16:19:53 +0200
committerKevin Wolf2018-05-15 16:19:53 +0200
commit1fce860ea5eba1ca00a67911fc0b8a5d80009514 (patch)
tree3dcbee27fc48c02f8008b5b098aacedeaf2e373a /block
parentblockjob: Add block_job_driver() (diff)
parentiotests: Add test for -U/force-share conflicts (diff)
downloadqemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.tar.gz
qemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.tar.xz
qemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.zip
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-05-15' into queue-block
- Copy-on-read block driver - The qcow2 default refcount cache size has been decreased - Various bug fixes # gpg: Signature made Tue May 15 16:18:25 2018 CEST # gpg: using RSA key F407DB0061D5CF40 # gpg: Good signature from "Max Reitz <mreitz@redhat.com>" # Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40 * mreitz/tags/pull-block-2018-05-15: (21 commits) iotests: Add test for -U/force-share conflicts qemu-img: Use only string options in img_open_opts qemu-io: Use purely string blockdev options block: Document BDRV_REQ_WRITE_UNCHANGED support qemu-img: Check post-truncation size iotests: Add test for COR across nodes iotests: Copy 197 for COR filter driver iotests: Clean up wrap image in 197 block: Support BDRV_REQ_WRITE_UNCHANGED in filters block/quorum: Support BDRV_REQ_WRITE_UNCHANGED block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes block: Add BDRV_REQ_WRITE_UNCHANGED flag block: BLK_PERM_WRITE includes ..._UNCHANGED block: Add COR filter driver iotests: Skip 181 and 201 without userfaultfd iotests: Add failure matching to common.qemu docs: Document the new default sizes of the qcow2 caches qcow2: Give the refcount cache the minimum possible size by default specs/qcow2: Clarify that compressed clusters have the COPIED bit reset Fix error message about compressed clusters with OFLAG_COPIED ... Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/blkdebug.c9
-rwxr-xr-xblock/blkreplay.c3
-rw-r--r--block/blkverify.c3
-rw-r--r--block/copy-on-read.c173
-rw-r--r--block/io.c12
-rw-r--r--block/mirror.c2
-rw-r--r--block/qcow2-refcount.c4
-rw-r--r--block/qcow2.c31
-rw-r--r--block/qcow2.h4
-rw-r--r--block/quorum.c19
-rw-r--r--block/raw-format.c9
-rw-r--r--block/throttle.c6
13 files changed, 239 insertions, 38 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index d644bac60a..899bfb5e2c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-y += throttle.o
+block-obj-y += throttle.o copy-on-read.o
block-obj-y += crypto.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 053372c22e..526af2a808 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
- bs->supported_write_flags = BDRV_REQ_FUA &
- bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
- bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
ret = -EINVAL;
/* Set alignment overrides */
diff --git a/block/blkreplay.c b/block/blkreplay.c
index fe5a9b4a98..b016dbeee7 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
ret = 0;
fail:
return ret;
diff --git a/block/blkverify.c b/block/blkverify.c
index 754cc9e857..da97ee5927 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
new file mode 100644
index 0000000000..6a97208888
--- /dev/null
+++ b/block/copy-on-read.c
@@ -0,0 +1,173 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author:
+ * Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+
+
+static int cor_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
+ errp);
+ if (!bs->file) {
+ return -EINVAL;
+ }
+
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA &
+ bs->file->bs->supported_write_flags);
+
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
+
+ return 0;
+}
+
+
+static void cor_close(BlockDriverState *bs)
+{
+}
+
+
+#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+ | BLK_PERM_WRITE \
+ | BLK_PERM_RESIZE)
+#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
+
+static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ if (c == NULL) {
+ *nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED;
+ *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
+ return;
+ }
+
+ *nperm = (perm & PERM_PASSTHROUGH) |
+ (c->perm & PERM_UNCHANGED);
+ *nshared = (shared & PERM_PASSTHROUGH) |
+ (c->shared_perm & PERM_UNCHANGED);
+}
+
+
+static int64_t cor_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+
+static int cor_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
+{
+ return bdrv_truncate(bs->file, offset, prealloc, errp);
+}
+
+
+static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov,
+ flags | BDRV_REQ_COPY_ON_READ);
+}
+
+
+static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+
+static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int bytes,
+ BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+
+static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int bytes)
+{
+ return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+}
+
+
+static void cor_eject(BlockDriverState *bs, bool eject_flag)
+{
+ bdrv_eject(bs->file->bs, eject_flag);
+}
+
+
+static void cor_lock_medium(BlockDriverState *bs, bool locked)
+{
+ bdrv_lock_medium(bs->file->bs, locked);
+}
+
+
+static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
+ BlockDriverState *candidate)
+{
+ return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
+}
+
+
+BlockDriver bdrv_copy_on_read = {
+ .format_name = "copy-on-read",
+
+ .bdrv_open = cor_open,
+ .bdrv_close = cor_close,
+ .bdrv_child_perm = cor_child_perm,
+
+ .bdrv_getlength = cor_getlength,
+ .bdrv_truncate = cor_truncate,
+
+ .bdrv_co_preadv = cor_co_preadv,
+ .bdrv_co_pwritev = cor_co_pwritev,
+ .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = cor_co_pdiscard,
+
+ .bdrv_eject = cor_eject,
+ .bdrv_lock_medium = cor_lock_medium,
+
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
+
+ .bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter,
+
+ .has_variable_length = true,
+ .is_filter = true,
+};
+
+static void bdrv_copy_on_read_init(void)
+{
+ bdrv_register(&bdrv_copy_on_read);
+}
+
+block_init(bdrv_copy_on_read_init);
diff --git a/block/io.c b/block/io.c
index 4fad5ac2fe..ca96b487eb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1118,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+ BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
- &local_qiov, 0);
+ &local_qiov,
+ BDRV_REQ_WRITE_UNCHANGED);
}
if (ret < 0) {
@@ -1504,7 +1506,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
- assert(child->perm & BLK_PERM_WRITE);
+ if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+ assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+ } else {
+ assert(child->perm & BLK_PERM_WRITE);
+ }
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
diff --git a/block/mirror.c b/block/mirror.c
index 6aa38db114..a4197bb975 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1134,6 +1134,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->implicit = true;
}
mirror_top_bs->total_sectors = bs->total_sectors;
+ mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6b8b63514a..2dc23005b7 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
case QCOW2_CLUSTER_COMPRESSED:
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
if (l2_entry & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
"copied flag must never be set for compressed "
- "clusters\n", l2_entry >> s->cluster_bits);
+ "clusters\n", l2_entry & s->cluster_offset_mask);
l2_entry &= ~QCOW_OFLAG_COPIED;
res->corruptions++;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 2f36e632f9..6d532470a8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
} else if (refcount_cache_size_set) {
*l2_cache_size = combined_cache_size - *refcount_cache_size;
} else {
- *refcount_cache_size = combined_cache_size
- / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
- *l2_cache_size = combined_cache_size - *refcount_cache_size;
+ uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+ uint64_t min_refcount_cache =
+ (uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
+
+ /* Assign as much memory as possible to the L2 cache, and
+ * use the remainder for the refcount cache */
+ if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
+ *l2_cache_size = max_l2_cache;
+ *refcount_cache_size = combined_cache_size - *l2_cache_size;
+ } else {
+ *refcount_cache_size =
+ MIN(combined_cache_size, min_refcount_cache);
+ *l2_cache_size = combined_cache_size - *refcount_cache_size;
+ }
}
} else {
- if (!l2_cache_size_set && !refcount_cache_size_set) {
+ if (!l2_cache_size_set) {
*l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
(uint64_t)DEFAULT_L2_CACHE_CLUSTERS
* s->cluster_size);
- *refcount_cache_size = *l2_cache_size
- / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
- } else if (!l2_cache_size_set) {
- *l2_cache_size = *refcount_cache_size
- * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
- } else if (!refcount_cache_size_set) {
- *refcount_cache_size = *l2_cache_size
- / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
+ }
+ if (!refcount_cache_size_set) {
+ *refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
}
}
diff --git a/block/qcow2.h b/block/qcow2.h
index adf5c3950f..01b5250415 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -77,10 +77,6 @@
#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
-
#define DEFAULT_CLUSTER_SIZE 65536
diff --git a/block/quorum.c b/block/quorum.c
index a5051da56e..e448d7e384 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -115,6 +115,7 @@ struct QuorumAIOCB {
/* Request metadata */
uint64_t offset;
uint64_t bytes;
+ int flags;
QEMUIOVector *qiov; /* calling IOV */
@@ -157,7 +158,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t offset,
- uint64_t bytes)
+ uint64_t bytes,
+ int flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@@ -168,6 +170,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
.bs = bs,
.offset = offset,
.bytes = bytes,
+ .flags = flags,
.qiov = qiov,
.votes.compare = quorum_sha256_compare,
.votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
@@ -271,9 +274,11 @@ static void quorum_rewrite_entry(void *opaque)
BDRVQuorumState *s = acb->bs->opaque;
/* Ignore any errors, it's just a correction attempt for already
- * corrupted data. */
+ * corrupted data.
+ * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
+ * area with different data from the other children. */
bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
- acb->qiov, 0);
+ acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);
/* Wake up the caller after the last rewrite */
acb->rewrite_count--;
@@ -673,7 +678,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int ret;
acb->is_read = true;
@@ -699,7 +704,7 @@ static void write_quorum_entry(void *opaque)
sacb->bs = s->children[i]->bs;
sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
- acb->qiov, 0);
+ acb->qiov, acb->flags);
if (sacb->ret == 0) {
acb->success_count++;
} else {
@@ -719,7 +724,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int i, ret;
for (i = 0; i < s->num_children; i++) {
@@ -961,6 +966,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->next_child_index = s->num_children;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+
g_free(opened);
goto exit;
diff --git a/block/raw-format.c b/block/raw-format.c
index a378547c99..fe33693a2d 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -415,10 +415,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
bs->sg = bs->file->bs->sg;
- bs->supported_write_flags = BDRV_REQ_FUA &
- bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
- bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
diff --git a/block/throttle.c b/block/throttle.c
index 95ed06acd8..e298827f95 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -81,8 +81,10 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
if (!bs->file) {
return -EINVAL;
}
- bs->supported_write_flags = bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = bs->file->bs->supported_write_flags |
+ BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = bs->file->bs->supported_zero_flags |
+ BDRV_REQ_WRITE_UNCHANGED;
return throttle_configure_tgm(bs, tgm, options, errp);
}