summary refs log tree commit diff stats
path: root/block
diff options
context:
space:
mode:
author Peter Maydell 2020-05-08 15:29:18 +0200
committer Peter Maydell 2020-05-08 15:29:18 +0200
commit c88f1ffc19e38008a1c33ae039482a860aa7418c (patch)
tree db26706712a4f9db926275ffa5a52a88b63a2a3e /block
parent Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20200507a'... (diff)
parent block: Drop unused .bdrv_has_zero_init_truncate (diff)
download qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.tar.gz
qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.tar.xz
qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.zip
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches:

- qcow2: Fix preallocation on block devices
- backup: Make sure that source and target size match
- vmdk: Fix zero cluster handling
- Follow-up cleanups and fixes for the truncate changes
- iotests: Skip more tests if required drivers are missing

# gpg: Signature made Fri 08 May 2020 13:39:55 BST
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (30 commits)
  block: Drop unused .bdrv_has_zero_init_truncate
  vhdx: Rework truncation logic
  parallels: Rework truncation logic
  ssh: Support BDRV_REQ_ZERO_WRITE for truncate
  sheepdog: Support BDRV_REQ_ZERO_WRITE for truncate
  rbd: Support BDRV_REQ_ZERO_WRITE for truncate
  nfs: Support BDRV_REQ_ZERO_WRITE for truncate
  file-win32: Support BDRV_REQ_ZERO_WRITE for truncate
  gluster: Drop useless has_zero_init callback
  qcow2: Fix preallocation on block devices
  iotests/055: Use cache.no-flush for vmdk target
  iotests: Backup with different source/target size
  backup: Make sure that source and target size match
  backup: Improve error for bdrv_getlength() failure
  iotests/283: Use consistent size for source and target
  iotests: vmdk: Enable zeroed_grained=on by default
  vmdk: Flush only once in vmdk_L2update()
  vmdk: Don't update L2 table for zero write on zero cluster
  vmdk: Fix partial overwrite of zero cluster
  vmdk: Fix zero cluster allocation
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- block/backup-top.c 14
-rw-r--r-- block/backup.c 18
-rw-r--r-- block/file-posix.c 1
-rw-r--r-- block/file-win32.c 4
-rw-r--r-- block/gluster.c 14
-rw-r--r-- block/nfs.c 4
-rw-r--r-- block/parallels.c 25
-rw-r--r-- block/qcow2.c 23
-rw-r--r-- block/qed.c 1
-rw-r--r-- block/raw-format.c 6
-rw-r--r-- block/rbd.c 4
-rw-r--r-- block/sheepdog.c 4
-rw-r--r-- block/ssh.c 5
-rw-r--r-- block/vhdx.c 89
-rw-r--r-- block/vmdk.c 47
15 files changed, 149 insertions, 110 deletions
diff --git a/block/backup-top.c b/block/backup-top.c
index 3b50c06e2c..79b268e6dc 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -148,8 +148,10 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
*
* Share write to target (child_file), to not interfere
* with guest writes to its disk which may be in target backing chain.
+ * Can't resize during a backup block job because we check the size
+ * only upfront.
*/
- *nshared = BLK_PERM_ALL;
+ *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
*nperm = BLK_PERM_WRITE;
} else {
/* Source child */
@@ -159,7 +161,7 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
if (perm & BLK_PERM_WRITE) {
*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
}
- *nshared &= ~BLK_PERM_WRITE;
+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
}
}
@@ -192,11 +194,13 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
{
Error *local_err = NULL;
BDRVBackupTopState *state;
- BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter,
- filter_node_name,
- BDRV_O_RDWR, errp);
+ BlockDriverState *top;
bool appended = false;
+ assert(source->total_sectors == target->total_sectors);
+
+ top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name,
+ BDRV_O_RDWR, errp);
if (!top) {
return NULL;
}
diff --git a/block/backup.c b/block/backup.c
index a7a7dcaf4c..4f13bb20a5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -340,7 +340,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp)
{
- int64_t len;
+ int64_t len, target_len;
BackupBlockJob *job = NULL;
int64_t cluster_size;
BdrvRequestFlags write_flags;
@@ -400,8 +400,20 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
len = bdrv_getlength(bs);
if (len < 0) {
- error_setg_errno(errp, -len, "unable to get length for '%s'",
- bdrv_get_device_name(bs));
+ error_setg_errno(errp, -len, "Unable to get length for '%s'",
+ bdrv_get_device_or_node_name(bs));
+ goto error;
+ }
+
+ target_len = bdrv_getlength(target);
+ if (target_len < 0) {
+ error_setg_errno(errp, -target_len, "Unable to get length for '%s'",
+ bdrv_get_device_or_node_name(bs));
+ goto error;
+ }
+
+ if (target_len != len) {
+ error_setg(errp, "Source and target image have different sizes");
goto error;
}
diff --git a/block/file-posix.c b/block/file-posix.c
index 05e094be29..3ab8f5a0fa 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3100,7 +3100,6 @@ BlockDriver bdrv_file = {
.bdrv_co_create = raw_co_create,
.bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_co_block_status = raw_co_block_status,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
diff --git a/block/file-win32.c b/block/file-win32.c
index a6b0dda5c3..221aaf713e 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -408,6 +408,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
+ /* When extending regular files, we get zeros from the OS */
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+
ret = 0;
fail:
qemu_opts_del(opts);
@@ -638,7 +641,6 @@ BlockDriver bdrv_file = {
.bdrv_close = raw_close,
.bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_aio_preadv = raw_aio_preadv,
.bdrv_aio_pwritev = raw_aio_pwritev,
diff --git a/block/gluster.c b/block/gluster.c
index d06df900f6..31233cac69 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1359,12 +1359,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
}
}
-static int qemu_gluster_has_zero_init(BlockDriverState *bs)
-{
- /* GlusterFS volume could be backed by a block device */
- return 0;
-}
-
/*
* Find allocation range in @bs around offset @start.
* May change underlying file descriptor's file offset.
@@ -1569,8 +1563,6 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
- .bdrv_has_zero_init = qemu_gluster_has_zero_init,
- .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
@@ -1601,8 +1593,6 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
- .bdrv_has_zero_init = qemu_gluster_has_zero_init,
- .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
@@ -1633,8 +1623,6 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
- .bdrv_has_zero_init = qemu_gluster_has_zero_init,
- .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
@@ -1671,8 +1659,6 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
- .bdrv_has_zero_init = qemu_gluster_has_zero_init,
- .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
diff --git a/block/nfs.c b/block/nfs.c
index 385d756e1d..b1718d125a 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -620,6 +620,9 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
}
bs->total_sectors = ret;
+ if (client->has_zero_init) {
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+ }
return 0;
}
@@ -869,7 +872,6 @@ static BlockDriver bdrv_nfs = {
.create_opts = &nfs_create_opts,
.bdrv_has_zero_init = nfs_has_zero_init,
- .bdrv_has_zero_init_truncate = nfs_has_zero_init,
.bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
.bdrv_co_truncate = nfs_file_co_truncate,
diff --git a/block/parallels.c b/block/parallels.c
index 8db64a55e3..e7717c508e 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -166,7 +166,7 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
- int ret;
+ int ret = 0;
BDRVParallelsState *s = bs->opaque;
int64_t pos, space, idx, to_allocate, i, len;
@@ -196,14 +196,24 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
}
if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
space += s->prealloc_size;
+ /*
+ * We require the expanded size to read back as zero. If the
+ * user permitted truncation, we try that; but if it fails, we
+ * force the safer-but-slower fallocate.
+ */
+ if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
+ ret = bdrv_truncate(bs->file,
+ (s->data_end + space) << BDRV_SECTOR_BITS,
+ false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE,
+ NULL);
+ if (ret == -ENOTSUP) {
+ s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
+ }
+ }
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
ret = bdrv_pwrite_zeroes(bs->file,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
- } else {
- ret = bdrv_truncate(bs->file,
- (s->data_end + space) << BDRV_SECTOR_BITS,
- false, PREALLOC_MODE_OFF, 0, NULL);
}
if (ret < 0) {
return ret;
@@ -828,6 +838,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+ /* prealloc_mode can be downgraded later during allocate_clusters */
s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
PRL_PREALLOC_MODE_FALLOCATE,
&local_err);
@@ -836,10 +847,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
goto fail_options;
}
- if (!bdrv_has_zero_init_truncate(bs->file->bs)) {
- s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
- }
-
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
ret = parallels_update_header(bs);
diff --git a/block/qcow2.c b/block/qcow2.c
index ad934109a8..1ad95ff048 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4107,7 +4107,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
{
int64_t allocation_start, host_offset, guest_offset;
int64_t clusters_allocated;
- int64_t old_file_size, new_file_size;
+ int64_t old_file_size, last_cluster, new_file_size;
uint64_t nb_new_data_clusters, nb_new_l2_tables;
/* With a data file, preallocation means just allocating the metadata
@@ -4127,7 +4127,13 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
ret = old_file_size;
goto fail;
}
- old_file_size = ROUND_UP(old_file_size, s->cluster_size);
+
+ last_cluster = qcow2_get_last_cluster(bs, old_file_size);
+ if (last_cluster >= 0) {
+ old_file_size = (last_cluster + 1) * s->cluster_size;
+ } else {
+ old_file_size = ROUND_UP(old_file_size, s->cluster_size);
+ }
nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
s->cluster_size);
@@ -4242,15 +4248,17 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
* requires a cluster-aligned start. The end may be unaligned if it is
* at the end of the image (which it is here).
*/
- ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Failed to zero out new clusters");
- goto fail;
+ if (offset > zero_start) {
+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to zero out new clusters");
+ goto fail;
+ }
}
/* Write explicit zeros for the unaligned head */
if (zero_start > old_length) {
- uint64_t len = zero_start - old_length;
+ uint64_t len = MIN(zero_start, offset) - old_length;
uint8_t *buf = qemu_blockalign0(bs, len);
QEMUIOVector qiov;
qemu_iovec_init_buf(&qiov, buf, len);
@@ -5613,7 +5621,6 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_create_opts = qcow2_co_create_opts,
.bdrv_co_create = qcow2_co_create,
.bdrv_has_zero_init = qcow2_has_zero_init,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_co_block_status = qcow2_co_block_status,
.bdrv_co_preadv_part = qcow2_co_preadv_part,
diff --git a/block/qed.c b/block/qed.c
index fb609cfba1..5da9726518 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1675,7 +1675,6 @@ static BlockDriver bdrv_qed = {
.bdrv_co_create = bdrv_qed_co_create,
.bdrv_co_create_opts = bdrv_qed_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_co_block_status = bdrv_qed_co_block_status,
.bdrv_co_readv = bdrv_qed_co_readv,
.bdrv_co_writev = bdrv_qed_co_writev,
diff --git a/block/raw-format.c b/block/raw-format.c
index 351f2d91c6..9108e43696 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -414,11 +414,6 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file->bs);
}
-static int raw_has_zero_init_truncate(BlockDriverState *bs)
-{
- return bdrv_has_zero_init_truncate(bs->file->bs);
-}
-
static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
const char *filename,
QemuOpts *opts,
@@ -582,7 +577,6 @@ BlockDriver bdrv_raw = {
.bdrv_co_ioctl = &raw_co_ioctl,
.create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init,
- .bdrv_has_zero_init_truncate = &raw_has_zero_init_truncate,
.strong_runtime_opts = raw_strong_runtime_opts,
.mutable_opts = mutable_opts,
};
diff --git a/block/rbd.c b/block/rbd.c
index f2d52091c7..617553b022 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -817,6 +817,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ /* When extending regular files, we get zeros from the OS */
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+
r = 0;
goto out;
@@ -1310,7 +1313,6 @@ static BlockDriver bdrv_rbd = {
.bdrv_co_create = qemu_rbd_co_create,
.bdrv_co_create_opts = qemu_rbd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 2eb61938ff..27a30d17f4 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1654,6 +1654,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
memcpy(&s->inode, buf, sizeof(s->inode));
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
qemu_co_mutex_init(&s->queue_lock);
@@ -3225,7 +3226,6 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_create = sd_co_create,
.bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_co_truncate = sd_co_truncate,
@@ -3264,7 +3264,6 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_create = sd_co_create,
.bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_co_truncate = sd_co_truncate,
@@ -3303,7 +3302,6 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_create = sd_co_create,
.bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
.bdrv_co_truncate = sd_co_truncate,
diff --git a/block/ssh.c b/block/ssh.c
index 9eb33df859..098dbe03c1 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -883,6 +883,10 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
/* Go non-blocking. */
ssh_set_blocking(s->session, 0);
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+ }
+
qapi_free_BlockdevOptionsSsh(opts);
return 0;
@@ -1393,7 +1397,6 @@ static BlockDriver bdrv_ssh = {
.bdrv_co_create_opts = ssh_co_create_opts,
.bdrv_close = ssh_close,
.bdrv_has_zero_init = ssh_has_zero_init,
- .bdrv_has_zero_init_truncate = ssh_has_zero_init,
.bdrv_co_readv = ssh_co_readv,
.bdrv_co_writev = ssh_co_writev,
.bdrv_getlength = ssh_getlength,
diff --git a/block/vhdx.c b/block/vhdx.c
index e11fb7413a..53e756438a 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1240,12 +1240,16 @@ exit:
/*
* Allocate a new payload block at the end of the file.
*
- * Allocation will happen at 1MB alignment inside the file
+ * Allocation will happen at 1MB alignment inside the file.
+ *
+ * If @need_zero is set on entry but not cleared on return, then truncation
+ * could not guarantee that the new portion reads as zero, and the caller
+ * will take care of it instead.
*
* Returns the file offset start of the new payload block
*/
static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
- uint64_t *new_offset)
+ uint64_t *new_offset, bool *need_zero)
{
int64_t current_len;
@@ -1262,6 +1266,17 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
return -EINVAL;
}
+ if (*need_zero) {
+ int ret;
+
+ ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false,
+ PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL);
+ if (ret != -ENOTSUP) {
+ *need_zero = false;
+ return ret;
+ }
+ }
+
return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
PREALLOC_MODE_OFF, 0, NULL);
}
@@ -1355,18 +1370,38 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
/* in this case, we need to preserve zero writes for
* data that is not part of this write, so we must pad
* the rest of the buffer to zeroes */
-
- /* if we are on a posix system with ftruncate() that extends
- * a file, then it is zero-filled for us. On Win32, the raw
- * layer uses SetFilePointer and SetFileEnd, which does not
- * zero fill AFAIK */
-
- /* Queue another write of zero buffers if the underlying file
- * does not zero-fill on file extension */
-
- if (bdrv_has_zero_init_truncate(bs->file->bs) == 0) {
- use_zero_buffers = true;
-
+ use_zero_buffers = true;
+ /* fall through */
+ case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
+ case PAYLOAD_BLOCK_UNMAPPED:
+ case PAYLOAD_BLOCK_UNMAPPED_v095:
+ case PAYLOAD_BLOCK_UNDEFINED:
+ bat_prior_offset = sinfo.file_offset;
+ ret = vhdx_allocate_block(bs, s, &sinfo.file_offset,
+ &use_zero_buffers);
+ if (ret < 0) {
+ goto exit;
+ }
+ /*
+ * once we support differencing files, this may also be
+ * partially present
+ */
+ /* update block state to the newly specified state */
+ vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
+ &bat_entry_offset,
+ PAYLOAD_BLOCK_FULLY_PRESENT);
+ bat_update = true;
+ /*
+ * Since we just allocated a block, file_offset is the
+ * beginning of the payload block. It needs to be the
+ * write address, which includes the offset into the
+ * block, unless the entire block needs to read as
+ * zeroes but truncation was not able to provide them,
+ * in which case we need to fill in the rest.
+ */
+ if (!use_zero_buffers) {
+ sinfo.file_offset += sinfo.block_offset;
+ } else {
/* zero fill the front, if any */
if (sinfo.block_offset) {
iov1.iov_len = sinfo.block_offset;
@@ -1378,7 +1413,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
}
/* our actual data */
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
sinfo.bytes_avail);
/* zero fill the back, if any */
@@ -1393,29 +1428,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
}
}
- /* fall through */
- case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
- case PAYLOAD_BLOCK_UNMAPPED:
- case PAYLOAD_BLOCK_UNMAPPED_v095:
- case PAYLOAD_BLOCK_UNDEFINED:
- bat_prior_offset = sinfo.file_offset;
- ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
- if (ret < 0) {
- goto exit;
- }
- /* once we support differencing files, this may also be
- * partially present */
- /* update block state to the newly specified state */
- vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
- &bat_entry_offset,
- PAYLOAD_BLOCK_FULLY_PRESENT);
- bat_update = true;
- /* since we just allocated a block, file_offset is the
- * beginning of the payload block. It needs to be the
- * write address, which includes the offset into the block */
- if (!use_zero_buffers) {
- sinfo.file_offset += sinfo.block_offset;
- }
+
/* fall through */
case PAYLOAD_BLOCK_FULLY_PRESENT:
/* if the file offset address is in the header zone,
diff --git a/block/vmdk.c b/block/vmdk.c
index b02fdd14b2..b18f128816 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -180,7 +180,7 @@ typedef struct VmdkMetaData {
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
- int valid;
+ bool new_allocation;
uint32_t *l2_cache_entry;
} VmdkMetaData;
@@ -1340,7 +1340,9 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
* get_whole_cluster
*
* Copy backing file's cluster that covers @sector_num, otherwise write zero,
- * to the cluster at @cluster_sector_num.
+ * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting
+ * a zeroed cluster in the current layer and must not copy data from the
+ * backing file.
*
* If @skip_start_sector < @skip_end_sector, the relative range
* [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
@@ -1351,18 +1353,21 @@ static int get_whole_cluster(BlockDriverState *bs,
uint64_t cluster_offset,
uint64_t offset,
uint64_t skip_start_bytes,
- uint64_t skip_end_bytes)
+ uint64_t skip_end_bytes,
+ bool zeroed)
{
int ret = VMDK_OK;
int64_t cluster_bytes;
uint8_t *whole_grain;
+ bool copy_from_backing;
/* For COW, align request sector_num to cluster start */
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
whole_grain = qemu_blockalign(bs, cluster_bytes);
+ copy_from_backing = bs->backing && !zeroed;
- if (!bs->backing) {
+ if (!copy_from_backing) {
memset(whole_grain, 0, skip_start_bytes);
memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
}
@@ -1377,7 +1382,7 @@ static int get_whole_cluster(BlockDriverState *bs,
/* Read backing data before skip range */
if (skip_start_bytes > 0) {
- if (bs->backing) {
+ if (copy_from_backing) {
/* qcow2 emits this on bs->file instead of bs->backing */
BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
ret = bdrv_pread(bs->backing, offset, whole_grain,
@@ -1397,7 +1402,7 @@ static int get_whole_cluster(BlockDriverState *bs,
}
/* Read backing data after skip range */
if (skip_end_bytes < cluster_bytes) {
- if (bs->backing) {
+ if (copy_from_backing) {
/* qcow2 emits this on bs->file instead of bs->backing */
BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
@@ -1430,7 +1435,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
offset = cpu_to_le32(offset);
/* update L2 table */
BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE);
- if (bdrv_pwrite_sync(extent->file,
+ if (bdrv_pwrite(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1439,13 +1444,16 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
- if (bdrv_pwrite_sync(extent->file,
+ if (bdrv_pwrite(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
}
+ if (bdrv_flush(extent->file->bs) < 0) {
+ return VMDK_ERROR;
+ }
if (m_data->l2_cache_entry) {
*m_data->l2_cache_entry = offset;
}
@@ -1492,7 +1500,7 @@ static int get_cluster_offset(BlockDriverState *bs,
unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
if (m_data) {
- m_data->valid = 0;
+ m_data->new_allocation = false;
}
if (extent->flat) {
*cluster_offset = extent->flat_start_offset;
@@ -1572,6 +1580,12 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
+ if (m_data) {
+ m_data->l1_index = l1_index;
+ m_data->l2_index = l2_index;
+ m_data->l2_offset = l2_offset;
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
+ }
if (extent->sesparse) {
cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
@@ -1625,16 +1639,13 @@ static int get_cluster_offset(BlockDriverState *bs,
* or inappropriate VM shutdown.
*/
ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
- offset, skip_start_bytes, skip_end_bytes);
+ offset, skip_start_bytes, skip_end_bytes,
+ zeroed);
if (ret) {
return ret;
}
if (m_data) {
- m_data->valid = 1;
- m_data->l1_index = l1_index;
- m_data->l2_index = l2_index;
- m_data->l2_offset = l2_offset;
- m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
+ m_data->new_allocation = true;
}
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
@@ -1990,7 +2001,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
error_report("Could not write to allocated cluster"
" for streamOptimized");
return -EIO;
- } else {
+ } else if (!zeroed) {
/* allocate */
ret = get_cluster_offset(bs, extent, &m_data, offset,
true, &cluster_offset, 0, 0);
@@ -2005,7 +2016,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
offset_in_cluster == 0 &&
n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
- if (!zero_dry_run) {
+ if (!zero_dry_run && ret != VMDK_ZEROED) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
!= VMDK_OK) {
@@ -2021,7 +2032,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
if (ret) {
return ret;
}
- if (m_data.valid) {
+ if (m_data.new_allocation) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data,
cluster_offset >> BDRV_SECTOR_BITS)