diff options
| author | Peter Maydell | 2020-05-08 15:29:18 +0200 |
|---|---|---|
| committer | Peter Maydell | 2020-05-08 15:29:18 +0200 |
| commit | c88f1ffc19e38008a1c33ae039482a860aa7418c (patch) | |
| tree | db26706712a4f9db926275ffa5a52a88b63a2a3e /block | |
| parent | Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20200507a'... (diff) | |
| parent | block: Drop unused .bdrv_has_zero_init_truncate (diff) | |
| download | qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.tar.gz qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.tar.xz qemu-c88f1ffc19e38008a1c33ae039482a860aa7418c.zip | |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches:
- qcow2: Fix preallocation on block devices
- backup: Make sure that source and target size match
- vmdk: Fix zero cluster handling
- Follow-up cleanups and fixes for the truncate changes
- iotests: Skip more tests if required drivers are missing
# gpg: Signature made Fri 08 May 2020 13:39:55 BST
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream: (30 commits)
block: Drop unused .bdrv_has_zero_init_truncate
vhdx: Rework truncation logic
parallels: Rework truncation logic
ssh: Support BDRV_REQ_ZERO_WRITE for truncate
sheepdog: Support BDRV_REQ_ZERO_WRITE for truncate
rbd: Support BDRV_REQ_ZERO_WRITE for truncate
nfs: Support BDRV_REQ_ZERO_WRITE for truncate
file-win32: Support BDRV_REQ_ZERO_WRITE for truncate
gluster: Drop useless has_zero_init callback
qcow2: Fix preallocation on block devices
iotests/055: Use cache.no-flush for vmdk target
iotests: Backup with different source/target size
backup: Make sure that source and target size match
backup: Improve error for bdrv_getlength() failure
iotests/283: Use consistent size for source and target
iotests: vmdk: Enable zeroed_grained=on by default
vmdk: Flush only once in vmdk_L2update()
vmdk: Don't update L2 table for zero write on zero cluster
vmdk: Fix partial overwrite of zero cluster
vmdk: Fix zero cluster allocation
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
| -rw-r--r-- | block/backup-top.c | 14 | ||||
| -rw-r--r-- | block/backup.c | 18 | ||||
| -rw-r--r-- | block/file-posix.c | 1 | ||||
| -rw-r--r-- | block/file-win32.c | 4 | ||||
| -rw-r--r-- | block/gluster.c | 14 | ||||
| -rw-r--r-- | block/nfs.c | 4 | ||||
| -rw-r--r-- | block/parallels.c | 25 | ||||
| -rw-r--r-- | block/qcow2.c | 23 | ||||
| -rw-r--r-- | block/qed.c | 1 | ||||
| -rw-r--r-- | block/raw-format.c | 6 | ||||
| -rw-r--r-- | block/rbd.c | 4 | ||||
| -rw-r--r-- | block/sheepdog.c | 4 | ||||
| -rw-r--r-- | block/ssh.c | 5 | ||||
| -rw-r--r-- | block/vhdx.c | 89 | ||||
| -rw-r--r-- | block/vmdk.c | 47 |
15 files changed, 149 insertions, 110 deletions
diff --git a/block/backup-top.c b/block/backup-top.c index 3b50c06e2c..79b268e6dc 100644 --- a/block/backup-top.c +++ b/block/backup-top.c @@ -148,8 +148,10 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, * * Share write to target (child_file), to not interfere * with guest writes to its disk which may be in target backing chain. + * Can't resize during a backup block job because we check the size + * only upfront. */ - *nshared = BLK_PERM_ALL; + *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; *nperm = BLK_PERM_WRITE; } else { /* Source child */ @@ -159,7 +161,7 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, if (perm & BLK_PERM_WRITE) { *nperm = *nperm | BLK_PERM_CONSISTENT_READ; } - *nshared &= ~BLK_PERM_WRITE; + *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); } } @@ -192,11 +194,13 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, { Error *local_err = NULL; BDRVBackupTopState *state; - BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter, - filter_node_name, - BDRV_O_RDWR, errp); + BlockDriverState *top; bool appended = false; + assert(source->total_sectors == target->total_sectors); + + top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, + BDRV_O_RDWR, errp); if (!top) { return NULL; } diff --git a/block/backup.c b/block/backup.c index a7a7dcaf4c..4f13bb20a5 100644 --- a/block/backup.c +++ b/block/backup.c @@ -340,7 +340,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque, JobTxn *txn, Error **errp) { - int64_t len; + int64_t len, target_len; BackupBlockJob *job = NULL; int64_t cluster_size; BdrvRequestFlags write_flags; @@ -400,8 +400,20 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, len = bdrv_getlength(bs); if (len < 0) { - error_setg_errno(errp, -len, "unable to get length for '%s'", - bdrv_get_device_name(bs)); + error_setg_errno(errp, -len, "Unable to get length for '%s'", + bdrv_get_device_or_node_name(bs)); + goto error; + } + + target_len = bdrv_getlength(target); + if (target_len < 0) { + error_setg_errno(errp, -target_len, "Unable to get length for '%s'", + bdrv_get_device_or_node_name(bs)); + goto error; + } + + if (target_len != len) { + error_setg(errp, "Source and target image have different sizes"); goto error; } diff --git a/block/file-posix.c b/block/file-posix.c index 05e094be29..3ab8f5a0fa 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -3100,7 +3100,6 @@ BlockDriver bdrv_file = { .bdrv_co_create = raw_co_create, .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_co_block_status = raw_co_block_status, .bdrv_co_invalidate_cache = raw_co_invalidate_cache, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, diff --git a/block/file-win32.c b/block/file-win32.c index a6b0dda5c3..221aaf713e 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -408,6 +408,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs)); } + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + ret = 0; fail: qemu_opts_del(opts); @@ -638,7 +641,6 @@ BlockDriver bdrv_file = { .bdrv_close = raw_close, .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_aio_preadv = raw_aio_preadv, .bdrv_aio_pwritev = raw_aio_pwritev, diff --git a/block/gluster.c b/block/gluster.c index d06df900f6..31233cac69 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1359,12 +1359,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) } } -static int qemu_gluster_has_zero_init(BlockDriverState *bs) -{ - /* GlusterFS volume could be backed by a block device */ - return 0; -} - /* * Find allocation range in @bs around offset @start. * May change underlying file descriptor's file offset. @@ -1569,8 +1563,6 @@ static BlockDriver bdrv_gluster = { .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, - .bdrv_has_zero_init = qemu_gluster_has_zero_init, - .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif @@ -1601,8 +1593,6 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, - .bdrv_has_zero_init = qemu_gluster_has_zero_init, - .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif @@ -1633,8 +1623,6 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, - .bdrv_has_zero_init = qemu_gluster_has_zero_init, - .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif @@ -1671,8 +1659,6 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, - .bdrv_has_zero_init = qemu_gluster_has_zero_init, - .bdrv_has_zero_init_truncate = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif diff --git a/block/nfs.c b/block/nfs.c index 385d756e1d..b1718d125a 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -620,6 +620,9 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, } bs->total_sectors = ret; + if (client->has_zero_init) { + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } return 0; } @@ -869,7 +872,6 @@ static BlockDriver bdrv_nfs = { .create_opts = &nfs_create_opts, .bdrv_has_zero_init = nfs_has_zero_init, - .bdrv_has_zero_init_truncate = nfs_has_zero_init, .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, .bdrv_co_truncate = nfs_file_co_truncate, diff --git a/block/parallels.c b/block/parallels.c index 8db64a55e3..e7717c508e 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -166,7 +166,7 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { - int ret; + int ret = 0; BDRVParallelsState *s = bs->opaque; int64_t pos, space, idx, to_allocate, i, len; @@ -196,14 +196,24 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { space += s->prealloc_size; + /* + * We require the expanded size to read back as zero. If the + * user permitted truncation, we try that; but if it fails, we + * force the safer-but-slower fallocate. + */ + if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { + ret = bdrv_truncate(bs->file, + (s->data_end + space) << BDRV_SECTOR_BITS, + false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, + NULL); + if (ret == -ENOTSUP) { + s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; + } + } if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { ret = bdrv_pwrite_zeroes(bs->file, s->data_end << BDRV_SECTOR_BITS, space << BDRV_SECTOR_BITS, 0); - } else { - ret = bdrv_truncate(bs->file, - (s->data_end + space) << BDRV_SECTOR_BITS, - false, PREALLOC_MODE_OFF, 0, NULL); } if (ret < 0) { return ret; @@ -828,6 +838,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + /* prealloc_mode can be downgraded later during allocate_clusters */ s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, PRL_PREALLOC_MODE_FALLOCATE, &local_err); @@ -836,10 +847,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail_options; } - if (!bdrv_has_zero_init_truncate(bs->file->bs)) { - s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; - } - if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) { s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); ret = parallels_update_header(bs); diff --git a/block/qcow2.c b/block/qcow2.c index ad934109a8..1ad95ff048 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -4107,7 +4107,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, { int64_t allocation_start, host_offset, guest_offset; int64_t clusters_allocated; - int64_t old_file_size, new_file_size; + int64_t old_file_size, last_cluster, new_file_size; uint64_t nb_new_data_clusters, nb_new_l2_tables; /* With a data file, preallocation means just allocating the metadata @@ -4127,7 +4127,13 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, ret = old_file_size; goto fail; } - old_file_size = ROUND_UP(old_file_size, s->cluster_size); + + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster >= 0) { + old_file_size = (last_cluster + 1) * s->cluster_size; + } else { + old_file_size = ROUND_UP(old_file_size, s->cluster_size); + } nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, s->cluster_size); @@ -4242,15 +4248,17 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, * requires a cluster-aligned start. The end may be unaligned if it is * at the end of the image (which it is here). */ - ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to zero out new clusters"); - goto fail; + if (offset > zero_start) { + ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to zero out new clusters"); + goto fail; + } } /* Write explicit zeros for the unaligned head */ if (zero_start > old_length) { - uint64_t len = zero_start - old_length; + uint64_t len = MIN(zero_start, offset) - old_length; uint8_t *buf = qemu_blockalign0(bs, len); QEMUIOVector qiov; qemu_iovec_init_buf(&qiov, buf, len); @@ -5613,7 +5621,6 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_create_opts = qcow2_co_create_opts, .bdrv_co_create = qcow2_co_create, .bdrv_has_zero_init = qcow2_has_zero_init, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_co_block_status = qcow2_co_block_status, .bdrv_co_preadv_part = qcow2_co_preadv_part, diff --git a/block/qed.c b/block/qed.c index fb609cfba1..5da9726518 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1675,7 +1675,6 @@ static BlockDriver bdrv_qed = { .bdrv_co_create = bdrv_qed_co_create, .bdrv_co_create_opts = bdrv_qed_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_co_block_status = bdrv_qed_co_block_status, .bdrv_co_readv = bdrv_qed_co_readv, .bdrv_co_writev = bdrv_qed_co_writev, diff --git a/block/raw-format.c b/block/raw-format.c index 351f2d91c6..9108e43696 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -414,11 +414,6 @@ static int raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file->bs); } -static int raw_has_zero_init_truncate(BlockDriverState *bs) -{ - return bdrv_has_zero_init_truncate(bs->file->bs); -} - static int coroutine_fn raw_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, @@ -582,7 +577,6 @@ BlockDriver bdrv_raw = { .bdrv_co_ioctl = &raw_co_ioctl, .create_opts = &raw_create_opts, .bdrv_has_zero_init = &raw_has_zero_init, - .bdrv_has_zero_init_truncate = &raw_has_zero_init_truncate, .strong_runtime_opts = raw_strong_runtime_opts, .mutable_opts = mutable_opts, }; diff --git a/block/rbd.c b/block/rbd.c index f2d52091c7..617553b022 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -817,6 +817,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } } + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + r = 0; goto out; @@ -1310,7 +1313,6 @@ static BlockDriver bdrv_rbd = { .bdrv_co_create = qemu_rbd_co_create, .bdrv_co_create_opts = qemu_rbd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, diff --git a/block/sheepdog.c b/block/sheepdog.c index 2eb61938ff..27a30d17f4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1654,6 +1654,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, memcpy(&s->inode, buf, sizeof(s->inode)); bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); qemu_co_mutex_init(&s->queue_lock); @@ -3225,7 +3226,6 @@ static BlockDriver bdrv_sheepdog = { .bdrv_co_create = sd_co_create, .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_co_truncate = sd_co_truncate, @@ -3264,7 +3264,6 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_co_create = sd_co_create, .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_co_truncate = sd_co_truncate, @@ -3303,7 +3302,6 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_co_create = sd_co_create, .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, .bdrv_co_truncate = sd_co_truncate, diff --git a/block/ssh.c b/block/ssh.c index 9eb33df859..098dbe03c1 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -883,6 +883,10 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, /* Go non-blocking. */ ssh_set_blocking(s->session, 0); + if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) { + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } + qapi_free_BlockdevOptionsSsh(opts); return 0; @@ -1393,7 +1397,6 @@ static BlockDriver bdrv_ssh = { .bdrv_co_create_opts = ssh_co_create_opts, .bdrv_close = ssh_close, .bdrv_has_zero_init = ssh_has_zero_init, - .bdrv_has_zero_init_truncate = ssh_has_zero_init, .bdrv_co_readv = ssh_co_readv, .bdrv_co_writev = ssh_co_writev, .bdrv_getlength = ssh_getlength, diff --git a/block/vhdx.c b/block/vhdx.c index e11fb7413a..53e756438a 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1240,12 +1240,16 @@ exit: /* * Allocate a new payload block at the end of the file. * - * Allocation will happen at 1MB alignment inside the file + * Allocation will happen at 1MB alignment inside the file. + * + * If @need_zero is set on entry but not cleared on return, then truncation + * could not guarantee that the new portion reads as zero, and the caller + * will take care of it instead. * * Returns the file offset start of the new payload block */ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, - uint64_t *new_offset) + uint64_t *new_offset, bool *need_zero) { int64_t current_len; @@ -1262,6 +1266,17 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, return -EINVAL; } + if (*need_zero) { + int ret; + + ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false, + PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); + if (ret != -ENOTSUP) { + *need_zero = false; + return ret; + } + } + return bdrv_truncate(bs->file, *new_offset + s->block_size, false, PREALLOC_MODE_OFF, 0, NULL); } @@ -1355,18 +1370,38 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, /* in this case, we need to preserve zero writes for * data that is not part of this write, so we must pad * the rest of the buffer to zeroes */ - - /* if we are on a posix system with ftruncate() that extends - * a file, then it is zero-filled for us. On Win32, the raw - * layer uses SetFilePointer and SetFileEnd, which does not - * zero fill AFAIK */ - - /* Queue another write of zero buffers if the underlying file - * does not zero-fill on file extension */ - - if (bdrv_has_zero_init_truncate(bs->file->bs) == 0) { - use_zero_buffers = true; - + use_zero_buffers = true; + /* fall through */ + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: + case PAYLOAD_BLOCK_UNMAPPED_v095: + case PAYLOAD_BLOCK_UNDEFINED: + bat_prior_offset = sinfo.file_offset; + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset, + &use_zero_buffers); + if (ret < 0) { + goto exit; + } + /* + * once we support differencing files, this may also be + * partially present + */ + /* update block state to the newly specified state */ + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, + PAYLOAD_BLOCK_FULLY_PRESENT); + bat_update = true; + /* + * Since we just allocated a block, file_offset is the + * beginning of the payload block. It needs to be the + * write address, which includes the offset into the + * block, unless the entire block needs to read as + * zeroes but truncation was not able to provide them, + * in which case we need to fill in the rest. + */ + if (!use_zero_buffers) { + sinfo.file_offset += sinfo.block_offset; + } else { /* zero fill the front, if any */ if (sinfo.block_offset) { iov1.iov_len = sinfo.block_offset; @@ -1378,7 +1413,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, } /* our actual data */ - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); /* zero fill the back, if any */ @@ -1393,29 +1428,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS; } } - /* fall through */ - case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ - case PAYLOAD_BLOCK_UNMAPPED: - case PAYLOAD_BLOCK_UNMAPPED_v095: - case PAYLOAD_BLOCK_UNDEFINED: - bat_prior_offset = sinfo.file_offset; - ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); - if (ret < 0) { - goto exit; - } - /* once we support differencing files, this may also be - * partially present */ - /* update block state to the newly specified state */ - vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, - &bat_entry_offset, - PAYLOAD_BLOCK_FULLY_PRESENT); - bat_update = true; - /* since we just allocated a block, file_offset is the - * beginning of the payload block. It needs to be the - * write address, which includes the offset into the block */ - if (!use_zero_buffers) { - sinfo.file_offset += sinfo.block_offset; - } + /* fall through */ case PAYLOAD_BLOCK_FULLY_PRESENT: /* if the file offset address is in the header zone, diff --git a/block/vmdk.c b/block/vmdk.c index b02fdd14b2..b18f128816 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -180,7 +180,7 @@ typedef struct VmdkMetaData { unsigned int l1_index; unsigned int l2_index; unsigned int l2_offset; - int valid; + bool new_allocation; uint32_t *l2_cache_entry; } VmdkMetaData; @@ -1340,7 +1340,9 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) * get_whole_cluster * * Copy backing file's cluster that covers @sector_num, otherwise write zero, - * to the cluster at @cluster_sector_num. + * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting + * a zeroed cluster in the current layer and must not copy data from the + * backing file. * * If @skip_start_sector < @skip_end_sector, the relative range * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave @@ -1351,18 +1353,21 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, uint64_t offset, uint64_t skip_start_bytes, - uint64_t skip_end_bytes) + uint64_t skip_end_bytes, + bool zeroed) { int ret = VMDK_OK; int64_t cluster_bytes; uint8_t *whole_grain; + bool copy_from_backing; /* For COW, align request sector_num to cluster start */ cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; offset = QEMU_ALIGN_DOWN(offset, cluster_bytes); whole_grain = qemu_blockalign(bs, cluster_bytes); + copy_from_backing = bs->backing && !zeroed; - if (!bs->backing) { + if (!copy_from_backing) { memset(whole_grain, 0, skip_start_bytes); memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes); } @@ -1377,7 +1382,7 @@ static int get_whole_cluster(BlockDriverState *bs, /* Read backing data before skip range */ if (skip_start_bytes > 0) { - if (bs->backing) { + if (copy_from_backing) { /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); ret = bdrv_pread(bs->backing, offset, whole_grain, @@ -1397,7 +1402,7 @@ static int get_whole_cluster(BlockDriverState *bs, } /* Read backing data after skip range */ if (skip_end_bytes < cluster_bytes) { - if (bs->backing) { + if (copy_from_backing) { /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); ret = bdrv_pread(bs->backing, offset + skip_end_bytes, @@ -1430,7 +1435,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, offset = cpu_to_le32(offset); /* update L2 table */ BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE); - if (bdrv_pwrite_sync(extent->file, + if (bdrv_pwrite(extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { @@ -1439,13 +1444,16 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, /* update backup L2 table */ if (extent->l1_backup_table_offset != 0) { m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; - if (bdrv_pwrite_sync(extent->file, + if (bdrv_pwrite(extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { return VMDK_ERROR; } } + if (bdrv_flush(extent->file->bs) < 0) { + return VMDK_ERROR; + } if (m_data->l2_cache_entry) { *m_data->l2_cache_entry = offset; } @@ -1492,7 +1500,7 @@ static int get_cluster_offset(BlockDriverState *bs, unsigned int l2_size_bytes = extent->l2_size * extent->entry_size; if (m_data) { - m_data->valid = 0; + m_data->new_allocation = false; } if (extent->flat) { *cluster_offset = extent->flat_start_offset; @@ -1572,6 +1580,12 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[min_index] = 1; found: l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; + if (m_data) { + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->l2_offset = l2_offset; + m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index; + } if (extent->sesparse) { cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]); @@ -1625,16 +1639,13 @@ static int get_cluster_offset(BlockDriverState *bs, * or inappropriate VM shutdown. */ ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE, - offset, skip_start_bytes, skip_end_bytes); + offset, skip_start_bytes, skip_end_bytes, + zeroed); if (ret) { return ret; } if (m_data) { - m_data->valid = 1; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index; + m_data->new_allocation = true; } } *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; @@ -1990,7 +2001,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, error_report("Could not write to allocated cluster" " for streamOptimized"); return -EIO; - } else { + } else if (!zeroed) { /* allocate */ ret = get_cluster_offset(bs, extent, &m_data, offset, true, &cluster_offset, 0, 0); @@ -2005,7 +2016,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, offset_in_cluster == 0 && n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) { n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE; - if (!zero_dry_run) { + if (!zero_dry_run && ret != VMDK_ZEROED) { /* update L2 tables */ if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) != VMDK_OK) { @@ -2021,7 +2032,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, if (ret) { return ret; } - if (m_data.valid) { + if (m_data.new_allocation) { /* update L2 tables */ if (vmdk_L2update(extent, &m_data, cluster_offset >> BDRV_SECTOR_BITS) |
