summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Hajnoczi2011-11-30 13:23:41 +0100
committerKevin Wolf2011-12-05 14:49:47 +0100
commitaef4acb6616ab7fb5c105660aa8a2cee4e250e75 (patch)
tree53fe5915dd55fd1e4197f462c153b6c551417af4
parentqcow2: Unlock during COW (diff)
downloadqemu-aef4acb6616ab7fb5c105660aa8a2cee4e250e75.tar.gz
qemu-aef4acb6616ab7fb5c105660aa8a2cee4e250e75.tar.xz
qemu-aef4acb6616ab7fb5c105660aa8a2cee4e250e75.zip
qcow2: avoid reentrant bdrv_read() in copy_sectors()
A BlockDriverState should not issue requests on itself through the public block layer interface. Nested, or reentrant, requests are problematic because they do I/O throttling and request tracking twice. Features like block layer copy-on-read use request tracking to avoid race conditions between concurrent requests. The reentrant request will have to "wait" for its parent request to complete. But the parent is waiting for the reentrant request to make progress so we have reached deadlock. The solution is for block drivers to avoid the public block layer interfaces for reentrant requests. Instead they should call their own internal functions if they wish to perform reentrant requests. This is also a good opportunity to make copy_sectors() a true coroutine_fn. That means calling bdrv_co_writev() instead of bdrv_write(). Behavior is unchanged but we're being explicit that this executes in coroutine context. Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-rw-r--r--block/qcow2-cluster.c27
1 files changed, 19 insertions, 8 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0e337075b3..07a2e936fd 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -289,12 +289,15 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
}
}
-static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
- uint64_t cluster_offset, int n_start, int n_end)
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+ uint64_t start_sect,
+ uint64_t cluster_offset,
+ int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
+ QEMUIOVector qiov;
+ struct iovec iov;
int n, ret;
- void *buf;
/*
* If this is the last cluster and it is only partially used, we must only
@@ -310,29 +313,37 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
return 0;
}
- buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+ iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
- ret = bdrv_read(bs, start_sect + n_start, buf, n);
+
+ /* Call .bdrv_co_readv() directly instead of using the public block-layer
+ * interface. This avoids double I/O throttling and request tracking,
+ * which can lead to deadlock when block layer copy-on-read is enabled.
+ */
+ ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
if (ret < 0) {
goto out;
}
if (s->crypt_method) {
qcow2_encrypt_sectors(s, start_sect + n_start,
- buf, buf, n, 1,
+ iov.iov_base, iov.iov_base, n, 1,
&s->aes_encrypt_key);
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
+ ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {
goto out;
}
ret = 0;
out:
- qemu_vfree(buf);
+ qemu_vfree(iov.iov_base);
return ret;
}