From fa166538743d4e28de7374c41332c3e448826f4b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 13 Jun 2016 12:56:35 -0600 Subject: block: Assert that flags are in range Add a new BDRV_REQ_MASK constant, and use it to make sure that caller flags are always valid. Tested with 'make check' and with qemu-iotests on both '-raw' and '-qcow2'; the only failure turned up was fixed in the previous commit. Signed-off-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/block.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 54cca28bac..8cabcddf6c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -65,6 +65,9 @@ typedef enum { BDRV_REQ_MAY_UNMAP = 0x4, BDRV_REQ_NO_SERIALISING = 0x8, BDRV_REQ_FUA = 0x10, + + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x1f, } BdrvRequestFlags; typedef struct BlockSizes { -- cgit v1.2.3-55-g7522 From 244483e64ee726cc89a1e05bed2be0ed37071403 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 2 Jun 2016 11:41:52 +0200 Subject: block: Byte-based bdrv_co_do_copy_on_readv() In a first step to convert the common I/O path to work on bytes rather than sectors, this converts the copy-on-read logic that is used by bdrv_aligned_preadv(). Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block/io.c | 63 +++++++++++++++++++++++++++++++-------------------- block/mirror.c | 10 ++++---- include/block/block.h | 10 +++++--- trace-events | 2 +- 4 files changed, 52 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 5b2017fedc..b6a2c800a1 100644 --- a/block/io.c +++ b/block/io.c @@ -404,12 +404,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) } /** - * Round a region to cluster boundaries + * Round a region to cluster boundaries (sector-based) */ -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) +void bdrv_round_sectors_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors) { BlockDriverInfo bdi; @@ -424,6 +424,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs, } } +/** + * Round a region to cluster boundaries + */ +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, unsigned int bytes, + int64_t *cluster_offset, + unsigned int *cluster_bytes) +{ + BlockDriverInfo bdi; + + if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { + *cluster_offset = offset; + *cluster_bytes = bytes; + } else { + int64_t c = bdi.cluster_size; + *cluster_offset = QEMU_ALIGN_DOWN(offset, c); + *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c); + } +} + static int bdrv_get_cluster_size(BlockDriverState *bs) { BlockDriverInfo bdi; @@ -865,7 +885,7 @@ emulate_flags: } static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up @@ -877,21 +897,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, BlockDriver *drv = bs->drv; struct iovec iov; QEMUIOVector bounce_qiov; - int64_t cluster_sector_num; - int cluster_nb_sectors; + int64_t cluster_offset; + unsigned int cluster_bytes; size_t skip_bytes; int ret; /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); - trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, - cluster_sector_num, cluster_nb_sectors); + trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, + cluster_offset, cluster_bytes); - iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_len = cluster_bytes; iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); if (bounce_buffer == NULL) { ret = -ENOMEM; @@ -900,8 +919,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); if (ret < 0) { goto err; @@ -909,16 +927,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv->bdrv_co_pwrite_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { - ret = bdrv_co_do_pwrite_zeroes(bs, - cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, - 0); + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. */ - ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); } @@ -930,9 +944,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, goto err; } - skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + skip_bytes = offset - cluster_offset; + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes); err: qemu_vfree(bounce_buffer); @@ -982,7 +995,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); + ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); goto out; } } diff --git a/block/mirror.c b/block/mirror.c index 1f01f2488c..41848b2c8e 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -185,8 +185,9 @@ static int mirror_cow_align(MirrorBlockJob *s, need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, s->cow_bitmap); if (need_cow) { - bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors, - &align_sector_num, &align_nb_sectors); + bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num, + *nb_sectors, &align_sector_num, + &align_nb_sectors); } if (align_nb_sectors > max_sectors) { @@ -384,8 +385,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_sector_num; int target_nb_sectors; - bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors, - &target_sector_num, &target_nb_sectors); + bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num, + io_sectors, &target_sector_num, + &target_nb_sectors); if (target_sector_num == sector_num && target_nb_sectors == io_sectors) { mirror_method = ret & BDRV_BLOCK_ZERO ? diff --git a/include/block/block.h b/include/block/block.h index 8cabcddf6c..9c3a62cc0a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -404,10 +404,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); +void bdrv_round_sectors_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors); void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors); + int64_t offset, unsigned int bytes, + int64_t *cluster_offset, + unsigned int *cluster_bytes); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, diff --git a/trace-events b/trace-events index 720c644873..104b64fae1 100644 --- a/trace-events +++ b/trace-events @@ -73,7 +73,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x" -bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" +bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u" # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" -- cgit v1.2.3-55-g7522 From f1e8474115d6be7eda14092050ffa2b031afb729 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Jun 2016 16:36:00 +0200 Subject: block: Introduce bdrv_preadv() We already have a byte-based bdrv_pwritev(), but the read counterpart was still missing. This commit adds it. Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block/io.c | 20 +++++++++++++------- include/block/block.h | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index b3ff9bec38..72d72109a5 100644 --- a/block/io.c +++ b/block/io.c @@ -700,6 +700,18 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } } +int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) +{ + int ret; + + ret = bdrv_prwv_co(bs, offset, qiov, false, 0); + if (ret < 0) { + return ret; + } + + return qiov->size; +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov; @@ -707,19 +719,13 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) .iov_base = (void *)buf, .iov_len = bytes, }; - int ret; if (bytes < 0) { return -EINVAL; } qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); - if (ret < 0) { - return ret; - } - - return bytes; + return bdrv_preadv(bs, offset, &qiov); } int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) diff --git a/include/block/block.h b/include/block/block.h index 9c3a62cc0a..9c63d07765 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -235,6 +235,7 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); +int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov); int bdrv_pwrite(BlockDriverState *bs, int64_t offset, const void *buf, int count); int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov); -- cgit v1.2.3-55-g7522 From 5ddda0b8f0c07c8082c87d248c8eb23f43fd44a1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Jun 2016 16:50:16 +0200 Subject: block: Make .bdrv_load_vmstate() vectored This brings it in line with .bdrv_save_vmstate(). Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block/io.c | 25 ++++++++++++++++++++----- block/qcow2.c | 6 +++--- block/sheepdog.c | 13 ++++++++++--- include/block/block.h | 1 + include/block/block_int.h | 4 ++-- 5 files changed, 36 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 72d72109a5..aac9b67d94 100644 --- a/block/io.c +++ b/block/io.c @@ -1870,14 +1870,29 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) +{ + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = buf, + .iov_len = size, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + return bdrv_readv_vmstate(bs, &qiov, pos); +} + +int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; - if (!drv) + + if (!drv) { return -ENOMEDIUM; - if (drv->bdrv_load_vmstate) - return drv->bdrv_load_vmstate(bs, buf, pos, size); - if (bs->file) - return bdrv_load_vmstate(bs->file->bs, buf, pos, size); + } else if (drv->bdrv_load_vmstate) { + return drv->bdrv_load_vmstate(bs, qiov, pos); + } else if (bs->file) { + return bdrv_readv_vmstate(bs->file->bs, qiov, pos); + } + return -ENOTSUP; } diff --git a/block/qcow2.c b/block/qcow2.c index 9de716a524..362ada24f9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2926,8 +2926,8 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, return ret; } -static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) +static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) { BDRVQcow2State *s = bs->opaque; bool zero_beyond_eof = bs->zero_beyond_eof; @@ -2935,7 +2935,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); bs->zero_beyond_eof = false; - ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); + ret = bdrv_preadv(bs, qcow2_vm_state_offset(s) + pos, qiov); bs->zero_beyond_eof = zero_beyond_eof; return ret; diff --git a/block/sheepdog.c b/block/sheepdog.c index 23fbace1f9..ef5d044ab9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2784,12 +2784,19 @@ static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, return ret; } -static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data, - int64_t pos, int size) +static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos) { BDRVSheepdogState *s = bs->opaque; + void *buf; + int ret; - return do_load_save_vmstate(s, data, pos, size, 1); + buf = qemu_blockalign(bs, qiov->size); + ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1); + qemu_iovec_from_buf(qiov, 0, buf, qiov->size); + qemu_vfree(buf); + + return ret; } diff --git a/include/block/block.h b/include/block/block.h index 9c63d07765..733a8ec2ec 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -431,6 +431,7 @@ void path_combine(char *dest, int dest_size, const char *base_path, const char *filename); +int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8a4963c4fe..f9a32cc5c5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -226,8 +226,8 @@ struct BlockDriver { int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size); + int (*bdrv_load_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos); int (*bdrv_change_backing_file)(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); -- cgit v1.2.3-55-g7522 From 1a8ae8221799901dc399a174b52a970d8e6f976a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Jun 2016 16:24:44 +0200 Subject: block: Make bdrv_load/save_vmstate coroutine_fns This allows drivers to share code between normal I/O and vmstate accesses. Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block/io.c | 80 ++++++++++++++++++++++++++++++++++------------- include/block/block_int.h | 10 +++--- 2 files changed, 64 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index af4d43ebef..e1e8948be9 100644 --- a/block/io.c +++ b/block/io.c @@ -1840,6 +1840,62 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } +typedef struct BdrvVmstateCo { + BlockDriverState *bs; + QEMUIOVector *qiov; + int64_t pos; + bool is_read; + int ret; +} BdrvVmstateCo; + +static int coroutine_fn +bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + return -ENOMEDIUM; + } else if (drv->bdrv_load_vmstate) { + return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos) + : drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (bs->file) { + return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); + } + + return -ENOTSUP; +} + +static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +{ + BdrvVmstateCo *co = opaque; + co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); +} + +static inline int +bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + if (qemu_in_coroutine()) { + return bdrv_co_rw_vmstate(bs, qiov, pos, is_read); + } else { + BdrvVmstateCo data = { + .bs = bs, + .qiov = qiov, + .pos = pos, + .is_read = is_read, + .ret = -EINPROGRESS, + }; + Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry); + + qemu_coroutine_enter(co, &data); + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } + return data.ret; + } +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { @@ -1862,17 +1918,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; - - if (!drv) { - return -ENOMEDIUM; - } else if (drv->bdrv_save_vmstate) { - return drv->bdrv_save_vmstate(bs, qiov, pos); - } else if (bs->file) { - return bdrv_writev_vmstate(bs->file->bs, qiov, pos); - } - - return -ENOTSUP; + return bdrv_rw_vmstate(bs, qiov, pos, false); } int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, @@ -1896,17 +1942,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; - - if (!drv) { - return -ENOMEDIUM; - } else if (drv->bdrv_load_vmstate) { - return drv->bdrv_load_vmstate(bs, qiov, pos); - } else if (bs->file) { - return bdrv_readv_vmstate(bs->file->bs, qiov, pos); - } - - return -ENOTSUP; + return bdrv_rw_vmstate(bs, qiov, pos, true); } /**************************************************************/ diff --git a/include/block/block_int.h b/include/block/block_int.h index f9a32cc5c5..1fe0811a65 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -224,10 +224,12 @@ struct BlockDriver { int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs); - int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos); - int (*bdrv_load_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos); + int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); int (*bdrv_change_backing_file)(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); -- cgit v1.2.3-55-g7522 From c9d20029f43a08c6362a655c2c5272612186a004 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 1 Jun 2016 17:13:47 +0200 Subject: block: Remove bs->zero_beyond_eof It is always true for open images now. Signed-off-by: Kevin Wolf Reviewed-by: Fam Zheng Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block.c | 2 -- block/io.c | 52 +++++++++++++++++++++-------------------------- include/block/block_int.h | 3 --- 3 files changed, 23 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 3d850a2999..b350794c3e 100644 --- a/block.c +++ b/block.c @@ -938,7 +938,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512; - bs->zero_beyond_eof = true; bs->read_only = !(bs->open_flags & BDRV_O_RDWR); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { @@ -2192,7 +2191,6 @@ static void bdrv_close(BlockDriverState *bs) bs->encrypted = 0; bs->valid_key = 0; bs->sg = 0; - bs->zero_beyond_eof = false; QDECREF(bs->options); QDECREF(bs->explicit_options); bs->options = NULL; diff --git a/block/io.c b/block/io.c index e1e8948be9..f5ce5f171e 100644 --- a/block/io.c +++ b/block/io.c @@ -967,6 +967,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + int64_t total_bytes, max_bytes; int ret; assert(is_power_of_2(align)); @@ -1008,40 +1009,33 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } /* Forward the request to the BlockDriver */ - if (!bs->zero_beyond_eof) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else { - /* Read zeros after EOF */ - int64_t total_bytes, max_bytes; - - total_bytes = bdrv_getlength(bs); - if (total_bytes < 0) { - ret = total_bytes; - goto out; - } + total_bytes = bdrv_getlength(bs); + if (total_bytes < 0) { + ret = total_bytes; + goto out; + } - max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); - if (bytes < max_bytes) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else if (max_bytes > 0) { - QEMUIOVector local_qiov; + max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); + if (bytes < max_bytes) { + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); + } else if (max_bytes > 0) { + QEMUIOVector local_qiov; - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes); + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes); - ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0); + ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0); - qemu_iovec_destroy(&local_qiov); - } else { - ret = 0; - } + qemu_iovec_destroy(&local_qiov); + } else { + ret = 0; + } - /* Reading beyond end of file is supposed to produce zeroes */ - if (ret == 0 && total_bytes < offset + bytes) { - uint64_t zero_offset = MAX(0, total_bytes - offset); - uint64_t zero_bytes = offset + bytes - zero_offset; - qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes); - } + /* Reading beyond end of file is supposed to produce zeroes */ + if (ret == 0 && total_bytes < offset + bytes) { + uint64_t zero_offset = MAX(0, total_bytes - offset); + uint64_t zero_bytes = offset + bytes - zero_offset; + qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes); } out: diff --git a/include/block/block_int.h b/include/block/block_int.h index 1fe0811a65..16c43e224e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -451,9 +451,6 @@ struct BlockDriverState { /* I/O Limits */ BlockLimits bl; - /* Whether produces zeros when read beyond eof */ - bool zero_beyond_eof; - /* Alignment requirement for offset/length of I/O requests */ unsigned int request_alignment; /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */ -- cgit v1.2.3-55-g7522 From 274fccee2bf63702b34e3923b1e50a49147a7918 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 10 Jun 2016 20:57:47 +0200 Subject: block/mirror: Fix target backing BDS Currently, we are trying to move the backing BDS from the source to the target in bdrv_replace_in_backing_chain() which is called from mirror_exit(). However, mirror_complete() already tries to open the target's backing chain with a call to bdrv_open_backing_file(). First, we should only set the target's backing BDS once. Second, the mirroring block job has a better idea of what to set it to than the generic code in bdrv_replace_in_backing_chain() (in fact, the latter's conditions on when to move the backing BDS from source to target are not really correct). Therefore, remove that code from bdrv_replace_in_backing_chain() and leave it to mirror_complete(). Depending on what kind of mirroring is performed, we furthermore want to use different strategies to open the target's backing chain: - If blockdev-mirror is used, we can assume the user made sure that the target already has the correct backing chain. In particular, we should not try to open a backing file if the target does not have any yet. - If drive-mirror with mode=absolute-paths is used, we can and should reuse the already existing chain of nodes that the source BDS is in. In case of sync=full, no backing BDS is required; with sync=top, we just link the source's backing BDS to the target, and with sync=none, we use the source BDS as the target's backing BDS. We should not try to open these backing files anew because this would lead to two BDSs existing per physical file in the backing chain, and we would like to avoid such concurrent access. - If drive-mirror with mode=existing is used, we have to use the information provided in the physical image file which means opening the target's backing chain completely anew, just as it has been done already. If the target's backing chain shares images with the source, this may lead to multiple BDSs per physical image file. But since we cannot reliably ascertain this case, there is nothing we can do about it. Signed-off-by: Max Reitz Message-id: 20160610185750.30956-3-mreitz@redhat.com Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Max Reitz --- block.c | 8 -------- block/mirror.c | 39 ++++++++++++++++++++++++++++----------- blockdev.c | 15 ++++++++++++--- include/block/block_int.h | 18 +++++++++++++++++- 4 files changed, 57 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index d090324c68..b331eb9d38 100644 --- a/block.c +++ b/block.c @@ -2291,14 +2291,6 @@ void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new) change_parent_backing_link(old, new); - /* Change backing files if a previously independent node is added to the - * chain. For active commit, we replace top by its own (indirect) backing - * file and don't do anything here so we don't build a loop. */ - if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) { - bdrv_set_backing_hd(new, backing_bs(old)); - bdrv_set_backing_hd(old, NULL); - } - bdrv_unref(old); } diff --git a/block/mirror.c b/block/mirror.c index 41848b2c8e..075384a9cf 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -44,6 +44,7 @@ typedef struct MirrorBlockJob { /* Used to block operations on the drive-mirror-replace target */ Error *replace_blocker; bool is_none_mode; + BlockMirrorBackingMode backing_mode; BlockdevOnError on_source_error, on_target_error; bool synced; bool should_complete; @@ -742,20 +743,26 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - Error *local_err = NULL; - int ret; + BlockDriverState *src, *target; + + src = blk_bs(job->blk); + target = blk_bs(s->target); - ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing", - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return; - } if (!s->synced) { error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id); return; } + if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { + int ret; + + assert(!target->backing); + ret = bdrv_open_backing_file(target, NULL, "backing", errp); + if (ret < 0) { + return; + } + } + /* check the target bs is not blocked and block all operations on it */ if (s->replaces) { AioContext *replace_aio_context; @@ -777,6 +784,13 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target) != backing) { + bdrv_set_backing_hd(target, backing); + } + } + s->should_complete = true; block_job_enter(&s->common); } @@ -799,6 +813,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, + BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, @@ -836,6 +851,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->on_source_error = on_source_error; s->on_target_error = on_target_error; s->is_none_mode = is_none_mode; + s->backing_mode = backing_mode; s->base = base; s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); @@ -859,7 +875,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, void mirror_start(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockdevOnError on_source_error, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, @@ -875,7 +892,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; mirror_start_job(bs, target, replaces, - speed, granularity, buf_size, + speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, cb, opaque, errp, &mirror_job_driver, is_none_mode, base); } @@ -922,7 +939,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, } } - mirror_start_job(bs, base, NULL, speed, 0, 0, + mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, false, cb, opaque, &local_err, &commit_active_job_driver, false, base); if (local_err) { diff --git a/blockdev.c b/blockdev.c index 1d498c7b19..c9a0068cd6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3426,6 +3426,7 @@ static void blockdev_mirror_common(BlockDriverState *bs, BlockDriverState *target, bool has_replaces, const char *replaces, enum MirrorSyncMode sync, + BlockMirrorBackingMode backing_mode, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, bool has_buf_size, int64_t buf_size, @@ -3483,7 +3484,7 @@ static void blockdev_mirror_common(BlockDriverState *bs, */ mirror_start(bs, target, has_replaces ? replaces : NULL, - speed, granularity, buf_size, sync, + speed, granularity, buf_size, sync, backing_mode, on_source_error, on_target_error, unmap, block_job_cb, bs, errp); } @@ -3506,6 +3507,7 @@ void qmp_drive_mirror(const char *device, const char *target, BlockBackend *blk; BlockDriverState *source, *target_bs; AioContext *aio_context; + BlockMirrorBackingMode backing_mode; Error *local_err = NULL; QDict *options = NULL; int flags; @@ -3579,6 +3581,12 @@ void qmp_drive_mirror(const char *device, const char *target, } } + if (mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) { + backing_mode = MIRROR_SOURCE_BACKING_CHAIN; + } else { + backing_mode = MIRROR_OPEN_BACKING_CHAIN; + } + if ((sync == MIRROR_SYNC_MODE_FULL || !source) && mode != NEW_IMAGE_MODE_EXISTING) { @@ -3627,7 +3635,7 @@ void qmp_drive_mirror(const char *device, const char *target, bdrv_set_aio_context(target_bs, aio_context); blockdev_mirror_common(bs, target_bs, - has_replaces, replaces, sync, + has_replaces, replaces, sync, backing_mode, has_speed, speed, has_granularity, granularity, has_buf_size, buf_size, @@ -3659,6 +3667,7 @@ void qmp_blockdev_mirror(const char *device, const char *target, BlockBackend *blk; BlockDriverState *target_bs; AioContext *aio_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; Error *local_err = NULL; blk = blk_by_name(device); @@ -3684,7 +3693,7 @@ void qmp_blockdev_mirror(const char *device, const char *target, bdrv_set_aio_context(target_bs, aio_context); blockdev_mirror_common(bs, target_bs, - has_replaces, replaces, sync, + has_replaces, replaces, sync, backing_mode, has_speed, speed, has_granularity, granularity, has_buf_size, buf_size, diff --git a/include/block/block_int.h b/include/block/block_int.h index 16c43e224e..688c6be009 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -509,6 +509,20 @@ struct BlockBackendRootState { BlockdevDetectZeroesOptions detect_zeroes; }; +typedef enum BlockMirrorBackingMode { + /* Reuse the existing backing chain from the source for the target. + * - sync=full: Set backing BDS to NULL. + * - sync=top: Use source's backing BDS. + * - sync=none: Use source as the backing BDS. */ + MIRROR_SOURCE_BACKING_CHAIN, + + /* Open the target's backing chain completely anew */ + MIRROR_OPEN_BACKING_CHAIN, + + /* Do not change the target's backing BDS after job completion */ + MIRROR_LEAVE_BACKING_CHAIN, +} BlockMirrorBackingMode; + static inline BlockDriverState *backing_bs(BlockDriverState *bs) { return bs->backing ? bs->backing->bs : NULL; @@ -671,6 +685,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * @granularity: The chosen granularity for the dirty bitmap. * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. @@ -686,7 +701,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, void mirror_start(BlockDriverState *bs, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockdevOnError on_source_error, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, -- cgit v1.2.3-55-g7522