From 298a1665a2800f7264e483c2dd1f551574243a2f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 11 Oct 2017 22:46:57 -0500 Subject: block: Allow NULL file for bdrv_get_block_status() Not all callers care about which BDS owns the mapping for a given range of the file. This patch merely simplifies the callers by consolidating the logic in the common call point, while guaranteeing a non-NULL file to all the driver callbacks, for no semantic change. The only caller that does not care about pnum is bdrv_is_allocated, as invoked by vvfat; we can likewise add assertions that the rest of the stack does not have to worry about a NULL pnum. Furthermore, this will also set the stage for a future cleanup: when a caller does not care about which BDS owns an offset, it would be nice to allow the driver to optimize things to not have to return BDRV_BLOCK_OFFSET_VALID in the first place. In the case of fragmented allocation (for example, it's fairly easy to create a qcow2 image where consecutive guest addresses are not at consecutive host addresses), the current contract requires bdrv_get_block_status() to clamp *pnum to the limit where host addresses are no longer consecutive, but allowing a NULL file means that *pnum could be set to the full length of known-allocated data. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- include/block/block_int.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/block_int.h b/include/block/block_int.h index 885c08e989..246eee2e82 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -202,10 +202,12 @@ struct BlockDriver { int64_t offset, int bytes); /* - * Building block for bdrv_block_status[_above]. The driver should - * answer only according to the current layer, and should not - * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h - * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. 
+ * Building block for bdrv_block_status[_above] and + * bdrv_is_allocated[_above]. The driver should answer only + * according to the current layer, and should not set + * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h + * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block + * layer guarantees non-NULL pnum and file. */ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, -- cgit v1.2.3-55-g7522 From 7cfd527525a7d6b1c904890a6b84c1227846415e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 11 Oct 2017 22:46:59 -0500 Subject: block: Make bdrv_round_to_clusters() signature more useful In the process of converting sector-based interfaces to bytes, I'm finding it easier to represent a byte count as a 64-bit integer at the block layer (even if we are internally capped by SIZE_MAX or even INT_MAX for individual transactions, it's still nicer to not have to worry about truncation/overflow issues on as many variables). Update the signature of bdrv_round_to_clusters() to uniformly use int64_t, matching the signature already chosen for bdrv_is_allocated and the fact that off_t is also a signed type, then adjust clients according to the required fallout (even where the result could now exceed 32 bits, no client is directly assigning the result into a 32-bit value without breaking things into a loop first). 
Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/io.c | 6 +++--- block/mirror.c | 7 +++---- block/trace-events | 2 +- include/block/block.h | 4 ++-- 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 8ba408c813..db8dc59c94 100644 --- a/block/io.c +++ b/block/io.c @@ -469,9 +469,9 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) * Round a region to cluster boundaries */ void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t offset, unsigned int bytes, + int64_t offset, int64_t bytes, int64_t *cluster_offset, - unsigned int *cluster_bytes) + int64_t *cluster_bytes) { BlockDriverInfo bdi; @@ -969,7 +969,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, struct iovec iov; QEMUIOVector local_qiov; int64_t cluster_offset; - unsigned int cluster_bytes; + int64_t cluster_bytes; size_t skip_bytes; int ret; int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, diff --git a/block/mirror.c b/block/mirror.c index e76e754d26..d11706c566 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -190,10 +190,9 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, bool need_cow; int ret = 0; int64_t align_offset = *offset; - unsigned int align_bytes = *bytes; + int64_t align_bytes = *bytes; int max_bytes = s->granularity * s->max_iov; - assert(*bytes < INT_MAX); need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, s->cow_bitmap); @@ -388,7 +387,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) while (nb_chunks > 0 && offset < s->bdev_length) { int64_t ret; int io_sectors; - unsigned int io_bytes; + int64_t io_bytes; int64_t io_bytes_acct; enum MirrorMethod { MIRROR_METHOD_COPY, @@ -413,7 +412,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) io_bytes = s->granularity; } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t 
target_offset; - unsigned int target_bytes; + int64_t target_bytes; bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, &target_offset, &target_bytes); if (target_offset == offset && diff --git a/block/trace-events b/block/trace-events index 25dd5a3026..11c8d5f590 100644 --- a/block/trace-events +++ b/block/trace-events @@ -12,7 +12,7 @@ blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flag bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x" -bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u" +bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %"PRId64 # block/stream.c stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" diff --git a/include/block/block.h b/include/block/block.h index d5c2731a03..440f3e9e39 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -474,9 +474,9 @@ int bdrv_get_flags(BlockDriverState *bs); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t offset, unsigned int bytes, + int64_t offset, int64_t bytes, int64_t *cluster_offset, - unsigned int *cluster_bytes); + int64_t *cluster_bytes); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void 
bdrv_get_backing_filename(BlockDriverState *bs, -- cgit v1.2.3-55-g7522 From 237d78f8fc62e62f62246883ecf62e44ed35fb80 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 11 Oct 2017 22:47:03 -0500 Subject: block: Convert bdrv_get_block_status() to bytes We are gradually moving away from sector-based interfaces, towards byte-based. In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the name of the function from bdrv_get_block_status() to bdrv_block_status() ensures that the compiler enforces that all callers are updated. For now, the io.c layer still assert()s that all callers are sector-aligned, but that can be relaxed when a later patch implements byte-based block status in the drivers. There was an inherent limitation in returning the offset via the return value: we only have room for BDRV_BLOCK_OFFSET_MASK bits, which means an offset can only be mapped for sector-aligned queries (or, if we declare that non-aligned input is at the same relative position modulo 512 of the answer), so the new interface also changes things to return the offset via output through a parameter by reference rather than mashed into the return value. We'll have some glue code that munges between the two styles until we finish converting all uses. For the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_block_status(), coupled with the tweak in calling convention. But some code, particularly bdrv_is_allocated(), gets a lot simpler because it no longer has to mess with sectors. For ease of review, bdrv_get_block_status_above() will be tackled separately. 
Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/io.c | 47 ++++++++++++++++++++++++++++++++++------------- block/qcow2-cluster.c | 2 +- include/block/block.h | 17 +++++++++-------- qemu-img.c | 25 ++++++++++++++----------- 4 files changed, 58 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index ad84d84888..890d3c073b 100644 --- a/block/io.c +++ b/block/io.c @@ -716,9 +716,9 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, */ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) { - int64_t target_size, ret, bytes, offset = 0; + int ret; + int64_t target_size, bytes, offset = 0; BlockDriverState *bs = child->bs; - int n; /* sectors */ target_size = bdrv_getlength(bs); if (target_size < 0) { @@ -730,24 +730,23 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) if (bytes <= 0) { return 0; } - ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, &n, NULL); + ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); if (ret < 0) { error_report("error getting block status at offset %" PRId64 ": %s", offset, strerror(-ret)); return ret; } if (ret & BDRV_BLOCK_ZERO) { - offset += n * BDRV_SECTOR_BITS; + offset += bytes; continue; } - ret = bdrv_pwrite_zeroes(child, offset, n * BDRV_SECTOR_SIZE, flags); + ret = bdrv_pwrite_zeroes(child, offset, bytes, flags); if (ret < 0) { error_report("error writing zeroes at offset %" PRId64 ": %s", offset, strerror(-ret)); return ret; } - offset += n * BDRV_SECTOR_SIZE; + offset += bytes; } } @@ -2045,13 +2044,35 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, nb_sectors, pnum, file); } -int64_t bdrv_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { - return bdrv_get_block_status_above(bs, 
backing_bs(bs), - sector_num, nb_sectors, pnum, file); + int64_t ret; + int n; + + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + assert(pnum); + /* + * The contract allows us to return pnum smaller than bytes, even + * if the next query would see the same status; we truncate the + * request to avoid overflowing the driver's 32-bit interface. + */ + bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES); + ret = bdrv_get_block_status_above(bs, backing_bs(bs), + offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, &n, file); + if (ret < 0) { + assert(INT_MIN <= ret); + *pnum = 0; + return ret; + } + *pnum = n * BDRV_SECTOR_SIZE; + if (map) { + *map = ret & BDRV_BLOCK_OFFSET_MASK; + } else { + ret &= ~BDRV_BLOCK_OFFSET_VALID; + } + return ret & ~BDRV_BLOCK_OFFSET_MASK; } int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0e5aec81cb..fb10e26068 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1632,7 +1632,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, * cluster is already marked as zero, or if it's unallocated and we * don't have a backing file. * - * TODO We might want to use bdrv_get_block_status(bs) here, but we're + * TODO We might want to use bdrv_block_status(bs) here, but we're * holding s->lock, so that doesn't work today. * * If full_discard is true, the sector should not read back as zeroes, diff --git a/include/block/block.h b/include/block/block.h index 440f3e9e39..7ac851f82f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -121,7 +121,7 @@ typedef struct HDGeometry { #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) /* - * Allocation status flags for bdrv_get_block_status() and friends. + * Allocation status flags for bdrv_block_status() and friends. 
* * Public flags: * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer @@ -136,10 +136,11 @@ typedef struct HDGeometry { * that the block layer recompute the answer from the returned * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. * - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) - * represent the offset in the returned BDS that is allocated for the - * corresponding raw data; however, whether that offset actually contains - * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows: + * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of + * the return value (old interface) or the entire map parameter (new + * interface) represent the offset in the returned BDS that is allocated for + * the corresponding raw data. However, whether that offset actually + * contains data also depends on BDRV_BLOCK_DATA, as follows: * * DATA ZERO OFFSET_VALID * t t t sectors read as zero, returned file is zero at offset @@ -421,9 +422,9 @@ int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file); +int bdrv_block_status(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file); int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t sector_num, diff --git a/qemu-img.c b/qemu-img.c index af3effdec5..c81d6ce733 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1599,9 +1599,14 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) if (s->sector_next_status <= sector_num) { if (s->target_has_backing) { - ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), - sector_num - src_cur_offset, - n, &n, NULL); + int64_t 
count = n * BDRV_SECTOR_SIZE; + + ret = bdrv_block_status(blk_bs(s->src[src_cur]), + (sector_num - src_cur_offset) * + BDRV_SECTOR_SIZE, + count, &count, NULL, NULL); + assert(ret < 0 || QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; } else { ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, sector_num - src_cur_offset, @@ -2674,13 +2679,12 @@ static void dump_map_entry(OutputFormat output_format, MapEntry *e, static int get_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, MapEntry *e) { - int64_t ret; + int ret; int depth; BlockDriverState *file; bool has_offset; - int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int64_t map; - assert(bytes < INT_MAX); /* As an optimization, we could cache the current range of unallocated * clusters in each file of the chain, and avoid querying the same * range repeatedly. @@ -2688,12 +2692,11 @@ static int get_block_status(BlockDriverState *bs, int64_t offset, depth = 0; for (;;) { - ret = bdrv_get_block_status(bs, offset >> BDRV_SECTOR_BITS, nb_sectors, - &nb_sectors, &file); + ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file); if (ret < 0) { return ret; } - assert(nb_sectors); + assert(bytes); if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) { break; } @@ -2710,10 +2713,10 @@ static int get_block_status(BlockDriverState *bs, int64_t offset, *e = (MapEntry) { .start = offset, - .length = nb_sectors * BDRV_SECTOR_SIZE, + .length = bytes, .data = !!(ret & BDRV_BLOCK_DATA), .zero = !!(ret & BDRV_BLOCK_ZERO), - .offset = ret & BDRV_BLOCK_OFFSET_MASK, + .offset = map, .has_offset = has_offset, .depth = depth, .has_filename = file && has_offset, -- cgit v1.2.3-55-g7522 From 3182664220571d11d4fe03ecdc10fcc1e842ed32 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 11 Oct 2017 22:47:08 -0500 Subject: block: Convert bdrv_get_block_status_above() to bytes We are gradually moving away from sector-based interfaces, towards byte-based. 
In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the name of the function from bdrv_get_block_status_above() to bdrv_block_status_above() ensures that the compiler enforces that all callers are updated. Likewise, since a byte interface allows an offset mapping that might not be sector-aligned, split the mapping out of the return value and into a pass-by-reference parameter. For now, the io.c layer still assert()s that all uses are sector-aligned, but that can be relaxed when a later patch implements byte-based block status in the drivers. For the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_block_status(), plus updates for the new split return interface. But some code, particularly bdrv_block_status(), gets a lot simpler because it no longer has to mess with sectors. Likewise, mirror code no longer computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop an assertion about alignment because the loop no longer depends on alignment (never mind that we don't really have a driver that reports sub-sector alignments, so it's not really possible to test the effect of sub-sector mirroring). Fix a neighboring assertion to use is_power_of_2 while there. For ease of review, bdrv_get_block_status() was tackled separately. 
Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/io.c | 55 ++++++++------------------------------------------- block/mirror.c | 18 ++++++----------- block/qcow2.c | 30 +++++++++++----------------- include/block/block.h | 8 +++----- qemu-img.c | 49 +++++++++++++++++++++++++-------------------- 5 files changed, 57 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 61b3477cd1..e64b1cb294 100644 --- a/block/io.c +++ b/block/io.c @@ -2016,7 +2016,7 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, return ret; } -/* Coroutine wrapper for bdrv_get_block_status_above() */ +/* Coroutine wrapper for bdrv_block_status_above() */ static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque) { BdrvCoBlockStatusData *data = opaque; @@ -2064,58 +2064,19 @@ static int bdrv_common_block_status_above(BlockDriverState *bs, return data.ret; } -int64_t bdrv_get_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { - int64_t ret; - int64_t n; - int64_t map; - - ret = bdrv_common_block_status_above(bs, base, true, - sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, - &n, &map, file); - if (ret < 0) { - *pnum = 0; - return ret; - } - assert(QEMU_IS_ALIGNED(n | map, BDRV_SECTOR_SIZE)); - *pnum = n >> BDRV_SECTOR_BITS; - return ret | map; + return bdrv_common_block_status_above(bs, base, true, offset, bytes, + pnum, map, file); } int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { - int64_t ret; - int n; - - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - assert(pnum); - /* - * The contract allows us to return pnum smaller than bytes, even 
- * if the next query would see the same status; we truncate the - * request to avoid overflowing the driver's 32-bit interface. - */ - bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES); - ret = bdrv_get_block_status_above(bs, backing_bs(bs), - offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, &n, file); - if (ret < 0) { - assert(INT_MIN <= ret); - *pnum = 0; - return ret; - } - *pnum = n * BDRV_SECTOR_SIZE; - if (map) { - *map = ret & BDRV_BLOCK_OFFSET_MASK; - } else { - ret &= ~BDRV_BLOCK_OFFSET_VALID; - } - return ret & ~BDRV_BLOCK_OFFSET_MASK; + return bdrv_block_status_above(bs, backing_bs(bs), + offset, bytes, pnum, map, file); } int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, diff --git a/block/mirror.c b/block/mirror.c index d11706c566..307b6391a8 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -328,7 +328,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ int nb_chunks = 1; - int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); @@ -376,7 +375,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } /* Clear dirty bits before querying the block status, because - * calling bdrv_get_block_status_above could yield - if some blocks are + * calling bdrv_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. 
*/ bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset, @@ -385,8 +384,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); while (nb_chunks > 0 && offset < s->bdev_length) { - int64_t ret; - int io_sectors; + int ret; int64_t io_bytes; int64_t io_bytes_acct; enum MirrorMethod { @@ -396,11 +394,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); - ret = bdrv_get_block_status_above(source, NULL, - offset >> BDRV_SECTOR_BITS, - nb_chunks * sectors_per_chunk, - &io_sectors, NULL); - io_bytes = io_sectors * BDRV_SECTOR_SIZE; + ret = bdrv_block_status_above(source, NULL, offset, + nb_chunks * s->granularity, + &io_bytes, NULL, NULL); if (ret < 0) { io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes); } else if (ret & BDRV_BLOCK_DATA) { @@ -1131,9 +1127,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, granularity = bdrv_get_default_bitmap_granularity(target); } - assert ((granularity & (granularity - 1)) == 0); - /* Granularity must be large enough for sector-based dirty bitmap */ - assert(granularity >= BDRV_SECTOR_SIZE); + assert(is_power_of_2(granularity)); if (buf_size < 0) { error_setg(errp, "Invalid parameter 'buf-size'"); diff --git a/block/qcow2.c b/block/qcow2.c index 795be673e7..29d0a50955 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2974,8 +2974,8 @@ finish: static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) { - int nr; - int64_t res; + int64_t nr; + int res; int64_t start; /* TODO: Widening to sector boundaries should only be needed as @@ -2991,10 +2991,8 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) if (!bytes) { return true; } - res = bdrv_get_block_status_above(bs, NULL, start >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, &nr, NULL); - return res >= 0 && (res & BDRV_BLOCK_ZERO) && - nr 
* BDRV_SECTOR_SIZE == bytes; + res = bdrv_block_status_above(bs, NULL, start, bytes, &nr, NULL, NULL); + return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes; } static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, @@ -3700,17 +3698,14 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, required = virtual_size; } else { int64_t offset; - int pnum = 0; + int64_t pnum = 0; - for (offset = 0; offset < ssize; - offset += pnum * BDRV_SECTOR_SIZE) { - int nb_sectors = MIN(ssize - offset, - BDRV_REQUEST_MAX_BYTES) / BDRV_SECTOR_SIZE; - int64_t ret; + for (offset = 0; offset < ssize; offset += pnum) { + int ret; - ret = bdrv_get_block_status_above(in_bs, NULL, - offset >> BDRV_SECTOR_BITS, - nb_sectors, &pnum, NULL); + ret = bdrv_block_status_above(in_bs, NULL, offset, + ssize - offset, &pnum, NULL, + NULL); if (ret < 0) { error_setg_errno(&local_err, -ret, "Unable to get block status"); @@ -3722,11 +3717,10 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { /* Extend pnum to end of cluster for next iteration */ - pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE, - cluster_size) - offset) >> BDRV_SECTOR_BITS; + pnum = ROUND_UP(offset + pnum, cluster_size) - offset; /* Count clusters we've seen */ - required += offset % cluster_size + pnum * BDRV_SECTOR_SIZE; + required += offset % cluster_size + pnum; } } } diff --git a/include/block/block.h b/include/block/block.h index 7ac851f82f..fbc21daf62 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -425,11 +425,9 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); -int64_t bdrv_get_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, 
int *pnum, - BlockDriverState **file); +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, diff --git a/qemu-img.c b/qemu-img.c index c81d6ce733..78c820e487 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1226,7 +1226,7 @@ static int img_compare(int argc, char **argv) BlockDriverState *bs1, *bs2; int64_t total_sectors1, total_sectors2; uint8_t *buf1 = NULL, *buf2 = NULL; - int pnum1, pnum2; + int64_t pnum1, pnum2; int allocated1, allocated2; int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */ bool progress = false, quiet = false, strict = false; @@ -1374,15 +1374,17 @@ static int img_compare(int argc, char **argv) } for (;;) { - int64_t status1, status2; + int status1, status2; nb_sectors = sectors_to_process(total_sectors, sector_num); if (nb_sectors <= 0) { break; } - status1 = bdrv_get_block_status_above(bs1, NULL, sector_num, - total_sectors1 - sector_num, - &pnum1, NULL); + status1 = bdrv_block_status_above(bs1, NULL, + sector_num * BDRV_SECTOR_SIZE, + (total_sectors1 - sector_num) * + BDRV_SECTOR_SIZE, + &pnum1, NULL, NULL); if (status1 < 0) { ret = 3; error_report("Sector allocation test failed for %s", filename1); @@ -1390,25 +1392,29 @@ static int img_compare(int argc, char **argv) } allocated1 = status1 & BDRV_BLOCK_ALLOCATED; - status2 = bdrv_get_block_status_above(bs2, NULL, sector_num, - total_sectors2 - sector_num, - &pnum2, NULL); + status2 = bdrv_block_status_above(bs2, NULL, + sector_num * BDRV_SECTOR_SIZE, + (total_sectors2 - sector_num) * + BDRV_SECTOR_SIZE, + &pnum2, NULL, NULL); if (status2 < 0) { ret = 3; error_report("Sector allocation test failed for %s", filename2); goto out; } allocated2 = status2 & BDRV_BLOCK_ALLOCATED; + /* TODO: Relax this once 
comparison is byte-based, and we no longer + * have to worry about sector alignment */ + assert(QEMU_IS_ALIGNED(pnum1 | pnum2, BDRV_SECTOR_SIZE)); if (pnum1) { - nb_sectors = MIN(nb_sectors, pnum1); + nb_sectors = MIN(nb_sectors, pnum1 >> BDRV_SECTOR_BITS); } if (pnum2) { - nb_sectors = MIN(nb_sectors, pnum2); + nb_sectors = MIN(nb_sectors, pnum2 >> BDRV_SECTOR_BITS); } if (strict) { - if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) != - (status2 & ~BDRV_BLOCK_OFFSET_MASK)) { + if (status1 != status2) { ret = 1; qprintf(quiet, "Strict mode: Offset %" PRId64 " block status mismatch!\n", @@ -1417,7 +1423,7 @@ static int img_compare(int argc, char **argv) } } if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) { - nb_sectors = MIN(pnum1, pnum2); + nb_sectors = DIV_ROUND_UP(MIN(pnum1, pnum2), BDRV_SECTOR_SIZE); } else if (allocated1 == allocated2) { if (allocated1) { ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1, @@ -1589,8 +1595,8 @@ static void convert_select_part(ImgConvertState *s, int64_t sector_num, static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret, src_cur_offset; - int n, src_cur; + int64_t src_cur_offset; + int ret, n, src_cur; convert_select_part(s, sector_num, &src_cur, &src_cur_offset); @@ -1598,23 +1604,24 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { + int64_t count = n * BDRV_SECTOR_SIZE; + if (s->target_has_backing) { - int64_t count = n * BDRV_SECTOR_SIZE; ret = bdrv_block_status(blk_bs(s->src[src_cur]), (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE, count, &count, NULL, NULL); - assert(ret < 0 || QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); - n = count >> BDRV_SECTOR_BITS; } else { - ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, - sector_num - src_cur_offset, - n, &n, NULL); + ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), 
NULL, + (sector_num - src_cur_offset) * + BDRV_SECTOR_SIZE, + count, &count, NULL, NULL); } if (ret < 0) { return ret; } + n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE); if (ret & BDRV_BLOCK_ZERO) { s->status = BLK_ZERO; -- cgit v1.2.3-55-g7522 From efa6e2ed643c770153eeacace410c06f15360cd9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 11 Oct 2017 22:47:17 -0500 Subject: block: Align block status requests Any device that has request_alignment greater than 512 should be unable to report status at a finer granularity; it may also be simpler for such devices to be guaranteed that the block layer has rounded things out to the granularity boundary (the way the block layer already rounds all other I/O out). Besides, getting the code correct for super-sector alignment also benefits us for the fact that our public interface now has byte granularity, even though none of our drivers have byte-level callbacks. Add an assertion in blkdebug that proves that the block layer never requests status of unaligned sections, similar to what it does on other requests (while still keeping the generic helper in place for when future patches add a throttle driver). Note that iotest 177 already covers this (it would fail if you use just the blkdebug.c hunk without the io.c changes). Meanwhile, we can drop assertions in callers that no longer have to pass in sector-aligned addresses. There is a mid-function scope added for 'count' and 'longret', for a couple of reasons: first, an upcoming patch will add an 'if' statement that checks whether a driver has an old- or new-style callback, and can conveniently use the same scope for less indentation churn at that time. Second, since we are trying to get rid of sector-based computations, wrapping things in a scope makes it easier to group and see what will be deleted in a final cleanup patch once all drivers have been converted to the new-style callback. 
Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/blkdebug.c | 13 ++++++++- block/io.c | 71 ++++++++++++++++++++++++++++++----------------- include/block/block_int.h | 3 +- 3 files changed, 59 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/block/blkdebug.c b/block/blkdebug.c index dfdf9b91aa..e21669979d 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -627,6 +627,17 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } +static int64_t coroutine_fn blkdebug_co_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + assert(QEMU_IS_ALIGNED(sector_num | nb_sectors, + DIV_ROUND_UP(bs->bl.request_alignment, + BDRV_SECTOR_SIZE))); + return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors, + pnum, file); +} + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -896,7 +907,7 @@ static BlockDriver bdrv_blkdebug = { .bdrv_co_flush_to_disk = blkdebug_co_flush, .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, .bdrv_co_pdiscard = blkdebug_co_pdiscard, - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file, + .bdrv_co_get_block_status = blkdebug_co_get_block_status, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, diff --git a/block/io.c b/block/io.c index e64b1cb294..d2cb20d872 100644 --- a/block/io.c +++ b/block/io.c @@ -1839,10 +1839,11 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, { int64_t total_size; int64_t n; /* bytes */ - int64_t ret; + int ret; int64_t local_map = 0; BlockDriverState *local_file = NULL; - int count; /* sectors */ + int64_t aligned_offset, aligned_bytes; + uint32_t align; assert(pnum); *pnum = 0; @@ -1881,35 +1882,58 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, } bdrv_inc_in_flight(bs); + + /* Round out to request_alignment 
boundaries */ + /* TODO: until we have a byte-based driver callback, we also have to + * round out to sectors, even if that is bigger than request_alignment */ + align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE); + aligned_offset = QEMU_ALIGN_DOWN(offset, align); + aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; + + { + int count; /* sectors */ + int64_t longret; + + assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes, + BDRV_SECTOR_SIZE)); + /* + * The contract allows us to return pnum smaller than bytes, even + * if the next query would see the same status; we truncate the + * request to avoid overflowing the driver's 32-bit interface. + */ + longret = bs->drv->bdrv_co_get_block_status( + bs, aligned_offset >> BDRV_SECTOR_BITS, + MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count, + &local_file); + if (longret < 0) { + assert(INT_MIN <= longret); + ret = longret; + goto out; + } + if (longret & BDRV_BLOCK_OFFSET_VALID) { + local_map = longret & BDRV_BLOCK_OFFSET_MASK; + } + ret = longret & ~BDRV_BLOCK_OFFSET_MASK; + *pnum = count * BDRV_SECTOR_SIZE; + } + /* - * TODO: Rather than require aligned offsets, we could instead - * round to the driver's request_alignment here, then touch up - * count afterwards back to the caller's expectations. - */ - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - /* - * The contract allows us to return pnum smaller than bytes, even - * if the next query would see the same status; we truncate the - * request to avoid overflowing the driver's 32-bit interface. + * The driver's result must be a multiple of request_alignment. + * Clamp pnum and adjust map to original request. 
*/ - bytes = MIN(bytes, BDRV_REQUEST_MAX_BYTES); - ret = bs->drv->bdrv_co_get_block_status(bs, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, &count, - &local_file); - if (ret < 0) { - goto out; + assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset); + *pnum -= offset - aligned_offset; + if (*pnum > bytes) { + *pnum = bytes; } if (ret & BDRV_BLOCK_OFFSET_VALID) { - local_map = ret & BDRV_BLOCK_OFFSET_MASK; + local_map += offset - aligned_offset; } - *pnum = count * BDRV_SECTOR_SIZE; if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); ret = bdrv_co_block_status(local_file, want_zero, local_map, *pnum, pnum, &local_map, &local_file); - assert(ret < 0 || - QEMU_IS_ALIGNED(*pnum | local_map, BDRV_SECTOR_SIZE)); goto out; } @@ -1968,11 +1992,6 @@ early_out: if (map) { *map = local_map; } - if (ret >= 0) { - ret &= ~BDRV_BLOCK_OFFSET_MASK; - } else { - assert(INT_MIN <= ret); - } return ret; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 246eee2e82..a5482775ec 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -207,7 +207,8 @@ struct BlockDriver { * according to the current layer, and should not set * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block - * layer guarantees non-NULL pnum and file. + * layer guarantees input aligned to request_alignment, as well as + * non-NULL pnum and file. */ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, -- cgit v1.2.3-55-g7522