From 50717e941b9f306a45292621999eeafbaa954418 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Jan 2013 17:09:45 +0100 Subject: block: allow customizing the granularity of the dirty bitmap Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- qapi-schema.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'qapi-schema.json') diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b9e8..ce4f901dc6 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -667,10 +667,12 @@ # # @count: number of dirty bytes according to the dirty bitmap # +# @granularity: granularity of the dirty bitmap in bytes (since 1.4) +# # Since: 1.3 ## { 'type': 'BlockDirtyInfo', - 'data': {'count': 'int'} } + 'data': {'count': 'int', 'granularity': 'int'} } ## # @BlockInfo: -- cgit v1.2.3-55-g7522 From eee13dfe302833944d1176677d12a6ea421a94ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Jan 2013 17:09:46 +0100 Subject: mirror: allow customizing the granularity The desired granularity may be very different depending on the kind of operation (e.g. continuous replication vs. collapse-to-raw) and whether the VM is expected to perform lots of I/O while mirroring is in progress. Allow the user to customize it, while providing a sane default so that in general there will be no extra allocated space in the target compared to the source. 
Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 52 ++++++++++++++++++++++++++++++----------------- blockdev.c | 15 +++++++++++++- hmp.c | 2 +- include/block/block_int.h | 3 ++- qapi-schema.json | 8 +++++++- qmp-commands.hx | 8 +++++++- 6 files changed, 64 insertions(+), 24 deletions(-) (limited to 'qapi-schema.json') diff --git a/block/mirror.c b/block/mirror.c index e425927ee5..0fecb40c10 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -17,9 +17,6 @@ #include "qemu/ratelimit.h" #include "qemu/bitmap.h" -#define BLOCK_SIZE (1 << 20) -#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS) - #define SLICE_TIME 100000000ULL /* ns */ typedef struct MirrorBlockJob { @@ -31,6 +28,7 @@ typedef struct MirrorBlockJob { bool synced; bool should_complete; int64_t sector_num; + int64_t granularity; size_t buf_size; unsigned long *cow_bitmap; HBitmapIter hbi; @@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s, BlockDriverState *source = s->common.bs; BlockDriverState *target = s->target; QEMUIOVector qiov; - int ret, nb_sectors; + int ret, nb_sectors, sectors_per_chunk; int64_t end, sector_num, chunk_num; struct iovec iov; @@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s, * is very large, we need to do COW ourselves. The first time a cluster is * copied, copy it entirely. * - * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are - * powers of two, the number of sectors to copy cannot exceed one cluster. + * Because both the granularity and the cluster size are powers of two, the + * number of sectors to copy cannot exceed one cluster. 
*/ sector_num = s->sector_num; - nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; - chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; + sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS; + chunk_num = sector_num / sectors_per_chunk; if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) { trace_mirror_cow(s, sector_num); bdrv_round_to_clusters(s->target, - sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK, + sector_num, sectors_per_chunk, &sector_num, &nb_sectors); } @@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s, goto fail; } if (s->cow_bitmap) { - bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK, - nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK); + bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk, + nb_sectors / sectors_per_chunk); } return 0; @@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; - int64_t sector_num, end, length; + int64_t sector_num, end, sectors_per_chunk, length; BlockDriverInfo bdi; char backing_filename[1024]; int ret = 0; @@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque) sizeof(backing_filename)); if (backing_filename[0] && !s->target->backing_hd) { bdrv_get_info(s->target, &bdi); - if (s->buf_size < bdi.cluster_size) { + if (s->granularity < bdi.cluster_size) { s->buf_size = bdi.cluster_size; - length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE; + length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; s->cow_bitmap = bitmap_new(length); } } end = s->common.len >> BDRV_SECTOR_BITS; s->buf = qemu_blockalign(bs, s->buf_size); + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; if (s->mode != MIRROR_SYNC_MODE_NONE) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base; base = s->mode == MIRROR_SYNC_MODE_FULL ?
NULL : bs->backing_hd; for (sector_num = 0; sector_num < end; ) { - int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; ret = bdrv_co_is_allocated_above(bs, base, sector_num, next - sector_num, &n); @@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk); } else { delay_ns = 0; } @@ -332,7 +331,7 @@ static BlockJobType mirror_job_type = { }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, + int64_t speed, int64_t granularity, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, @@ -340,6 +339,20 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, { MirrorBlockJob *s; + if (granularity == 0) { + /* Choose the default granularity based on the target file's cluster + * size, clamped between 4k and 64k. 
*/ + BlockDriverInfo bdi; + if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + } + + assert ((granularity & (granularity - 1)) == 0); + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { @@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->on_target_error = on_target_error; s->target = target; s->mode = mode; - s->buf_size = BLOCK_SIZE; + s->granularity = granularity; + s->buf_size = granularity; - bdrv_set_dirty_tracking(bs, BLOCK_SIZE); + bdrv_set_dirty_tracking(bs, granularity); bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/blockdev.c b/blockdev.c index 1eb62b637c..07fd3273ed 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) @@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_mode) { mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } + if (!has_granularity) { + granularity = 0; + } + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } + if (granularity & (granularity - 1)) { + error_set(errp, QERR_INVALID_PARAMETER, device); + return; + } bs = bdrv_find(device); if (!bs) { @@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target, return; } - mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, 
+ mirror_start(bs, target_bs, speed, granularity, sync, + on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); diff --git a/hmp.c b/hmp.c index c7b6ba02fc..0f3347dd76 100644 --- a/hmp.c +++ b/hmp.c @@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, + true, mode, false, 0, false, 0, false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } diff --git a/include/block/block_int.h b/include/block/block_int.h index b81c061cd9..1165339fd1 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs: Block device to operate on. * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. * @mode: Whether to collapse all images in the chain to the target. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. @@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, MirrorSyncMode mode, + int64_t speed, int64_t granularity, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, diff --git a/qapi-schema.json b/qapi-schema.json index ce4f901dc6..fd5ec93c03 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1636,6 +1636,11 @@ # (all the disk, only the sectors allocated in the topmost image, or # only new I/O). 
# +# @granularity: #optional granularity of the dirty bitmap, default is 64K +# if the image format doesn't have clusters, 4K if the clusters +# are smaller than that, else the cluster size. Must be a +# power of 2 between 512 and 64M (since 1.4). +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). @@ -1652,7 +1657,8 @@ { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*speed': 'int', '*granularity': 'uint32', + '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index cbf12804be..835ea26e9d 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -938,7 +938,8 @@ EQMP { .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," - "on-source-error:s?,on-target-error:s?", + "on-source-error:s?,on-target-error:s?," + "granularity:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -962,6 +963,7 @@ Arguments: file/device (NewImageMode, optional, default 'absolute-paths') - "speed": maximum speed of the streaming job, in bytes per second (json-int) +- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional) - "sync": what parts of the disk image should be copied to the destination; possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O @@ -971,6 +973,10 @@ Arguments: - "on-target-error": the action to take on an error on the target (BlockdevOnError, default 'report') +The default value of the granularity is the image cluster size clamped +between 4096 and 65536, if the image format defines one. 
If the format +does not define a cluster size, the default value of the granularity +is 65536. Example: -- cgit v1.2.3-55-g7522 From 08e4ed6cdeeee7912072cf14aa8ab6c60dacb4fb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Jan 2013 09:03:13 +0100 Subject: mirror: add buf-size argument to drive-mirror This makes sense when the next commit starts using the extra buffer space to perform many I/O operations asynchronously. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 8 ++++---- blockdev.c | 9 ++++++++- hmp.c | 2 +- include/block/block_int.h | 5 +++-- qapi-schema.json | 5 ++++- qmp-commands.hx | 4 +++- tests/qemu-iotests/041 | 31 +++++++++++++++++++++++++++++++ tests/qemu-iotests/041.out | 4 ++-- 8 files changed, 56 insertions(+), 12 deletions(-) (limited to 'qapi-schema.json') diff --git a/block/mirror.c b/block/mirror.c index fc6b9b7118..896972c297 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -207,7 +207,7 @@ static void coroutine_fn mirror_run(void *opaque) if (backing_filename[0] && !s->target->backing_hd) { bdrv_get_info(s->target, &bdi); if (s->granularity < bdi.cluster_size) { - s->buf_size = bdi.cluster_size; + s->buf_size = MAX(s->buf_size, bdi.cluster_size); length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; s->cow_bitmap = bitmap_new(length); } @@ -416,8 +416,8 @@ static BlockJobType mirror_job_type = { }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, int64_t granularity, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) @@ -455,7 +455,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->target = target; s->mode = mode; s->granularity = granularity; - s->buf_size = granularity; + s->buf_size = MAX(buf_size, granularity); 
bdrv_set_dirty_tracking(bs, granularity); bdrv_set_enable_write_cache(s->target, true); diff --git a/blockdev.c b/blockdev.c index 07fd3273ed..ad25b9b86e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1188,12 +1188,15 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +#define DEFAULT_MIRROR_BUF_SIZE (10 << 20) + void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp) @@ -1222,6 +1225,10 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_granularity) { granularity = 0; } + if (!has_buf_size) { + buf_size = DEFAULT_MIRROR_BUF_SIZE; + } + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_set(errp, QERR_INVALID_PARAMETER, device); return; @@ -1311,7 +1318,7 @@ void qmp_drive_mirror(const char *device, const char *target, return; } - mirror_start(bs, target_bs, speed, granularity, sync, + mirror_start(bs, target_bs, speed, granularity, buf_size, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { diff --git a/hmp.c b/hmp.c index 0f3347dd76..99fd89206b 100644 --- a/hmp.c +++ b/hmp.c @@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ? 
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, false, 0, + true, mode, false, 0, false, 0, false, 0, false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } diff --git a/include/block/block_int.h b/include/block/block_int.h index 1165339fd1..f7279b978a 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -345,6 +345,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. @@ -358,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, int64_t granularity, MirrorSyncMode mode, - BlockdevOnError on_source_error, + int64_t speed, int64_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/qapi-schema.json b/qapi-schema.json index fd5ec93c03..ba75c4de12 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1641,6 +1641,9 @@ # are smaller than that, else the cluster size. Must be a # power of 2 between 512 and 64M (since 1.4). # +# @buf-size: #optional maximum amount of data in flight from source to +# target (since 1.4). +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). 
@@ -1658,7 +1661,7 @@ 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', '*speed': 'int', '*granularity': 'uint32', - '*on-source-error': 'BlockdevOnError', + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index 835ea26e9d..273b4a67ba 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -939,7 +939,7 @@ EQMP .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," "on-source-error:s?,on-target-error:s?," - "granularity:i?", + "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -964,6 +964,8 @@ Arguments: - "speed": maximum speed of the streaming job, in bytes per second (json-int) - "granularity": granularity of the dirty bitmap, in bytes (json-int, optional) +- "buf-size": maximum amount of data in flight from source to target, in bytes + (json-int, default 10M) - "sync": what parts of the disk image should be copied to the destination; possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index a1299b348e..b040820c51 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') + def test_small_buffer(self): + self.assert_no_active_mirrors() + + # A small buffer is rounded up automatically + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=4096, target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() +
self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_small_buffer2(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d' + % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + buf_size=65536, mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + def test_large_cluster(self): self.assert_no_active_mirrors() diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 3a89159833..84bfd63fba 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -.................... +...................... ---------------------------------------------------------------------- -Ran 20 tests +Ran 22 tests OK -- cgit v1.2.3-55-g7522