From 6faac15fa80c4c1f813d96afc13bceaa3bc5ffe7 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:50 +0200 Subject: block: make BdrvRequestFlags public Reviewed-by: Eric Blake Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- include/block/block.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 3560deb883..ba2082c0c6 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -62,6 +62,11 @@ typedef struct BlockDevOps { void (*resize_cb)(void *opaque); } BlockDevOps; +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, +} BdrvRequestFlags; + #define BDRV_O_RDWR 0x0002 #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -- cgit v1.2.3-55-g7522 From aa7bfbfff792538a9eeefe879fc4c629aa0b4203 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:51 +0200 Subject: block: add flags to bdrv_*_write_zeroes Reviewed-by: Eric Blake Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- block-migration.c | 2 +- block.c | 20 +++++++++++--------- block/backup.c | 3 ++- block/qcow2-cluster.c | 2 +- block/qcow2.c | 2 +- block/qed.c | 3 ++- block/raw_bsd.c | 5 +++-- block/vmdk.c | 3 ++- include/block/block.h | 4 ++-- include/block/block_int.h | 2 +- qemu-io-cmds.c | 2 +- 11 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/block-migration.c b/block-migration.c index daf9ec1eab..713a8e36e1 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { - ret = bdrv_write_zeroes(bs, addr, nr_sectors); + ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index 
3dc6c12c9d..e14ba48720 100644 --- a/block.c +++ b/block.c @@ -79,7 +79,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, bool is_write); static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors); + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -2392,10 +2392,11 @@ int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) return bdrv_rwv_co(bs, sector_num, qiov, true, 0); } -int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) { return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } int bdrv_pread(BlockDriverState *bs, int64_t offset, @@ -2577,7 +2578,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv->bdrv_co_write_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, - cluster_nb_sectors); + cluster_nb_sectors, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. 
@@ -2711,7 +2712,7 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, } static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; QEMUIOVector qiov; @@ -2723,7 +2724,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* First try the efficient write zeroes operation */ if (drv->bdrv_co_write_zeroes) { - ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); if (ret != -ENOTSUP) { return ret; } @@ -2778,7 +2779,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (ret < 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { - ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); + ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } @@ -2812,12 +2813,13 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, } int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, + BdrvRequestFlags flags) { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE); + BDRV_REQ_ZERO_WRITE | flags); } /** diff --git a/block/backup.c b/block/backup.c index cad14c90b2..830a179d6d 100644 --- a/block/backup.c +++ b/block/backup.c @@ -138,7 +138,8 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job->target, - start * BACKUP_SECTORS_PER_CLUSTER, n); + start * BACKUP_SECTORS_PER_CLUSTER, + n, 0); } else { ret = bdrv_co_writev(job->target, start * BACKUP_SECTORS_PER_CLUSTER, n, diff --git a/block/qcow2-cluster.c 
b/block/qcow2-cluster.c index 791083a0ef..11f9c50aa7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1613,7 +1613,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE, - s->cluster_sectors); + s->cluster_sectors, 0); if (ret < 0) { if (!preallocated) { qcow2_free_clusters(bs, offset, s->cluster_size, diff --git a/block/qcow2.c b/block/qcow2.c index 6e5d98dc48..2fe37ed6cb 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1696,7 +1696,7 @@ static int qcow2_make_empty(BlockDriverState *bs) } static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; BDRVQcowState *s = bs->opaque; diff --git a/block/qed.c b/block/qed.c index 6c0cba04f3..adc2736dd7 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1397,7 +1397,8 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret) static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) + int nb_sectors, + BdrvRequestFlags flags) { BlockDriverAIOCB *blockacb; BDRVQEDState *s = bs->opaque; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 2265dcc03f..fb5181b127 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -68,9 +68,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, } static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) + int64_t sector_num, int nb_sectors, + BdrvRequestFlags flags) { - return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors); + return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags); } static int coroutine_fn raw_co_discard(BlockDriverState *bs, diff --git a/block/vmdk.c b/block/vmdk.c index a7ebd0f125..6555663a7e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1419,7 +1419,8 @@ static coroutine_fn int 
vmdk_co_write(BlockDriverState *bs, int64_t sector_num, static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) + int nb_sectors, + BdrvRequestFlags flags) { int ret; BDRVVmdkState *s = bs->opaque; diff --git a/include/block/block.h b/include/block/block.h index ba2082c0c6..8ba9f0c80f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -192,7 +192,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors); + int nb_sectors, BdrvRequestFlags flags); int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); @@ -214,7 +214,7 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, * because it may allocate memory for the entire region. */ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors); + int nb_sectors, BdrvRequestFlags flags); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); int bdrv_get_backing_file_depth(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 166606615c..d798208aff 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -130,7 +130,7 @@ struct BlockDriver { * instead. 
*/ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors); + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 667f4e4f3a..7e9fecb34f 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -442,7 +442,7 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) CoWriteZeroes *data = opaque; data->ret = bdrv_co_write_zeroes(data->bs, data->offset / BDRV_SECTOR_SIZE, - data->count / BDRV_SECTOR_SIZE); + data->count / BDRV_SECTOR_SIZE, 0); data->done = true; if (data->ret < 0) { *data->total = data->ret; -- cgit v1.2.3-55-g7522 From d32f35cbc5a87af5e1100dd55074a79fc2cb5307 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:52 +0200 Subject: block: introduce BDRV_REQ_MAY_UNMAP request flag Reviewed-by: Eric Blake Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- block-migration.c | 3 ++- block.c | 4 ++++ block/backup.c | 2 +- include/block/block.h | 7 +++++++ 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/block-migration.c b/block-migration.c index 713a8e36e1..fc4ef93ea2 100644 --- a/block-migration.c +++ b/block-migration.c @@ -780,7 +780,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) } if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { - ret = bdrv_write_zeroes(bs, addr, nr_sectors, 0); + ret = bdrv_write_zeroes(bs, addr, nr_sectors, + BDRV_REQ_MAY_UNMAP); } else { buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); diff --git a/block.c b/block.c index e14ba48720..d34e974d07 100644 --- a/block.c +++ b/block.c @@ -2818,6 +2818,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, { trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + if (!(bs->open_flags & BDRV_O_UNMAP)) { + flags &= 
~BDRV_REQ_MAY_UNMAP; + } + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, BDRV_REQ_ZERO_WRITE | flags); } diff --git a/block/backup.c b/block/backup.c index 830a179d6d..0198514043 100644 --- a/block/backup.c +++ b/block/backup.c @@ -139,7 +139,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job->target, start * BACKUP_SECTORS_PER_CLUSTER, - n, 0); + n, BDRV_REQ_MAY_UNMAP); } else { ret = bdrv_co_writev(job->target, start * BACKUP_SECTORS_PER_CLUSTER, n, diff --git a/include/block/block.h b/include/block/block.h index 8ba9f0c80f..1f30a56fb0 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -65,6 +65,13 @@ typedef struct BlockDevOps { typedef enum { BDRV_REQ_COPY_ON_READ = 0x1, BDRV_REQ_ZERO_WRITE = 0x2, + /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver + * is allowed to optimize a write zeroes request by unmapping (discarding) + * blocks if it is guaranteed that the result will read back as + * zeroes. The flag is only passed to the driver if the block device is + * opened with BDRV_O_UNMAP. 
+ */ + BDRV_REQ_MAY_UNMAP = 0x4, } BdrvRequestFlags; #define BDRV_O_RDWR 0x0002 -- cgit v1.2.3-55-g7522 From e1a5c4bed44bc23f69f53883c657e31e150cab30 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:53 +0200 Subject: block: add logical block provisioning info to BlockDriverInfo Reviewed-by: Eric Blake Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- include/block/block.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 1f30a56fb0..9c76967104 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -18,6 +18,22 @@ typedef struct BlockDriverInfo { /* offset at which the VM state can be saved (0 if not possible) */ int64_t vm_state_offset; bool is_dirty; + /* + * True if unallocated blocks read back as zeroes. This is equivalent + * to the LBPRZ flag in the SCSI logical block provisioning page. + */ + bool unallocated_blocks_are_zero; + /* + * True if the driver can optimize writing zeroes by unmapping + * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux + * with the difference that in qemu a discard is allowed to silently + * fail. Therefore we have to use bdrv_write_zeroes with the + * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping. + * After this call the driver has to guarantee that the contents read + * back as zero. It is additionally required that the block device is + * opened with BDRV_O_UNMAP flag for this to work. + */ + bool can_write_zeroes_with_unmap; } BlockDriverInfo; typedef struct BlockFragInfo { -- cgit v1.2.3-55-g7522 From 4ce786914b745a144a9eda1ea33f3ff98328c527 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:54 +0200 Subject: block: add wrappers for logical block provisioning information This adds 2 wrappers to read the unallocated_blocks_are_zero and can_write_zeroes_with_unmap info from the BDI.
The wrappers are required to check for the existence of a backing_hd and if the devices are opened with the correct flags. Reviewed-by: Eric Blake Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- block.c | 30 ++++++++++++++++++++++++++++++ include/block/block.h | 2 ++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/block.c b/block.c index d34e974d07..37595826a3 100644 --- a/block.c +++ b/block.c @@ -3103,6 +3103,36 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + + if (bs->backing_hd) { + return false; + } + + if (bdrv_get_info(bs, &bdi) == 0) { + return bdi.unallocated_blocks_are_zero; + } + + return false; +} + +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + + if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { + return false; + } + + if (bdrv_get_info(bs, &bdi) == 0) { + return bdi.can_write_zeroes_with_unmap; + } + + return false; +} + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; diff --git a/include/block/block.h b/include/block/block.h index 9c76967104..803c5caceb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -344,6 +344,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, -- cgit v1.2.3-55-g7522 From fe81c2cca6dc69a5e423f6d8b235606b7f3ca7b7 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:06:56 +0200 
Subject: block: add BlockLimits structure to BlockDriverState this patch adds BlockLimits which introduces discard and write_zeroes limits and alignment information to the BlockDriverState. Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- include/block/block_int.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/block/block_int.h b/include/block/block_int.h index d798208aff..95140b6ccf 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -230,6 +230,20 @@ struct BlockDriver { QLIST_ENTRY(BlockDriver) list; }; +typedef struct BlockLimits { + /* maximum number of sectors that can be discarded at once */ + int max_discard; + + /* optimal alignment for discard requests in sectors */ + int64_t discard_alignment; + + /* maximum number of sectors that can be zeroized at once */ + int max_write_zeroes; + + /* optimal alignment for write zeroes requests in sectors */ + int64_t write_zeroes_alignment; +} BlockLimits; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates, so if you add new fields to this struct, please @@ -283,6 +297,9 @@ struct BlockDriverState { uint64_t total_time_ns[BDRV_MAX_IOTYPE]; uint64_t wr_highest_sector; + /* I/O Limits */ + BlockLimits bl; + /* Whether the disk can expand beyond total_sectors */ int growable; -- cgit v1.2.3-55-g7522 From d75cbb5e68f5a7d64248ba89399a7f2073231de5 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 12:07:03 +0200 Subject: block: introduce bdrv_make_zero this patch adds a call to completely zero out a block device. the operation is sped up by checking the block status and only writing zeroes to the device if they currently do not return zeroes. optionally the zero writing can be sped up by setting the flag BDRV_REQ_MAY_UNMAP to emulate the zero write by unmapping if the driver supports it.
Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- block.c | 37 +++++++++++++++++++++++++++++++++++++ include/block/block.h | 1 + 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/block.c b/block.c index df5a603686..c390f20042 100644 --- a/block.c +++ b/block.c @@ -2399,6 +2399,43 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Completely zero out a block device with the help of bdrv_write_zeroes. + * The operation is sped up by checking the block status and only writing + * zeroes to the device if they currently do not return zeroes. Optional + * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). + * + * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). + */ +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) +{ + int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; + int64_t ret, nb_sectors, sector_num = 0; + int n; + + for (;;) { + nb_sectors = target_size - sector_num; + if (nb_sectors <= 0) { + return 0; + } + if (nb_sectors > INT_MAX) { + nb_sectors = INT_MAX; + } + ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); + if (ret & BDRV_BLOCK_ZERO) { + sector_num += n; + continue; + } + ret = bdrv_write_zeroes(bs, sector_num, n, flags); + if (ret < 0) { + error_report("error writing zeroes at sector %" PRId64 ": %s", + sector_num, strerror(-ret)); + return ret; + } + sector_num += n; + } +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count1) { diff --git a/include/block/block.h b/include/block/block.h index 803c5caceb..4d9e67c7eb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -216,6 +216,7 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); int 
bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); -- cgit v1.2.3-55-g7522 From e4654d2d9406016d6e4e296ba8db3d118caf9ff6 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Nov 2013 18:29:43 +0800 Subject: block: per caller dirty bitmap Previously a BlockDriverState has only one dirty bitmap, so only one caller (e.g. a block job) can keep track of writing. This changes the dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the lifecycle is managed with these new functions: bdrv_create_dirty_bitmap bdrv_release_dirty_bitmap Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap. In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument is added to these functions, since each caller has its own dirty bitmap: bdrv_get_dirty bdrv_dirty_iter_init bdrv_get_dirty_count bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will internally walk the list of all dirty bitmaps and set them one by one. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block-migration.c | 22 +++++++++---- block.c | 83 ++++++++++++++++++++++++++++------------------- block/mirror.c | 23 +++++++------ block/qapi.c | 8 ----- include/block/block.h | 11 ++++--- include/block/block_int.h | 2 +- 6 files changed, 86 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/block-migration.c b/block-migration.c index fc4ef93ea2..897fdbabb5 100644 --- a/block-migration.c +++ b/block-migration.c @@ -58,6 +58,7 @@ typedef struct BlkMigDevState { /* Protected by block migration lock. */ unsigned long *aio_bitmap; int64_t completed_sectors; + BdrvDirtyBitmap *dirty_bitmap; } BlkMigDevState; typedef struct BlkMigBlock { @@ -309,12 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) /* Called with iothread lock taken. 
*/ -static void set_dirty_tracking(int enable) +static void set_dirty_tracking(void) { BlkMigDevState *bmds; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0); + bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE); + } +} + +static void unset_dirty_tracking(void) +{ + BlkMigDevState *bmds; + + QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { + bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap); } } @@ -432,7 +442,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, } else { blk_mig_unlock(); } - if (bdrv_get_dirty(bmds->bs, sector)) { + if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) { if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) { nr_sectors = total_sectors - sector; @@ -554,7 +564,7 @@ static int64_t get_remaining_dirty(void) int64_t dirty = 0; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { - dirty += bdrv_get_dirty_count(bmds->bs); + dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap); } return dirty << BDRV_SECTOR_BITS; @@ -569,7 +579,7 @@ static void blk_mig_cleanup(void) bdrv_drain_all(); - set_dirty_tracking(0); + unset_dirty_tracking(); blk_mig_lock(); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { @@ -604,7 +614,7 @@ static int block_save_setup(QEMUFile *f, void *opaque) init_blk_migration(f); /* start track dirty blocks */ - set_dirty_tracking(1); + set_dirty_tracking(); qemu_mutex_unlock_iothread(); ret = flush_blks(f); diff --git a/block.c b/block.c index e22b55f157..d792d53dd5 100644 --- a/block.c +++ b/block.c @@ -49,6 +49,11 @@ #include #endif +struct BdrvDirtyBitmap { + HBitmap *bitmap; + QLIST_ENTRY(BdrvDirtyBitmap) list; +}; + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); @@ -318,6 +323,7 @@ BlockDriverState *bdrv_new(const char *device_name) 
BlockDriverState *bs; bs = g_malloc0(sizeof(BlockDriverState)); + QLIST_INIT(&bs->dirty_bitmaps); pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); if (device_name[0] != '\0') { QTAILQ_INSERT_TAIL(&bdrv_states, bs, list); @@ -1617,7 +1623,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->iostatus = bs_src->iostatus; /* dirty bitmap */ - bs_dest->dirty_bitmap = bs_src->dirty_bitmap; + bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; /* reference count */ bs_dest->refcnt = bs_src->refcnt; @@ -1650,7 +1656,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* bs_new must be anonymous and shouldn't have anything fancy enabled */ assert(bs_new->device_name[0] == '\0'); - assert(bs_new->dirty_bitmap == NULL); + assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); assert(bs_new->in_use == 0); @@ -1711,6 +1717,7 @@ static void bdrv_delete(BlockDriverState *bs) assert(!bs->job); assert(!bs->in_use); assert(!bs->refcnt); + assert(QLIST_EMPTY(&bs->dirty_bitmaps)); bdrv_close(bs); @@ -2858,9 +2865,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, ret = bdrv_co_flush(bs); } - if (bs->dirty_bitmap) { - bdrv_set_dirty(bs, sector_num, nb_sectors); - } + bdrv_set_dirty(bs, sector_num, nb_sectors); if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { bs->wr_highest_sector = sector_num + nb_sectors - 1; @@ -3431,7 +3436,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; - assert(!bs->dirty_bitmap); + assert(QLIST_EMPTY(&bs->dirty_bitmaps)); return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -4296,9 +4301,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return -EROFS; } - if (bs->dirty_bitmap) { - bdrv_reset_dirty(bs, sector_num, nb_sectors); - } + bdrv_reset_dirty(bs, sector_num, nb_sectors); /* Do nothing if 
disabled. */ if (!(bs->open_flags & BDRV_O_UNMAP)) { @@ -4490,58 +4493,70 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity) +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity) { int64_t bitmap_size; + BdrvDirtyBitmap *bitmap; assert((granularity & (granularity - 1)) == 0); - if (granularity) { - granularity >>= BDRV_SECTOR_BITS; - assert(!bs->dirty_bitmap); - bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); - bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); - } else { - if (bs->dirty_bitmap) { - hbitmap_free(bs->dirty_bitmap); - bs->dirty_bitmap = NULL; + granularity >>= BDRV_SECTOR_BITS; + assert(granularity); + bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + bitmap = g_malloc0(sizeof(BdrvDirtyBitmap)); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); + QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + return bitmap; +} + +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ + BdrvDirtyBitmap *bm, *next; + QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { + if (bm == bitmap) { + QLIST_REMOVE(bitmap, list); + hbitmap_free(bitmap->bitmap); + g_free(bitmap); + return; } } } -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) { - if (bs->dirty_bitmap) { - return hbitmap_get(bs->dirty_bitmap, sector); + if (bitmap) { + return hbitmap_get(bitmap->bitmap, sector); } else { return 0; } } -void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi) +void bdrv_dirty_iter_init(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) { - hbitmap_iter_init(hbi, bs->dirty_bitmap, 0); + hbitmap_iter_init(hbi, bitmap->bitmap, 0); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - hbitmap_set(bs->dirty_bitmap, 
cur_sector, nr_sectors); + BdrvDirtyBitmap *bitmap; + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + } } -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { - hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors); + BdrvDirtyBitmap *bitmap; + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); + } } -int64_t bdrv_get_dirty_count(BlockDriverState *bs) +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) { - if (bs->dirty_bitmap) { - return hbitmap_count(bs->dirty_bitmap); - } else { - return 0; - } + return hbitmap_count(bitmap->bitmap); } /* Get a reference to bs */ diff --git a/block/mirror.c b/block/mirror.c index 7b95acf88c..6dc27ad35d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -39,6 +39,7 @@ typedef struct MirrorBlockJob { int64_t granularity; size_t buf_size; unsigned long *cow_bitmap; + BdrvDirtyBitmap *dirty_bitmap; HBitmapIter hbi; uint8_t *buf; QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; @@ -145,9 +146,10 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) s->sector_num = hbitmap_iter_next(&s->hbi); if (s->sector_num < 0) { - bdrv_dirty_iter_init(source, &s->hbi); + bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi); s->sector_num = hbitmap_iter_next(&s->hbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(source)); + trace_mirror_restart_iter(s, + bdrv_get_dirty_count(source, s->dirty_bitmap)); assert(s->sector_num >= 0); } @@ -183,7 +185,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) do { int added_sectors, added_chunks; - if (!bdrv_get_dirty(source, next_sector) || + if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) || test_bit(next_chunk, s->in_flight_bitmap)) { assert(nb_sectors > 0); break; @@ -249,7 +251,8 @@ static void coroutine_fn 
mirror_iteration(MirrorBlockJob *s) /* Advance the HBitmapIter in parallel, so that we do not examine * the same sector twice. */ - if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) { + if (next_sector > hbitmap_next_sector + && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) { hbitmap_next_sector = hbitmap_iter_next(&s->hbi); } @@ -355,7 +358,7 @@ static void coroutine_fn mirror_run(void *opaque) } } - bdrv_dirty_iter_init(bs, &s->hbi); + bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi); last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); for (;;) { uint64_t delay_ns; @@ -367,7 +370,7 @@ static void coroutine_fn mirror_run(void *opaque) goto immediate_exit; } - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that qemu_aio_flush() returns. @@ -409,7 +412,7 @@ static void coroutine_fn mirror_run(void *opaque) should_complete = s->should_complete || block_job_is_cancelled(&s->common); - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } } @@ -424,7 +427,7 @@ static void coroutine_fn mirror_run(void *opaque) */ trace_mirror_before_drain(s, cnt); bdrv_drain_all(); - cnt = bdrv_get_dirty_count(bs); + cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap); } ret = 0; @@ -471,7 +474,7 @@ immediate_exit: qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); - bdrv_set_dirty_tracking(bs, 0); + bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { @@ -575,7 +578,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, s->granularity = granularity; s->buf_size = MAX(buf_size, granularity); - bdrv_set_dirty_tracking(bs, granularity); + s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity); 
bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/block/qapi.c b/block/qapi.c index 5880b3e42b..6b0cdcfa95 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs, info->io_status = bs->iostatus; } - if (bs->dirty_bitmap) { - info->has_dirty = true; - info->dirty = g_malloc0(sizeof(*info->dirty)); - info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE; - info->dirty->granularity = - ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap)); - } - if (bs->drv) { info->has_inserted = true; info->inserted = g_malloc0(sizeof(*info->inserted)); diff --git a/include/block/block.h b/include/block/block.h index 4d9e67c7eb..33ae9a9b5a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -419,12 +419,15 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); struct HBitmapIter; -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity); -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi); -int64_t bdrv_get_dirty_count(BlockDriverState *bs); +void bdrv_dirty_iter_init(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_enable_copy_on_read(BlockDriverState *bs); void 
bdrv_disable_copy_on_read(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 95140b6ccf..3c5e9cbfc0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -318,7 +318,7 @@ struct BlockDriverState { bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; - HBitmap *dirty_bitmap; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; int refcnt; int in_use; /* users other than guest access, eg. block migration */ QTAILQ_ENTRY(BlockDriverState) list; -- cgit v1.2.3-55-g7522 From 21b568350861a2c6eedfef95b70f88faf2fbd32a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Nov 2013 18:29:44 +0800 Subject: qapi: Change BlockDirtyInfo to list We have multiple dirty bitmaps in BDS now, so switch QAPI to allow querying them (BlockInfo.dirty_bitmaps), and also drop the old BlockInfo.dirty. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 20 ++++++++++++++++++++ block/qapi.c | 5 +++++ include/block/block.h | 1 + qapi-schema.json | 6 +++--- 4 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index d792d53dd5..765bbae41a 100644 --- a/block.c +++ b/block.c @@ -4522,6 +4522,26 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) } } +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + BlockDirtyInfoList *list = NULL; + BlockDirtyInfoList **plist = &list; + + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo)); + BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList)); + info->count = bdrv_get_dirty_count(bs, bm); + info->granularity = + ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap)); + entry->value = info; + *plist = entry; + plist = &entry->next; + } + + return list; +} + int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) { if (bitmap) { diff --git a/block/qapi.c
b/block/qapi.c index 6b0cdcfa95..a32cb79db8 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -204,6 +204,11 @@ void bdrv_query_info(BlockDriverState *bs, info->io_status = bs->iostatus; } + if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + info->has_dirty_bitmaps = true; + info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); + } + if (bs->drv) { info->has_inserted = true; info->inserted = g_malloc0(sizeof(*info->inserted)); diff --git a/include/block/block.h b/include/block/block.h index 33ae9a9b5a..b6bdae8dcc 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -422,6 +422,7 @@ struct HBitmapIter; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); diff --git a/qapi-schema.json b/qapi-schema.json index 83fa4852ce..8630eb50ea 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -948,8 +948,8 @@ # @tray_open: #optional True if the device has a tray and it is open # (only present if removable is true) # -# @dirty: #optional dirty bitmap information (only present if the dirty -# bitmap is enabled) +# @dirty-bitmaps: #optional dirty bitmaps information (only present if the +# driver has one or more dirty bitmaps) (Since 1.8) # # @io-status: #optional @BlockDeviceIoStatus. 
Only present if the device # supports it and the VM is configured to stop on errors @@ -963,7 +963,7 @@ 'data': {'device': 'str', 'type': 'str', 'removable': 'bool', 'locked': 'bool', '*inserted': 'BlockDeviceInfo', '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus', - '*dirty': 'BlockDirtyInfo' } } + '*dirty-bitmaps': ['BlockDirtyInfo'] } } ## # @query-block: -- cgit v1.2.3-55-g7522 From b3af018f3babfe7a0328759a86c00a6a6b4f6443 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Thu, 7 Nov 2013 22:56:38 +0800 Subject: sheepdog: support user-defined redundancy option Sheepdog supports two kinds of redundancy: full replication and erasure coding. # create a fully replicated vdi with x copies -o redundancy=x (1 <= x <= SD_MAX_COPIES) # create an erasure coded vdi with x data strips and y parity strips -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) E.g., to convert a vdi into a sheepdog vdi 'test' with an 8:3 erasure coding scheme $ qemu-img convert -o redundancy=8:3 linux-0.2.img sheepdog:test Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++- include/block/block_int.h | 1 + 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/sheepdog.c b/block/sheepdog.c index ae436754f8..b4ae50f44d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -91,6 +91,14 @@ #define SD_NR_VDIS (1U << 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +/* + * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and + * (SD_EC_MAX_STRIP - 1) for parity strips + * + * SD_MAX_COPIES is sum of number of data strips and parity strips.
+ */ +#define SD_EC_MAX_STRIP 16 +#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1) #define SD_INODE_SIZE (sizeof(SheepdogInode)) #define CURRENT_VDI_ID 0 @@ -1495,6 +1503,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot) hdr.data_length = wlen; hdr.vdi_size = s->inode.vdi_size; hdr.copy_policy = s->inode.copy_policy; + hdr.copies = s->inode.nr_copies; ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1562,6 +1571,60 @@ out: return ret; } +/* + * Sheepdog support two kinds of redundancy, full replication and erasure + * coding. + * + * # create a fully replicated vdi with x copies + * -o redundancy=x (1 <= x <= SD_MAX_COPIES) + * + * # create a erasure coded vdi with x data strips and y parity strips + * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) + */ +static int parse_redundancy(BDRVSheepdogState *s, const char *opt) +{ + struct SheepdogInode *inode = &s->inode; + const char *n1, *n2; + long copy, parity; + char p[10]; + + pstrcpy(p, sizeof(p), opt); + n1 = strtok(p, ":"); + n2 = strtok(NULL, ":"); + + if (!n1) { + return -EINVAL; + } + + copy = strtol(n1, NULL, 10); + if (copy > SD_MAX_COPIES || copy < 1) { + return -EINVAL; + } + if (!n2) { + inode->copy_policy = 0; + inode->nr_copies = copy; + return 0; + } + + if (copy != 2 && copy != 4 && copy != 8 && copy != 16) { + return -EINVAL; + } + + parity = strtol(n2, NULL, 10); + if (parity >= SD_EC_MAX_STRIP || parity < 1) { + return -EINVAL; + } + + /* + * 4 bits for parity and 4 bits for data. 
+ * We have to compress upper data bits because it can't represent 16 + */ + inode->copy_policy = ((copy / 2) << 4) + parity; + inode->nr_copies = copy + parity; + + return 0; +} + static int sd_create(const char *filename, QEMUOptionParameter *options, Error **errp) { @@ -1602,6 +1665,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, ret = -EINVAL; goto out; } + } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) { + ret = parse_redundancy(s, options->value.s); + if (ret < 0) { + goto out; + } } options++; } @@ -1644,7 +1712,6 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, bdrv_unref(bs); } - /* TODO: allow users to specify copy number */ ret = do_sd_create(s, &vid, 0); if (!prealloc || ret) { goto out; @@ -2432,6 +2499,11 @@ static QEMUOptionParameter sd_create_options[] = { .type = OPT_STRING, .help = "Preallocation mode (allowed values: off, full)" }, + { + .name = BLOCK_OPT_REDUNDANCY, + .type = OPT_STRING, + .help = "Redundancy of the image" + }, { NULL } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 3c5e9cbfc0..d0f70c4596 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -53,6 +53,7 @@ #define BLOCK_OPT_COMPAT_LEVEL "compat" #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY "redundancy" typedef struct BdrvTrackedRequest { BlockDriverState *bs; -- cgit v1.2.3-55-g7522 From 4cc70e933731ebf4309e1f1ce90973a0de04f28f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 20 Nov 2013 10:01:54 +0800 Subject: blkdebug: add "remove_break" command This adds "remove_break" command which is the reverse of blkdebug command "break": it removes all breakpoints with given tag and resumes all the requests. 
Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 13 +++++++++++++ block/blkdebug.c | 27 +++++++++++++++++++++++++++ include/block/block.h | 1 + include/block/block_int.h | 2 ++ qemu-io-cmds.c | 22 ++++++++++++++++++++++ 5 files changed, 65 insertions(+) (limited to 'include') diff --git a/block.c b/block.c index 765bbae41a..faa52d4750 100644 --- a/block.c +++ b/block.c @@ -3525,6 +3525,19 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, return -ENOTSUP; } +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { + return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); + } + + return -ENOTSUP; +} + int bdrv_debug_resume(BlockDriverState *bs, const char *tag) { while (bs && bs->drv && !bs->drv->bdrv_debug_resume) { diff --git a/block/blkdebug.c b/block/blkdebug.c index 16d2b91ac9..37cf028545 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -605,6 +605,31 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) return -ENOENT; } +static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + BlkdebugRule *rule, *next; + int i, ret = -ENOENT; + + for (i = 0; i < BLKDBG_EVENT_MAX; i++) { + QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { + if (rule->action == ACTION_SUSPEND && + !strcmp(rule->options.suspend.tag, tag)) { + remove_rule(rule); + ret = 0; + } + } + } + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + ret = 0; + } + } + return ret; +} static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) { @@ -639,6 +664,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + 
.bdrv_debug_remove_breakpoint + = blkdebug_debug_remove_breakpoint, .bdrv_debug_resume = blkdebug_debug_resume, .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; diff --git a/include/block/block.h b/include/block/block.h index b6bdae8dcc..5beccbf1cf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -519,6 +519,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, const char *tag); +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); int bdrv_debug_resume(BlockDriverState *bs, const char *tag); bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); diff --git a/include/block/block_int.h b/include/block/block_int.h index d0f70c4596..773899b500 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -219,6 +219,8 @@ struct BlockDriver { /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, const char *tag); + int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, + const char *tag); int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 7e9fecb34f..85e4982bd8 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1956,6 +1956,18 @@ static int break_f(BlockDriverState *bs, int argc, char **argv) return 0; } +static int remove_break_f(BlockDriverState *bs, int argc, char **argv) +{ + int ret; + + ret = bdrv_debug_remove_breakpoint(bs, argv[1]); + if (ret < 0) { + printf("Could not remove breakpoint %s: %s\n", argv[1], strerror(-ret)); + } + + return 0; +} + static const cmdinfo_t break_cmd = { .name = "break", .argmin = 2, @@ -1966,6 +1978,15 @@ static const cmdinfo_t break_cmd = { "request as tag", }; +static const cmdinfo_t remove_break_cmd = { + .name = "remove_break", + .argmin = 1, + 
.argmax = 1, + .cfunc = remove_break_f, + .args = "tag", + .oneline = "remove a breakpoint by tag", +}; + static int resume_f(BlockDriverState *bs, int argc, char **argv) { int ret; @@ -2126,6 +2147,7 @@ static void __attribute((constructor)) init_qemuio_commands(void) qemuio_add_command(&alloc_cmd); qemuio_add_command(&map_cmd); qemuio_add_command(&break_cmd); + qemuio_add_command(&remove_break_cmd); qemuio_add_command(&resume_cmd); qemuio_add_command(&wait_break_cmd); qemuio_add_command(&abort_cmd); -- cgit v1.2.3-55-g7522