From ebde595ce63369921dbbe1bd16fec0b230050d67 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 9 Nov 2015 18:16:46 +0800 Subject: block: Add more types for tracked request We'll track more request types besides read and write, change the boolean field to an enum. Signed-off-by: Fam Zheng Reviewed-by: Kevin Wolf Message-id: 1447064214-29930-2-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/block_int.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block_int.h b/include/block/block_int.h index 603145a21d..ac034069ff 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -60,11 +60,19 @@ #define BLOCK_PROBE_BUF_SIZE 512 +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_FLUSH, + BDRV_TRACKED_IOCTL, + BDRV_TRACKED_DISCARD, +}; + typedef struct BdrvTrackedRequest { BlockDriverState *bs; int64_t offset; unsigned int bytes; - bool is_write; + enum BdrvTrackedRequestType type; bool serialising; int64_t overlap_offset; -- cgit v1.2.3-55-g7522 From 8b45f6878d291646cadc4786ae807e6a42c188b4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 9 Nov 2015 18:16:50 +0800 Subject: block: Add ioctl parameter fields to BlockRequest The two fields that will be used by ioctl handling code later are added as union, because it's used exclusively by ioctl code which dosn't need the four fields in the other struct of the union. Signed-off-by: Fam Zheng Reviewed-by: Kevin Wolf Message-id: 1447064214-29930-6-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/block.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 610db923d5..c8b40b739f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -335,10 +335,18 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ - int64_t sector; - int nb_sectors; - int flags; - QEMUIOVector *qiov; + union { + struct { + int64_t sector; + int nb_sectors; + int flags; + QEMUIOVector *qiov; + }; + struct { + int req; + void *buf; + }; + }; BlockCompletionFunc *cb; void *opaque; -- cgit v1.2.3-55-g7522 From 83c98d7b924eab36a0a2c7813731dbef439a91d3 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 9 Nov 2015 18:16:52 +0800 Subject: block: Drop BlockDriver.bdrv_ioctl Now the callback is not used any more, drop the field along with all implementations in block drivers, which are iscsi and raw. Signed-off-by: Fam Zheng Reviewed-by: Kevin Wolf Message-id: 1447064214-29930-8-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/iscsi.c | 33 --------------------------------- block/raw-posix.c | 8 -------- block/raw_bsd.c | 6 ------ include/block/block_int.h | 1 - 4 files changed, 48 deletions(-) (limited to 'include') diff --git a/block/iscsi.c b/block/iscsi.c index 46290e0c43..bd1f1bfcd1 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -898,38 +898,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } -static void ioctl_cb(void *opaque, int status) -{ - int *p_status = opaque; - *p_status = status; -} - -static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - IscsiLun *iscsilun = bs->opaque; - int status; - - switch (req) { - case SG_GET_VERSION_NUM: - *(int *)buf = 30000; - break; - case SG_GET_SCSI_ID: - ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; - break; - case SG_IO: - status = -EINPROGRESS; - iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); - - while (status == -EINPROGRESS) { - aio_poll(iscsilun->aio_context, true); - } - - return 0; - default: - return -1; - } - return 0; -} #endif static int64_t @@ -1860,7 +1828,6 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_flush_to_disk = iscsi_co_flush, #ifdef __linux__ - .bdrv_ioctl = iscsi_ioctl, .bdrv_aio_ioctl = iscsi_aio_ioctl, #endif diff --git a/block/raw-posix.c b/block/raw-posix.c index 918c756c2e..aec9ec6bbb 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -2175,12 +2175,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } #if defined(__linux__) -static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BDRVRawState *s = bs->opaque; - - return ioctl(s->fd, req, buf); -} static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, @@ -2338,7 +2332,6 @@ static BlockDriver bdrv_host_device = { /* generic scsi device */ #ifdef __linux__ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, #endif }; @@ -2471,7 +2464,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_lock_medium = cdrom_lock_medium, /* generic scsi device */ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, }; #endif /* __linux__ */ diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 0aded31c22..915d6fd0e6 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -169,11 +169,6 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked) bdrv_lock_medium(bs->file->bs, locked); } -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return bdrv_ioctl(bs->file->bs, req, buf); -} - static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, @@ -262,7 +257,6 @@ BlockDriver bdrv_raw = { .bdrv_media_changed = &raw_media_changed, .bdrv_eject = &raw_eject, .bdrv_lock_medium = &raw_lock_medium, - .bdrv_ioctl = &raw_ioctl, .bdrv_aio_ioctl = &raw_aio_ioctl, .create_opts = &raw_create_opts, .bdrv_has_zero_init = &raw_has_zero_init diff --git a/include/block/block_int.h b/include/block/block_int.h index ac034069ff..84991d9973 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -227,7 +227,6 @@ struct BlockDriver { void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); /* to control generic scsi devices */ - int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); -- cgit v1.2.3-55-g7522 From 67da1dc5ce696c7b309b1db100da7d94292847b7 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 9 Nov 2015 18:16:53 +0800 Subject: block: Introduce BlockDriver.bdrv_drain callback Drivers can have internal request sources that generate IO, like the need_check_timer in QED. Since we want quiesced periods that contain nested event loops in block layer, we need to have a way to disable such event sources. Block drivers must implement the "bdrv_drain" callback if it has any internal sources that can generate I/O activity, like a timer or a worker thread (even in a library) that can schedule QEMUBH in an asynchronous callback. Update the comments of bdrv_drain and bdrv_drained_begin accordingly. Like bdrv_requests_pending(), we should consider all the children of bs. Before, the while loop just works, as bdrv_requests_pending() already tracks its children; now we mustn't miss the callback, so recurse down explicitly. Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Message-id: 1447064214-29930-9-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 16 +++++++++++++++- include/block/block_int.h | 6 ++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 4ecb17134b..adc1eabef0 100644 --- a/block/io.c +++ b/block/io.c @@ -237,8 +237,21 @@ bool bdrv_requests_pending(BlockDriverState *bs) return false; } +static void bdrv_drain_recurse(BlockDriverState *bs) +{ + BdrvChild *child; + + if (bs->drv && bs->drv->bdrv_drain) { + bs->drv->bdrv_drain(bs); + } + QLIST_FOREACH(child, &bs->children, next) { + bdrv_drain_recurse(child->bs); + } +} + /* - * Wait for pending requests to complete on a single BlockDriverState subtree + * Wait for pending requests to complete on a single BlockDriverState subtree, + * and suspend block driver's internal I/O until next request arrives. * * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState * AioContext. @@ -251,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs) { bool busy = true; + bdrv_drain_recurse(bs); while (busy) { /* Keep iterating */ bdrv_flush_io_queue(bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 84991d9973..3a24913a7e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -295,6 +295,12 @@ struct BlockDriver { */ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + /** + * Drain and stop any internal sources of requests in the driver, and + * remain so until next I/O callback (e.g. bdrv_co_writev) is called. + */ + void (*bdrv_drain)(BlockDriverState *bs); + QLIST_ENTRY(BlockDriver) list; }; -- cgit v1.2.3-55-g7522 From df9a681dc9ad41c9cdeb9ecc5d060ba9abd27e01 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 9 Nov 2015 18:16:54 +0800 Subject: qed: Implement .bdrv_drain The "need_check_timer" is used to clear the "NEED_CHECK" flag in the image header after a grace period once metadata update has finished. In compliance to the bdrv_drain semantics we should make sure it remains deleted once .bdrv_drain is called. We cannot reuse qed_need_check_timer_cb because here it doesn't satisfy the assertion. Do the "plug" and "flush" calls manually. Signed-off-by: Fam Zheng Reviewed-by: Kevin Wolf Message-id: 1447064214-29930-10-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 19 +++++++- block/qed.c | 13 ++++++ blockdev.c | 114 +++++++++++++++++++++++++++++++++++++++++++++- docs/bitmaps.md | 6 +-- include/block/block.h | 1 - include/block/block_int.h | 3 ++ qapi-schema.json | 6 ++- 7 files changed, 152 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 867520a3fd..3a7324bb05 100644 --- a/block.c +++ b/block.c @@ -3404,10 +3404,25 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); } -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) { assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset_all(bitmap->bitmap); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } +} + +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) +{ + HBitmap *tmp = bitmap->bitmap; + assert(bdrv_dirty_bitmap_enabled(bitmap)); + bitmap->bitmap = in; + hbitmap_free(tmp); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, diff --git a/block/qed.c b/block/qed.c index 5ea05d4909..9b88895038 100644 --- a/block/qed.c +++ b/block/qed.c @@ -375,6 +375,18 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } +static void bdrv_qed_drain(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + /* Cancel timer and start doing I/O that were meant to happen as if it + * fired, that way we get bdrv_drain() taking care of the ongoing requests + * correctly. */ + qed_cancel_need_check_timer(s); + qed_plug_allocating_write_reqs(s); + bdrv_aio_flush(s->bs, qed_clear_need_check, s); +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1676,6 +1688,7 @@ static BlockDriver bdrv_qed = { .bdrv_check = bdrv_qed_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, + .bdrv_drain = bdrv_qed_drain, }; static void bdrv_qed_init(void) diff --git a/blockdev.c b/blockdev.c index 8607df90a9..7739df587f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1874,6 +1874,106 @@ static void blockdev_backup_clean(BlkTransactionState *common) } } +typedef struct BlockDirtyBitmapState { + BlkTransactionState common; + BdrvDirtyBitmap *bitmap; + BlockDriverState *bs; + AioContext *aio_context; + HBitmap *backup; + bool prepared; +} BlockDirtyBitmapState; + +static void block_dirty_bitmap_add_prepare(BlkTransactionState *common, + Error **errp) +{ + Error *local_err = NULL; + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + action = common->action->block_dirty_bitmap_add; + /* AIO context taken and released within qmp_block_dirty_bitmap_add */ + qmp_block_dirty_bitmap_add(action->node, action->name, + action->has_granularity, action->granularity, + &local_err); + + if (!local_err) { + state->prepared = true; + } else { + error_propagate(errp, local_err); + } +} + +static void block_dirty_bitmap_add_abort(BlkTransactionState *common) +{ + BlockDirtyBitmapAdd *action; + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + action = common->action->block_dirty_bitmap_add; + /* Should not be able to fail: IF the bitmap was added via .prepare(), + * then the node reference and bitmap name must have been valid. + */ + if (state->prepared) { + qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort); + } +} + +static void block_dirty_bitmap_clear_prepare(BlkTransactionState *common, + Error **errp) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + BlockDirtyBitmap *action; + + action = common->action->block_dirty_bitmap_clear; + state->bitmap = block_dirty_bitmap_lookup(action->node, + action->name, + &state->bs, + &state->aio_context, + errp); + if (!state->bitmap) { + return; + } + + if (bdrv_dirty_bitmap_frozen(state->bitmap)) { + error_setg(errp, "Cannot modify a frozen bitmap"); + return; + } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) { + error_setg(errp, "Cannot clear a disabled bitmap"); + return; + } + + bdrv_clear_dirty_bitmap(state->bitmap, &state->backup); + /* AioContext is released in .clean() */ +} + +static void block_dirty_bitmap_clear_abort(BlkTransactionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup); +} + +static void block_dirty_bitmap_clear_commit(BlkTransactionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + hbitmap_free(state->backup); +} + +static void block_dirty_bitmap_clear_clean(BlkTransactionState *common) +{ + BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, + common, common); + + if (state->aio_context) { + aio_context_release(state->aio_context); + } +} + static void abort_prepare(BlkTransactionState *common, Error **errp) { error_setg(errp, "Transaction aborted using Abort action"); @@ -1921,6 +2021,18 @@ static const BdrvActionOps actions[] = { .abort = internal_snapshot_abort, .clean = internal_snapshot_clean, }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_add_prepare, + .abort = block_dirty_bitmap_add_abort, + }, + [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_clear_prepare, + .commit = block_dirty_bitmap_clear_commit, + .abort = block_dirty_bitmap_clear_abort, + .clean = block_dirty_bitmap_clear_clean, + } }; /* @@ -2472,7 +2584,7 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name, goto out; } - bdrv_clear_dirty_bitmap(bitmap); + bdrv_clear_dirty_bitmap(bitmap, NULL); out: aio_context_release(aio_context); diff --git a/docs/bitmaps.md b/docs/bitmaps.md index fa87f077fe..9fd8ea65ea 100644 --- a/docs/bitmaps.md +++ b/docs/bitmaps.md @@ -97,11 +97,7 @@ which is included at the end of this document. } ``` -## Transactions (Not yet implemented) - -* Transactional commands are forthcoming in a future version, - and are not yet available for use. This section serves as - documentation of intent for their design and usage. +## Transactions ### Justification diff --git a/include/block/block.h b/include/block/block.h index c8b40b739f..92f6f6a95a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -501,7 +501,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors); -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); diff --git a/include/block/block_int.h b/include/block/block_int.h index 3a24913a7e..695393e6e6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -693,4 +693,7 @@ void blk_dev_resize_cb(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); bool bdrv_requests_pending(BlockDriverState *bs); +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); + #endif /* BLOCK_INT_H */ diff --git a/qapi-schema.json b/qapi-schema.json index c3f95ab170..b60c3729a7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1546,6 +1546,8 @@ # blockdev-snapshot-internal-sync since 1.7 # blockdev-backup since 2.3 # blockdev-snapshot since 2.5 +# block-dirty-bitmap-add since 2.5 +# block-dirty-bitmap-clear since 2.5 ## { 'union': 'TransactionAction', 'data': { @@ -1554,7 +1556,9 @@ 'drive-backup': 'DriveBackup', 'blockdev-backup': 'BlockdevBackup', 'abort': 'Abort', - 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal' + 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal', + 'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd', + 'block-dirty-bitmap-clear': 'BlockDirtyBitmap' } } ## -- cgit v1.2.3-55-g7522 From 18930ba3d17866fff6df52ae6d2e54ce5c5ca04b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 5 Nov 2015 18:13:11 -0500 Subject: blockjob: Introduce reference count and fix reference to job->bs Add reference count to block job, meanwhile move the ownership of the reference to job->bs from the caller (which is released in two completion callbacks) to the block job itself. It is necessary for block_job_complete_sync to work, because block job shouldn't live longer than its bs, as asserted in bdrv_delete. Now block_job_complete_sync can be simplified. Signed-off-by: Fam Zheng Signed-off-by: John Snow Message-id: 1446765200-3054-6-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/mirror.c | 2 +- blockdev.c | 28 ---------------------------- blockjob.c | 25 ++++++++++++++++--------- include/block/blockjob.h | 18 +++++++++++++++--- qemu-img.c | 3 --- 5 files changed, 32 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/block/mirror.c b/block/mirror.c index 60f1cb589d..52c9abfe14 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -742,7 +742,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { g_free(s->replaces); - block_job_release(bs); + block_job_unref(&s->common); return; } diff --git a/blockdev.c b/blockdev.c index 209dbd51ca..944525806e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -283,32 +283,6 @@ typedef struct { BlockDriverState *bs; } BDRVPutRefBH; -static void bdrv_put_ref_bh(void *opaque) -{ - BDRVPutRefBH *s = opaque; - - bdrv_unref(s->bs); - qemu_bh_delete(s->bh); - g_free(s); -} - -/* - * Release a BDS reference in a BH - * - * It is not safe to use bdrv_unref() from a callback function when the callers - * still need the BlockDriverState. In such cases we schedule a BH to release - * the reference. - */ -static void bdrv_put_ref_bh_schedule(BlockDriverState *bs) -{ - BDRVPutRefBH *s; - - s = g_new(BDRVPutRefBH, 1); - s->bh = qemu_bh_new(bdrv_put_ref_bh, s); - s->bs = bs; - qemu_bh_schedule(s->bh); -} - static int parse_block_error_action(const char *buf, bool is_read, Error **errp) { if (!strcmp(buf, "ignore")) { @@ -2741,8 +2715,6 @@ static void block_job_cb(void *opaque, int ret) } else { block_job_event_completed(bs->job, msg); } - - bdrv_put_ref_bh_schedule(bs); } void qmp_block_stream(const char *device, diff --git a/blockjob.c b/blockjob.c index c02fe598b8..ae9c5b2925 100644 --- a/blockjob.c +++ b/blockjob.c @@ -60,6 +60,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->cb = cb; job->opaque = opaque; job->busy = true; + job->refcnt = 1; bs->job = job; /* Only set speed when necessary to avoid NotSupported error */ @@ -68,7 +69,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, block_job_set_speed(job, speed, &local_err); if (local_err) { - block_job_release(bs); + block_job_unref(job); error_propagate(errp, local_err); return NULL; } @@ -76,15 +77,21 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, return job; } -void block_job_release(BlockDriverState *bs) +void block_job_ref(BlockJob *job) { - BlockJob *job = bs->job; + ++job->refcnt; +} - bs->job = NULL; - bdrv_op_unblock_all(bs, job->blocker); - error_free(job->blocker); - g_free(job->id); - g_free(job); +void block_job_unref(BlockJob *job) +{ + if (--job->refcnt == 0) { + job->bs->job = NULL; + bdrv_op_unblock_all(job->bs, job->blocker); + bdrv_unref(job->bs); + error_free(job->blocker); + g_free(job->id); + g_free(job); + } } void block_job_completed(BlockJob *job, int ret) @@ -93,7 +100,7 @@ void block_job_completed(BlockJob *job, int ret) assert(bs->job == job); job->cb(job->opaque, ret); - block_job_release(bs); + block_job_unref(job); } void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 289b13f0c0..b649a402a0 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -130,6 +130,9 @@ struct BlockJob { /** The opaque value that is passed to the completion function. */ void *opaque; + + /** Reference count of the block job */ + int refcnt; }; /** @@ -174,12 +177,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_release: + * block_job_ref: + * @bs: The block device. + * + * Grab a reference to the block job. Should be paired with block_job_unref. + */ +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: * @bs: The block device. * - * Release job resources when an error occurred or job completed. + * Release reference to the block job and release resources if it is the last + * reference. */ -void block_job_release(BlockDriverState *bs); +void block_job_unref(BlockJob *job); /** * block_job_completed: diff --git a/qemu-img.c b/qemu-img.c index 9831db75ef..033011c4e7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -645,9 +645,6 @@ static void common_block_job_cb(void *opaque, int ret) if (ret < 0) { error_setg_errno(cbi->errp, -ret, "Block job failed"); } - - /* Drop this block job's reference */ - bdrv_unref(cbi->bs); } static void run_block_job(BlockJob *job, Error **errp) -- cgit v1.2.3-55-g7522 From 57901ecb8e02f03464d5f37bb6edf82e5076812d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 5 Nov 2015 18:13:12 -0500 Subject: blockjob: Add .commit and .abort block job actions Reviewed-by: Max Reitz Reviewed-by: John Snow Signed-off-by: Fam Zheng Signed-off-by: John Snow Message-id: 1446765200-3054-7-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/blockjob.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/block/blockjob.h b/include/block/blockjob.h index b649a402a0..ed856d72bf 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -50,6 +50,26 @@ typedef struct BlockJobDriver { * manually. */ void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); } BlockJobDriver; /** -- cgit v1.2.3-55-g7522 From a689dbf2df74a55d43e3fc4d6aec30ed67ca998f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 5 Nov 2015 18:13:13 -0500 Subject: blockjob: Add "completed" and "ret" in BlockJob They are set when block_job_completed is called. Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1446765200-3054-8-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockjob.c | 3 +++ include/block/blockjob.h | 9 +++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/blockjob.c b/blockjob.c index ae9c5b2925..bcd7efcb64 100644 --- a/blockjob.c +++ b/blockjob.c @@ -99,6 +99,9 @@ void block_job_completed(BlockJob *job, int ret) BlockDriverState *bs = job->bs; assert(bs->job == job); + assert(!job->completed); + job->completed = true; + job->ret = ret; job->cb(job->opaque, ret); block_job_unref(job); } diff --git a/include/block/blockjob.h b/include/block/blockjob.h index ed856d72bf..c70d55a644 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -153,6 +153,15 @@ struct BlockJob { /** Reference count of the block job */ int refcnt; + + /* True if this job has reported completion by calling block_job_completed. + */ + bool completed; + + /* ret code passed to block_job_completed. + */ + int ret; + }; /** -- cgit v1.2.3-55-g7522 From c55a832fdddec2c350b585ade0476501f616608d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 5 Nov 2015 18:13:15 -0500 Subject: block: Add block job transactions Sometimes block jobs must execute as a transaction group. Finishing jobs wait until all other jobs are ready to complete successfully. Failure or cancellation of one job cancels the other jobs in the group. Signed-off-by: Stefan Hajnoczi Reviewed-by: Max Reitz Signed-off-by: Fam Zheng Signed-off-by: John Snow Message-id: 1446765200-3054-10-git-send-email-jsnow@redhat.com [Rewrite the implementation which is now contained in block_job_completed. --Fam] Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockjob.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++- include/block/block.h | 1 + include/block/blockjob.h | 38 +++++++++++++ 3 files changed, 172 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/blockjob.c b/blockjob.c index 81b268e20d..80adb9d52a 100644 --- a/blockjob.c +++ b/blockjob.c @@ -37,6 +37,19 @@ #include "qemu/timer.h" #include "qapi-event.h" +/* Transactional group of block jobs */ +struct BlockJobTxn { + + /* Is this txn being cancelled? */ + bool aborting; + + /* List of jobs */ + QLIST_HEAD(, BlockJob) jobs; + + /* Reference count */ + int refcnt; +}; + void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) @@ -94,6 +107,86 @@ void block_job_unref(BlockJob *job) } } +static void block_job_completed_single(BlockJob *job) +{ + if (!job->ret) { + if (job->driver->commit) { + job->driver->commit(job); + } + } else { + if (job->driver->abort) { + job->driver->abort(job); + } + } + job->cb(job->opaque, job->ret); + if (job->txn) { + block_job_txn_unref(job->txn); + } + block_job_unref(job); +} + +static void block_job_completed_txn_abort(BlockJob *job) +{ + AioContext *ctx; + BlockJobTxn *txn = job->txn; + BlockJob *other_job, *next; + + if (txn->aborting) { + /* + * We are cancelled by another job, which will handle everything. + */ + return; + } + txn->aborting = true; + /* We are the first failed job. Cancel other jobs. */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + ctx = bdrv_get_aio_context(other_job->bs); + aio_context_acquire(ctx); + } + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (other_job == job || other_job->completed) { + /* Other jobs are "effectively" cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. */ + if (other_job != job) { + other_job->cancelled = true; + } + continue; + } + block_job_cancel_sync(other_job); + assert(other_job->completed); + } + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + ctx = bdrv_get_aio_context(other_job->bs); + block_job_completed_single(other_job); + aio_context_release(ctx); + } +} + +static void block_job_completed_txn_success(BlockJob *job) +{ + AioContext *ctx; + BlockJobTxn *txn = job->txn; + BlockJob *other_job, *next; + /* + * Successful completion, see if there are other running jobs in this + * txn. + */ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (!other_job->completed) { + return; + } + } + /* We are the last completed job, commit the transaction. */ + QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + ctx = bdrv_get_aio_context(other_job->bs); + aio_context_acquire(ctx); + assert(other_job->ret == 0); + block_job_completed_single(other_job); + aio_context_release(ctx); + } +} + void block_job_completed(BlockJob *job, int ret) { BlockDriverState *bs = job->bs; @@ -102,8 +195,13 @@ void block_job_completed(BlockJob *job, int ret) assert(!job->completed); job->completed = true; job->ret = ret; - job->cb(job->opaque, ret); - block_job_unref(job); + if (!job->txn) { + block_job_completed_single(job); + } else if (ret < 0 || block_job_is_cancelled(job)) { + block_job_completed_txn_abort(job); + } else { + block_job_completed_txn_success(job); + } } void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -402,3 +500,36 @@ void block_job_defer_to_main_loop(BlockJob *job, qemu_bh_schedule(data->bh); } + +BlockJobTxn *block_job_txn_new(void) +{ + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void block_job_txn_ref(BlockJobTxn *txn) +{ + txn->refcnt++; +} + +void block_job_txn_unref(BlockJobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + block_job_txn_ref(txn); +} diff --git a/include/block/block.h b/include/block/block.h index 92f6f6a95a..73edb1a79c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -14,6 +14,7 @@ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; +typedef struct BlockJobTxn BlockJobTxn; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index c70d55a644..d84ccd8d2c 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -162,6 +162,9 @@ struct BlockJob { */ int ret; + /** Non-NULL if this job is part of a transaction */ + BlockJobTxn *txn; + QLIST_ENTRY(BlockJob) txn_list; }; /** @@ -405,4 +408,39 @@ void block_job_defer_to_main_loop(BlockJob *job, BlockJobDeferToMainLoopFn *fn, void *opaque); +/** + * block_job_txn_new: + * + * Allocate and return a new block job transaction. Jobs can be added to the + * transaction using block_job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +BlockJobTxn *block_job_txn_new(void); + +/** + * block_job_txn_unref: + * + * Release a reference that was previously acquired with block_job_txn_add_job + * or block_job_txn_new. If it's the last reference to the object, it will be + * freed. + */ +void block_job_txn_unref(BlockJobTxn *txn); + +/** + * block_job_txn_add_job: + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either block_job_txn_unref() or block_job_completed() + * to release the reference that is automatically grabbed here. + */ +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); + #endif -- cgit v1.2.3-55-g7522 From 78f51fde88d1925b5ae51ba789339baa9ff72ad1 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 5 Nov 2015 18:13:17 -0500 Subject: block: Add BlockJobTxn support to backup_run Allow a BlockJobTxn to be passed into backup_run, which will allow the job to join a transactional group if present. Propagate this new parameter outward into new QMP helper functions in blockdev.c to allow transaction commands to pass forward their BlockJobTxn object in a forthcoming patch. [split up from a patch originally by Stefan and Fam. --js] Signed-off-by: Stefan Hajnoczi Signed-off-by: Fam Zheng Signed-off-by: John Snow Signed-off-by: John Snow Message-id: 1446765200-3054-12-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/backup.c | 3 +- blockdev.c | 112 ++++++++++++++++++++++++++++++++++------------ include/block/block_int.h | 3 +- 3 files changed, 88 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/block/backup.c b/block/backup.c index a80800f472..3b39119256 100644 --- a/block/backup.c +++ b/block/backup.c @@ -493,7 +493,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp) + BlockJobTxn *txn, Error **errp) { int64_t len; @@ -575,6 +575,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, sync_bitmap : NULL; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); + block_job_txn_add_job(txn, &job->common); qemu_coroutine_enter(job->common.co, job); return; diff --git a/blockdev.c b/blockdev.c index 944525806e..5ed4284fbf 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1716,6 +1716,18 @@ typedef struct DriveBackupState { BlockJob *job; } DriveBackupState; +static void do_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp); + static void drive_backup_prepare(BlkActionState *common, Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); @@ -1744,15 +1756,15 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) bdrv_drained_begin(blk_bs(blk)); state->bs = blk_bs(blk); - qmp_drive_backup(backup->device, backup->target, - backup->has_format, backup->format, - backup->sync, - backup->has_mode, backup->mode, - backup->has_speed, backup->speed, - backup->has_bitmap, backup->bitmap, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - &local_err); + do_drive_backup(backup->device, backup->target, + backup->has_format, backup->format, + backup->sync, + backup->has_mode, backup->mode, + backup->has_speed, backup->speed, + backup->has_bitmap, backup->bitmap, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + NULL, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1789,6 +1801,15 @@ typedef struct BlockdevBackupState { AioContext *aio_context; } BlockdevBackupState; +static void do_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp); + static void blockdev_backup_prepare(BlkActionState *common, Error **errp) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); @@ -1827,12 +1848,12 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) state->bs = blk_bs(blk); bdrv_drained_begin(state->bs); - qmp_blockdev_backup(backup->device, backup->target, - backup->sync, - backup->has_speed, backup->speed, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - &local_err); + do_blockdev_backup(backup->device, backup->target, + backup->sync, + backup->has_speed, backup->speed, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + NULL, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -2895,15 +2916,17 @@ out: aio_context_release(aio_context); } -void qmp_drive_backup(const char *device, const char *target, - bool has_format, const char *format, - enum MirrorSyncMode sync, - bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, - bool has_bitmap, const char *bitmap, - bool has_on_source_error, BlockdevOnError on_source_error, - bool has_on_target_error, BlockdevOnError on_target_error, - Error **errp) +static void do_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + BlockJobTxn *txn, Error **errp) { BlockBackend *blk; BlockDriverState *bs; @@ -3018,7 +3041,7 @@ void qmp_drive_backup(const char *device, const char *target, backup_start(bs, target_bs, speed, sync, bmap, on_source_error, on_target_error, - block_job_cb, bs, &local_err); + block_job_cb, bs, txn, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3029,19 +3052,37 @@ out: aio_context_release(aio_context); } +void qmp_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_bitmap, const char *bitmap, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) +{ + return do_drive_backup(device, target, has_format, format, sync, + has_mode, mode, has_speed, speed, + has_bitmap, bitmap, + has_on_source_error, on_source_error, + has_on_target_error, on_target_error, + NULL, errp); +} + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) { return bdrv_named_nodes_list(errp); } -void qmp_blockdev_backup(const char *device, const char *target, +void do_blockdev_backup(const char *device, const char *target, enum MirrorSyncMode sync, bool has_speed, int64_t speed, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, - Error **errp) + BlockJobTxn *txn, Error **errp) { BlockBackend *blk, *target_blk; BlockDriverState *bs; @@ -3089,7 +3130,7 @@ void qmp_blockdev_backup(const char *device, const char *target, bdrv_ref(target_bs); bdrv_set_aio_context(target_bs, aio_context); backup_start(bs, target_bs, speed, sync, NULL, on_source_error, - on_target_error, block_job_cb, bs, &local_err); + on_target_error, block_job_cb, bs, txn, &local_err); if (local_err != NULL) { bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3098,6 +3139,21 @@ out: aio_context_release(aio_context); } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ + do_blockdev_backup(device, target, sync, has_speed, speed, + has_on_source_error, on_source_error, + has_on_target_error, on_target_error, + NULL, errp); +} + void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, bool has_node_name, const char *node_name, diff --git a/include/block/block_int.h b/include/block/block_int.h index 695393e6e6..4012e36437 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -669,6 +669,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). * * Start a backup operation on @bs. Clusters in @bs are written to @target * until the job is cancelled or manually completed. @@ -679,7 +680,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp); + BlockJobTxn *txn, Error **errp); void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); -- cgit v1.2.3-55-g7522 From bd797fc15b4290e02c219b7cd6289a33cd6cd18b Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:01 +0200 Subject: util: Infrastructure for computing recent averages This module computes the average of a set of values within a time window, keeping also track of the minimum and maximum values. In order to produce more accurate results it works internally by creating two time windows of the same period, offsetted by half of that period. Values are accounted on both windows and the data is always returned from the oldest one. [Add missing util/replay.o to test-timed-average dependencies to fix the build. --Stefan] Signed-off-by: Alberto Garcia Message-id: 201b09c21bbc9c329779d2b2365ee2b9c80dceeb.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/qemu/timed-average.h | 63 +++++++++++++ tests/Makefile | 4 + tests/test-timed-average.c | 90 +++++++++++++++++++ util/Makefile.objs | 1 + util/timed-average.c | 210 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 368 insertions(+) create mode 100644 include/qemu/timed-average.h create mode 100644 tests/test-timed-average.c create mode 100644 util/timed-average.c (limited to 'include') diff --git a/include/qemu/timed-average.h b/include/qemu/timed-average.h new file mode 100644 index 0000000000..f1cdddc48b --- /dev/null +++ b/include/qemu/timed-average.h @@ -0,0 +1,63 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef TIMED_AVERAGE_H +#define TIMED_AVERAGE_H + +#include + +#include "qemu/timer.h" + +typedef struct TimedAverageWindow TimedAverageWindow; +typedef struct TimedAverage TimedAverage; + +/* All fields of both structures are private */ + +struct TimedAverageWindow { + uint64_t min; /* minimum value accounted in the window */ + uint64_t max; /* maximum value accounted in the window */ + uint64_t sum; /* sum of all values */ + uint64_t count; /* number of values */ + int64_t expiration; /* the end of the current window in ns */ +}; + +struct TimedAverage { + uint64_t period; /* period in nanoseconds */ + TimedAverageWindow windows[2]; /* two overlapping windows of with + * an offset of period / 2 between them */ + unsigned current; /* the current window index: it's also the + * oldest window index */ + QEMUClockType clock_type; /* the clock used */ +}; + +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period); + +void timed_average_account(TimedAverage *ta, uint64_t value); + +uint64_t timed_average_min(TimedAverage *ta); +uint64_t timed_average_avg(TimedAverage *ta); +uint64_t timed_average_max(TimedAverage *ta); + +#endif diff --git a/tests/Makefile b/tests/Makefile index 6a6b1fc0a6..90c4141ac5 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -83,6 +83,7 @@ check-unit-y += tests/test-crypto-cipher$(EXESUF) check-unit-$(CONFIG_GNUTLS) += tests/test-crypto-tlscredsx509$(EXESUF) check-unit-$(CONFIG_GNUTLS) += tests/test-crypto-tlssession$(EXESUF) check-unit-$(CONFIG_LINUX) += tests/test-qga$(EXESUF) +check-unit-y += tests/test-timed-average$(EXESUF) check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -412,6 +413,9 @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ migration/vmstate.o migration/qemu-file.o migration/qemu-file-buf.o \ migration/qemu-file-unix.o qjson.o \ $(test-qom-obj-y) +tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \ + libqemuutil.a stubs/clock-warp.o stubs/cpu-get-icount.o \ + stubs/notify-event.o stubs/replay.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) diff --git a/tests/test-timed-average.c b/tests/test-timed-average.c new file mode 100644 index 0000000000..a049799b80 --- /dev/null +++ b/tests/test-timed-average.c @@ -0,0 +1,90 @@ +/* + * Timed average computation tests + * + * Copyright Nodalink, EURL. 2014 + * + * Authors: + * Benoît Canet + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include + +#include "qemu/timed-average.h" + +/* This is the clock for QEMU_CLOCK_VIRTUAL */ +static int64_t my_clock_value; + +int64_t cpu_get_clock(void) +{ + return my_clock_value; +} + +static void account(TimedAverage *ta) +{ + timed_average_account(ta, 1); + timed_average_account(ta, 5); + timed_average_account(ta, 2); + timed_average_account(ta, 4); + timed_average_account(ta, 3); +} + +static void test_average(void) +{ + TimedAverage ta; + uint64_t result; + int i; + + /* we will compute some average on a period of 1 second */ + timed_average_init(&ta, QEMU_CLOCK_VIRTUAL, NANOSECONDS_PER_SECOND); + + result = timed_average_min(&ta); + g_assert(result == 0); + result = timed_average_avg(&ta); + g_assert(result == 0); + result = timed_average_max(&ta); + g_assert(result == 0); + + for (i = 0; i < 100; i++) { + account(&ta); + result = timed_average_min(&ta); + g_assert(result == 1); + result = timed_average_avg(&ta); + g_assert(result == 3); + result = timed_average_max(&ta); + g_assert(result == 5); + my_clock_value += NANOSECONDS_PER_SECOND / 10; + } + + my_clock_value += NANOSECONDS_PER_SECOND * 100; + + result = timed_average_min(&ta); + g_assert(result == 0); + result = timed_average_avg(&ta); + g_assert(result == 0); + result = timed_average_max(&ta); + g_assert(result == 0); + + for (i = 0; i < 100; i++) { + account(&ta); + result = timed_average_min(&ta); + g_assert(result == 1); + result = timed_average_avg(&ta); + g_assert(result == 3); + result = timed_average_max(&ta); + g_assert(result == 5); + my_clock_value += NANOSECONDS_PER_SECOND / 10; + } +} + +int main(int argc, char **argv) +{ + /* tests in the same order as the header function declarations */ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/timed-average/average", test_average); + return g_test_run(); +} + diff --git a/util/Makefile.objs b/util/Makefile.objs index d7cc39907f..89dd80ef86 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -29,3 +29,4 @@ util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o util-obj-y += qemu-coroutine-sleep.o util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o util-obj-y += buffer.o +util-obj-y += timed-average.o diff --git a/util/timed-average.c b/util/timed-average.c new file mode 100644 index 0000000000..98a1170204 --- /dev/null +++ b/util/timed-average.c @@ -0,0 +1,210 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free sofware: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Sofware Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include "qemu/timed-average.h" + +/* This module computes an average of a set of values within a time + * window. + * + * Algorithm: + * + * - Create two windows with a certain expiration period, and + * offsetted by period / 2. + * - Each time you want to account a new value, do it in both windows. + * - The minimum / maximum / average values are always returned from + * the oldest window. + * + * Example: + * + * t=0 |t=0.5 |t=1 |t=1.5 |t=2 + * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) | + * wnd1: [0,1) | |wnd1: [1,2) | | + * + * Values are returned from: + * + * wnd0---------|wnd1------------|wnd0---------|wnd1-------------| + */ + +/* Update the expiration of a time window + * + * @w: the window used + * @now: the current time in nanoseconds + * @period: the expiration period in nanoseconds + */ +static void update_expiration(TimedAverageWindow *w, int64_t now, + int64_t period) +{ + /* time elapsed since the last theoretical expiration */ + int64_t elapsed = (now - w->expiration) % period; + /* time remaininging until the next expiration */ + int64_t remaining = period - elapsed; + /* compute expiration */ + w->expiration = now + remaining; +} + +/* Reset a window + * + * @w: the window to reset + */ +static void window_reset(TimedAverageWindow *w) +{ + w->min = UINT64_MAX; + w->max = 0; + w->sum = 0; + w->count = 0; +} + +/* Get the current window (that is, the one with the earliest + * expiration time). + * + * @ta: the TimedAverage structure + * @ret: a pointer to the current window + */ +static TimedAverageWindow *current_window(TimedAverage *ta) +{ + return &ta->windows[ta->current]; +} + +/* Initialize a TimedAverage structure + * + * @ta: the TimedAverage structure + * @clock_type: the type of clock to use + * @period: the time window period in nanoseconds + */ +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period) +{ + int64_t now = qemu_clock_get_ns(clock_type); + + /* Returned values are from the oldest window, so they belong to + * the interval [ta->period/2,ta->period). By adjusting the + * requested period by 4/3, we guarantee that they're in the + * interval [2/3 period,4/3 period), closer to the requested + * period on average */ + ta->period = (uint64_t) period * 4 / 3; + ta->clock_type = clock_type; + ta->current = 0; + + window_reset(&ta->windows[0]); + window_reset(&ta->windows[1]); + + /* Both windows are offsetted by half a period */ + ta->windows[0].expiration = now + ta->period / 2; + ta->windows[1].expiration = now + ta->period; +} + +/* Check if the time windows have expired, updating their counters and + * expiration time if that's the case. + * + * @ta: the TimedAverage structure + */ +static void check_expirations(TimedAverage *ta) +{ + int64_t now = qemu_clock_get_ns(ta->clock_type); + int i; + + assert(ta->period != 0); + + /* Check if the windows have expired */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + if (w->expiration <= now) { + window_reset(w); + update_expiration(w, now, ta->period); + } + } + + /* Make ta->current point to the oldest window */ + if (ta->windows[0].expiration < ta->windows[1].expiration) { + ta->current = 0; + } else { + ta->current = 1; + } +} + +/* Account a value + * + * @ta: the TimedAverage structure + * @value: the value to account + */ +void timed_average_account(TimedAverage *ta, uint64_t value) +{ + int i; + check_expirations(ta); + + /* Do the accounting in both windows at the same time */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + + w->sum += value; + w->count++; + + if (value < w->min) { + w->min = value; + } + + if (value > w->max) { + w->max = value; + } + } +} + +/* Get the minimum value + * + * @ta: the TimedAverage structure + * @ret: the minimum value + */ +uint64_t timed_average_min(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta); + w = current_window(ta); + return w->min < UINT64_MAX ? w->min : 0; +} + +/* Get the average value + * + * @ta: the TimedAverage structure + * @ret: the average value + */ +uint64_t timed_average_avg(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta); + w = current_window(ta); + return w->count > 0 ? w->sum / w->count : 0; +} + +/* Get the maximum value + * + * @ta: the TimedAverage structure + * @ret: the maximum value + */ +uint64_t timed_average_max(TimedAverage *ta) +{ + check_expirations(ta); + return current_window(ta)->max; +} -- cgit v1.2.3-55-g7522 From cb38fffbc968e797ae32039b1e2c47b940b30cb4 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:02 +0200 Subject: block: Add idle_time_ns to BlockDeviceStats This patch adds the new field 'idle_time_ns' to the BlockDeviceStats structure, indicating the time that has passed since the previous I/O operation. It also adds the block_acct_idle_time_ns() call, to ensure that all references to the clock type used for accounting are in the same place. This will later allow us to use a different clock for iotests. Signed-off-by: Alberto Garcia Message-id: 7d8cfcf931453e1a2443e6626e8c1edc347c7c8a.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 12 ++++++++++-- block/qapi.c | 5 +++++ hmp.c | 4 +++- include/block/accounting.h | 2 ++ qapi/block-core.json | 6 +++++- qmp-commands.hx | 10 ++++++++-- 6 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index 6f4c0f12eb..d427fa8fb8 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -40,12 +40,15 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) { + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + assert(cookie->type < BLOCK_MAX_IOTYPE); stats->nr_bytes[cookie->type] += cookie->bytes; stats->nr_ops[cookie->type]++; - stats->total_time_ns[cookie->type] += - qemu_clock_get_ns(clock_type) - cookie->start_time_ns; + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; } @@ -55,3 +58,8 @@ void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, assert(type < BLOCK_MAX_IOTYPE); stats->merged[type] += num_requests; } + +int64_t block_acct_idle_time_ns(BlockAcctStats *stats) +{ + return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns; +} diff --git a/block/qapi.c b/block/qapi.c index 89d4274177..9799656761 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -357,6 +357,11 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; + if (s->stats->has_idle_time_ns) { + s->stats->idle_time_ns = block_acct_idle_time_ns(stats); + } } s->stats->wr_highest_offset = bs->wr_highest_offset; diff --git a/hmp.c b/hmp.c index d3812831ce..21406059c7 100644 --- a/hmp.c +++ b/hmp.c @@ -522,6 +522,7 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) " flush_total_time_ns=%" PRId64 " rd_merged=%" PRId64 " wr_merged=%" PRId64 + " idle_time_ns=%" PRId64 "\n", stats->value->stats->rd_bytes, stats->value->stats->wr_bytes, @@ -532,7 +533,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict) stats->value->stats->rd_total_time_ns, stats->value->stats->flush_total_time_ns, stats->value->stats->rd_merged, - stats->value->stats->wr_merged); + stats->value->stats->wr_merged, + stats->value->stats->idle_time_ns); } qapi_free_BlockStatsList(stats_list); diff --git a/include/block/accounting.h b/include/block/accounting.h index 66637cdfed..4b2b999bd0 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -40,6 +40,7 @@ typedef struct BlockAcctStats { uint64_t nr_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; + int64_t last_access_time_ns; } BlockAcctStats; typedef struct BlockAcctCookie { @@ -53,5 +54,6 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); #endif diff --git a/qapi/block-core.json b/qapi/block-core.json index 470e86c6df..597281821b 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -448,6 +448,10 @@ # @wr_merged: Number of write requests that have been merged into another # request (Since 2.3). # +# @idle_time_ns: #optional Time since the last I/O operation, in +# nanoseconds. If the field is absent it means that +# there haven't been any operations yet (Since 2.5). +# # Since: 0.14.0 ## { 'struct': 'BlockDeviceStats', @@ -455,7 +459,7 @@ 'wr_operations': 'int', 'flush_operations': 'int', 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int', - 'rd_merged': 'int', 'wr_merged': 'int' } } + 'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int' } } ## # @BlockStats: diff --git a/qmp-commands.hx b/qmp-commands.hx index ede18bf701..5c4c16fe9b 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2583,6 +2583,10 @@ Each json-object contain the following: another request (json-int) - "wr_merged": number of write requests that have been merged into another request (json-int) + - "idle_time_ns": time since the last I/O operation, in + nanoseconds. If the field is absent it means + that there haven't been any operations yet + (json-int, optional) - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted @@ -2607,7 +2611,8 @@ Example: "flush_total_times_ns":49653 "flush_operations":61, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "idle_time_ns":2953431879 } }, "stats":{ @@ -2621,7 +2626,8 @@ Example: "rd_total_times_ns":3465673657 "flush_total_times_ns":49653, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "idle_time_ns":2953431879 } }, { -- cgit v1.2.3-55-g7522 From 7ee12dafe96a86dfa96af38cea1289305e429a55 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:03 +0200 Subject: block: Add statistics for failed and invalid I/O operations This patch adds the block_acct_failed() and block_acct_invalid() functions to allow keeping track of failed and invalid I/O operations. The number of failed and invalid operations is exposed in BlockDeviceStats. We don't keep track of the time spent on invalid operations because they are cancelled immediately when they are started. Signed-off-by: Alberto Garcia Message-id: a7256ccb883a86356b1c6c46b5a29ed5448546a5.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 23 +++++++++++++++++++++++ block/qapi.c | 10 ++++++++++ include/block/accounting.h | 4 ++++ qapi/block-core.json | 23 ++++++++++++++++++++++- qmp-commands.hx | 12 ++++++++++++ 5 files changed, 71 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index d427fa8fb8..49a9444377 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -51,6 +51,29 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) stats->last_access_time_ns = time_ns; } +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + int64_t time_ns = qemu_clock_get_ns(clock_type); + + assert(cookie->type < BLOCK_MAX_IOTYPE); + + stats->failed_ops[cookie->type]++; + stats->total_time_ns[cookie->type] += time_ns - cookie->start_time_ns; + stats->last_access_time_ns = time_ns; +} + +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) +{ + assert(type < BLOCK_MAX_IOTYPE); + + /* block_acct_done() and block_acct_failed() update + * total_time_ns[], but this one does not. The reason is that + * invalid requests are accounted during their submission, + * therefore there's no actual I/O involved. */ + + stats->invalid_ops[type]++; + stats->last_access_time_ns = qemu_clock_get_ns(clock_type); +} void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests) diff --git a/block/qapi.c b/block/qapi.c index 9799656761..d1a6bdc1d2 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -351,6 +351,16 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; + + s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + + s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + s->stats->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; diff --git a/include/block/accounting.h b/include/block/accounting.h index 4b2b999bd0..b50e3cc8d2 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -38,6 +38,8 @@ enum BlockAcctType { typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; int64_t last_access_time_ns; @@ -52,6 +54,8 @@ typedef struct BlockAcctCookie { void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests); int64_t block_acct_idle_time_ns(BlockAcctStats *stats); diff --git a/qapi/block-core.json b/qapi/block-core.json index 597281821b..009b7c88f9 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -452,6 +452,24 @@ # nanoseconds. If the field is absent it means that # there haven't been any operations yet (Since 2.5). # +# @failed_rd_operations: The number of failed read operations +# performed by the device (Since 2.5) +# +# @failed_wr_operations: The number of failed write operations +# performed by the device (Since 2.5) +# +# @failed_flush_operations: The number of failed flush operations +# performed by the device (Since 2.5) +# +# @invalid_rd_operations: The number of invalid read operations +# performed by the device (Since 2.5) +# +# @invalid_wr_operations: The number of invalid write operations +# performed by the device (Since 2.5) +# +# @invalid_flush_operations: The number of invalid flush operations +# performed by the device (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'BlockDeviceStats', @@ -459,7 +477,10 @@ 'wr_operations': 'int', 'flush_operations': 'int', 'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int', 'rd_total_time_ns': 'int', 'wr_highest_offset': 'int', - 'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int' } } + 'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int', + 'failed_rd_operations': 'int', 'failed_wr_operations': 'int', + 'failed_flush_operations': 'int', 'invalid_rd_operations': 'int', + 'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int' } } ## # @BlockStats: diff --git a/qmp-commands.hx b/qmp-commands.hx index 5c4c16fe9b..b031df6942 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2587,6 +2587,18 @@ Each json-object contain the following: nanoseconds. If the field is absent it means that there haven't been any operations yet (json-int, optional) + - "failed_rd_operations": number of failed read operations + (json-int) + - "failed_wr_operations": number of failed write operations + (json-int) + - "failed_flush_operations": number of failed flush operations + (json-int) + - "invalid_rd_operations": number of invalid read operations + (json-int) + - "invalid_wr_operations": number of invalid write operations + (json-int) + - "invalid_flush_operations": number of invalid flush operations + (json-int) - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted -- cgit v1.2.3-55-g7522 From 362e9299b34b3101aaa20f20363441c9f055fa5e Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:04 +0200 Subject: block: Allow configuring whether to account failed and invalid ops This patch adds two options, "stats-account-invalid" and "stats-account-failed", that can be used to decide whether invalid and failed I/O operations must be used when collecting statistics for latency and last access time. Signed-off-by: Alberto Garcia Reviewed-by: Stefan Hajnoczi Message-id: ebc7e5966511a342cad428a392c5f5ad56b15213.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 24 +++++++++++++++++++----- block/qapi.c | 3 +++ blockdev.c | 16 ++++++++++++++++ include/block/accounting.h | 5 +++++ qapi/block-core.json | 17 ++++++++++++++++- qmp-commands.hx | 25 ++++++++++++++++++++----- 6 files changed, 79 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index 49a9444377..923aeaf5be 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -28,6 +28,13 @@ static QEMUClockType clock_type = QEMU_CLOCK_REALTIME; +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed) +{ + stats->account_invalid = account_invalid; + stats->account_failed = account_failed; +} + void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type) { @@ -53,13 +60,17 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) { - int64_t time_ns = qemu_clock_get_ns(clock_type); - assert(cookie->type < BLOCK_MAX_IOTYPE); stats->failed_ops[cookie->type]++; - stats->total_time_ns[cookie->type] += time_ns - cookie->start_time_ns; - stats->last_access_time_ns = time_ns; + + if (stats->account_failed) { + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + } } void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) @@ -72,7 +83,10 @@ void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) * therefore there's no actual I/O involved. */ stats->invalid_ops[type]++; - stats->last_access_time_ns = qemu_clock_get_ns(clock_type); + + if (stats->account_invalid) { + stats->last_access_time_ns = qemu_clock_get_ns(clock_type); + } } void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, diff --git a/block/qapi.c b/block/qapi.c index d1a6bdc1d2..083469abcb 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -372,6 +372,9 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, if (s->stats->has_idle_time_ns) { s->stats->idle_time_ns = block_acct_idle_time_ns(stats); } + + s->stats->account_invalid = stats->account_invalid; + s->stats->account_failed = stats->account_failed; } s->stats->wr_highest_offset = bs->wr_highest_offset; diff --git a/blockdev.c b/blockdev.c index 977ffd47ff..17029fd6c0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -441,6 +441,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *buf; int bdrv_flags = 0; int on_read_error, on_write_error; + bool account_invalid, account_failed; BlockBackend *blk; BlockDriverState *bs; ThrottleConfig cfg; @@ -477,6 +478,9 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, /* extract parameters */ snapshot = qemu_opt_get_bool(opts, "snapshot", 0); + account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true); + account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true); + extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg, &detect_zeroes, &error); if (error) { @@ -573,6 +577,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if (bdrv_key_required(bs)) { autostart = 0; } + + block_acct_init(blk_get_stats(blk), account_invalid, account_failed); } blk_set_on_error(blk, on_read_error, on_write_error); @@ -3900,6 +3906,16 @@ QemuOptsList qemu_common_drive_opts = { .name = "detect-zeroes", .type = QEMU_OPT_STRING, .help = "try to optimize zero writes (off, on, unmap)", + },{ + .name = "stats-account-invalid", + .type = QEMU_OPT_BOOL, + .help = "whether to account for invalid I/O operations " + "in the statistics", + },{ + .name = "stats-account-failed", + .type = QEMU_OPT_BOOL, + .help = "whether to account for failed I/O operations " + "in the statistics", }, { /* end of list */ } }, diff --git a/include/block/accounting.h b/include/block/accounting.h index b50e3cc8d2..0d9b076955 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -25,6 +25,7 @@ #define BLOCK_ACCOUNTING_H #include +#include #include "qemu/typedefs.h" @@ -43,6 +44,8 @@ typedef struct BlockAcctStats { uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; int64_t last_access_time_ns; + bool account_invalid; + bool account_failed; } BlockAcctStats; typedef struct BlockAcctCookie { @@ -51,6 +54,8 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed); void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); diff --git a/qapi/block-core.json b/qapi/block-core.json index 009b7c88f9..0d517ab8e5 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -470,6 +470,12 @@ # @invalid_flush_operations: The number of invalid flush operations # performed by the device (Since 2.5) # +# @account_invalid: Whether invalid operations are included in the +# last access statistics (Since 2.5) +# +# @account_failed: Whether failed operations are included in the +# latency and last access statistics (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'BlockDeviceStats', @@ -480,7 +486,8 @@ 'rd_merged': 'int', 'wr_merged': 'int', '*idle_time_ns': 'int', 'failed_rd_operations': 'int', 'failed_wr_operations': 'int', 'failed_flush_operations': 'int', 'invalid_rd_operations': 'int', - 'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int' } } + 'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int', + 'account_invalid': 'bool', 'account_failed': 'bool' } } ## # @BlockStats: @@ -1461,6 +1468,12 @@ # (default: enospc) # @read-only: #optional whether the block device should be read-only # (default: false) +# @stats-account-invalid: #optional whether to include invalid +# operations when computing last access statistics +# (default: true) (Since 2.5) +# @stats-account-failed: #optional whether to include failed +# operations when computing latency and last +# access statistics (default: true) (Since 2.5) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) # (default: off) # @@ -1476,6 +1489,8 @@ '*rerror': 'BlockdevOnError', '*werror': 'BlockdevOnError', '*read-only': 'bool', + '*stats-account-invalid': 'bool', + '*stats-account-failed': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index b031df6942..b0710a2d04 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2599,6 +2599,11 @@ Each json-object contain the following: (json-int) - "invalid_flush_operations": number of invalid flush operations (json-int) + - "account_invalid": whether invalid operations are included in + the last access statistics (json-bool) + - "account_failed": whether failed operations are included in the + latency and last access statistics + (json-bool) - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted @@ -2624,7 +2629,9 @@ Example: "flush_operations":61, "rd_merged":0, "wr_merged":0, - "idle_time_ns":2953431879 + "idle_time_ns":2953431879, + "account_invalid":true, + "account_failed":false } }, "stats":{ @@ -2639,7 +2646,9 @@ Example: "flush_total_times_ns":49653, "rd_merged":0, "wr_merged":0, - "idle_time_ns":2953431879 + "idle_time_ns":2953431879, + "account_invalid":true, + "account_failed":false } }, { @@ -2655,7 +2664,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } }, { @@ -2671,7 +2682,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } }, { @@ -2687,7 +2700,9 @@ Example: "rd_total_times_ns":0 "flush_total_times_ns":0, "rd_merged":0, - "wr_merged":0 + "wr_merged":0, + "account_invalid":false, + "account_failed":false } } ] -- cgit v1.2.3-55-g7522 From 979e9b03fc8c85d3b78a14410c64cbb16d348095 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:05 +0200 Subject: block: Compute minimum, maximum and average I/O latencies This patch keeps track of the minimum, maximum and average latencies of I/O operations during a certain interval of time. The values are exposed in the BlockDeviceTimedStats structure. An option to define the intervals to collect these statistics will be added in a separate patch. Signed-off-by: Alberto Garcia Message-id: c7382dc89622c64f918d09f32815827772628f8e.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 43 ++++++++++++++++++++++++++++++++++++++ block/block-backend.c | 1 + block/qapi.c | 28 +++++++++++++++++++++++++ include/block/accounting.h | 14 +++++++++++++ qapi/block-core.json | 52 +++++++++++++++++++++++++++++++++++++++++++++- qmp-commands.hx | 31 +++++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index 923aeaf5be..61de8ce9bc 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -35,6 +35,39 @@ void block_acct_init(BlockAcctStats *stats, bool account_invalid, stats->account_failed = account_failed; } +void block_acct_cleanup(BlockAcctStats *stats) +{ + BlockAcctTimedStats *s, *next; + QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) { + g_free(s); + } +} + +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) +{ + BlockAcctTimedStats *s; + unsigned i; + + s = g_new0(BlockAcctTimedStats, 1); + s->interval_length = interval_length; + QSLIST_INSERT_HEAD(&stats->intervals, s, entries); + + for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { + timed_average_init(&s->latency[i], clock_type, + (uint64_t) interval_length * NANOSECONDS_PER_SECOND); + } +} + +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s) +{ + if (s == NULL) { + return QSLIST_FIRST(&stats->intervals); + } else { + return QSLIST_NEXT(s, entries); + } +} + void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type) { @@ -47,6 +80,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) { + BlockAcctTimedStats *s; int64_t time_ns = qemu_clock_get_ns(clock_type); int64_t latency_ns = time_ns - cookie->start_time_ns; @@ -56,6 +90,10 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) stats->nr_ops[cookie->type]++; stats->total_time_ns[cookie->type] += latency_ns; stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } } void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) @@ -65,11 +103,16 @@ void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) stats->failed_ops[cookie->type]++; if (stats->account_failed) { + BlockAcctTimedStats *s; int64_t time_ns = qemu_clock_get_ns(clock_type); int64_t latency_ns = time_ns - cookie->start_time_ns; stats->total_time_ns[cookie->type] += latency_ns; stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } } } diff --git a/block/block-backend.c b/block/block-backend.c index 6f9309fef4..9889e813b6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -176,6 +176,7 @@ static void blk_delete(BlockBackend *blk) } g_free(blk->name); drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); g_free(blk); } diff --git a/block/qapi.c b/block/qapi.c index 083469abcb..6169a22243 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -346,6 +346,7 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats = g_malloc0(sizeof(*s->stats)); if (bs->blk) { BlockAcctStats *stats = blk_get_stats(bs->blk); + BlockAcctTimedStats *ts = NULL; s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; @@ -375,6 +376,33 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, s->stats->account_invalid = stats->account_invalid; s->stats->account_failed = stats->account_failed; + + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = s->stats->timed_stats; + timed_stats->value = dev_stats; + s->stats->timed_stats = timed_stats; + + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + + dev_stats->interval_length = ts->interval_length; + + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + } } s->stats->wr_highest_offset = bs->wr_highest_offset; diff --git a/include/block/accounting.h b/include/block/accounting.h index 0d9b076955..1dd582a99a 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -28,6 +28,9 @@ #include #include "qemu/typedefs.h" +#include "qemu/timed-average.h" + +typedef struct BlockAcctTimedStats BlockAcctTimedStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -36,6 +39,12 @@ enum BlockAcctType { BLOCK_MAX_IOTYPE, }; +struct BlockAcctTimedStats { + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; @@ -44,6 +53,7 @@ typedef struct BlockAcctStats { uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; bool account_invalid; bool account_failed; } BlockAcctStats; @@ -56,6 +66,10 @@ typedef struct BlockAcctCookie { void block_acct_init(BlockAcctStats *stats, bool account_invalid, bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); diff --git a/qapi/block-core.json b/qapi/block-core.json index 0d517ab8e5..80470da574 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -414,6 +414,52 @@ ## { 'command': 'query-block', 'returns': ['BlockInfo'] } + +## +# @BlockDeviceTimedStats: +# +# Statistics of a block device during a given interval of time. +# +# @interval_length: Interval used for calculating the statistics, +# in seconds. +# +# @min_rd_latency_ns: Minimum latency of read operations in the +# defined interval, in nanoseconds. +# +# @min_wr_latency_ns: Minimum latency of write operations in the +# defined interval, in nanoseconds. +# +# @min_flush_latency_ns: Minimum latency of flush operations in the +# defined interval, in nanoseconds. +# +# @max_rd_latency_ns: Maximum latency of read operations in the +# defined interval, in nanoseconds. +# +# @max_wr_latency_ns: Maximum latency of write operations in the +# defined interval, in nanoseconds. +# +# @max_flush_latency_ns: Maximum latency of flush operations in the +# defined interval, in nanoseconds. +# +# @avg_rd_latency_ns: Average latency of read operations in the +# defined interval, in nanoseconds. +# +# @avg_wr_latency_ns: Average latency of write operations in the +# defined interval, in nanoseconds. +# +# @avg_flush_latency_ns: Average latency of flush operations in the +# defined interval, in nanoseconds. +# +# Since: 2.5 +## + +{ 'struct': 'BlockDeviceTimedStats', + 'data': { 'interval_length': 'int', 'min_rd_latency_ns': 'int', + 'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int', + 'min_wr_latency_ns': 'int', 'max_wr_latency_ns': 'int', + 'avg_wr_latency_ns': 'int', 'min_flush_latency_ns': 'int', + 'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int' } } + ## # @BlockDeviceStats: # @@ -476,6 +522,9 @@ # @account_failed: Whether failed operations are included in the # latency and last access statistics (Since 2.5) # +# @timed_stats: Statistics specific to the set of previously defined +# intervals of time (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'BlockDeviceStats', @@ -487,7 +536,8 @@ 'failed_rd_operations': 'int', 'failed_wr_operations': 'int', 'failed_flush_operations': 'int', 'invalid_rd_operations': 'int', 'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int', - 'account_invalid': 'bool', 'account_failed': 'bool' } } + 'account_invalid': 'bool', 'account_failed': 'bool', + 'timed_stats': ['BlockDeviceTimedStats'] } } ## # @BlockStats: diff --git a/qmp-commands.hx b/qmp-commands.hx index b0710a2d04..3e50f7fbb1 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2604,6 +2604,37 @@ Each json-object contain the following: - "account_failed": whether failed operations are included in the latency and last access statistics (json-bool) + - "timed_stats": A json-array containing statistics collected in + specific intervals, with the following members: + - "interval_length": interval used for calculating the + statistics, in seconds (json-int) + - "min_rd_latency_ns": minimum latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "min_wr_latency_ns": minimum latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "min_flush_latency_ns": minimum latency of flush operations + in the defined interval, in + nanoseconds (json-int) + - "max_rd_latency_ns": maximum latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "max_wr_latency_ns": maximum latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "max_flush_latency_ns": maximum latency of flush operations + in the defined interval, in + nanoseconds (json-int) + - "avg_rd_latency_ns": average latency of read operations in + the defined interval, in nanoseconds + (json-int) + - "avg_wr_latency_ns": average latency of write operations in + the defined interval, in nanoseconds + (json-int) + - "avg_flush_latency_ns": average latency of flush operations + in the defined interval, in + nanoseconds (json-int) - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted -- cgit v1.2.3-55-g7522 From 96e4dedaff9922f87e4ec351d51d3f093198382a Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:06 +0200 Subject: block: Add average I/O queue depth to BlockDeviceTimedStats This patch adds two new fields to BlockDeviceTimedStats that track the average number of pending read and write requests for a block device. The values are calculated for the period of time defined for that interval. Signed-off-by: Alberto Garcia Message-id: fd31fef53e2714f2f30d59ed58ca2f67ec9ab926.1446044837.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 12 ++++++++++++ block/qapi.c | 5 +++++ include/block/accounting.h | 2 ++ include/qemu/timed-average.h | 1 + qapi/block-core.json | 9 ++++++++- qmp-commands.hx | 6 ++++++ util/timed-average.c | 31 ++++++++++++++++++++++++++----- 7 files changed, 60 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index 61de8ce9bc..a9419315a3 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -143,3 +143,15 @@ int64_t block_acct_idle_time_ns(BlockAcctStats *stats) { return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns; } + +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type) +{ + uint64_t sum, elapsed; + + assert(type < BLOCK_MAX_IOTYPE); + + sum = timed_average_sum(&stats->latency[type], &elapsed); + + return (double) sum / elapsed; +} diff --git a/block/qapi.c b/block/qapi.c index 6169a22243..d20262decb 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -402,6 +402,11 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, dev_stats->min_flush_latency_ns = timed_average_min(fl); dev_stats->max_flush_latency_ns = timed_average_max(fl); dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); } } diff --git a/include/block/accounting.h b/include/block/accounting.h index 1dd582a99a..482926b8ec 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -78,5 +78,7 @@ void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, int num_requests); int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); #endif diff --git a/include/qemu/timed-average.h b/include/qemu/timed-average.h index f1cdddc48b..364bf88f70 100644 --- a/include/qemu/timed-average.h +++ b/include/qemu/timed-average.h @@ -59,5 +59,6 @@ void timed_average_account(TimedAverage *ta, uint64_t value); uint64_t timed_average_min(TimedAverage *ta); uint64_t timed_average_avg(TimedAverage *ta); uint64_t timed_average_max(TimedAverage *ta); +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed); #endif diff --git a/qapi/block-core.json b/qapi/block-core.json index 80470da574..6fd68a6d24 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -450,6 +450,12 @@ # @avg_flush_latency_ns: Average latency of flush operations in the # defined interval, in nanoseconds. # +# @avg_rd_queue_depth: Average number of pending read operations +# in the defined interval. +# +# @avg_wr_queue_depth: Average number of pending write operations +# in the defined interval. +# # Since: 2.5 ## @@ -458,7 +464,8 @@ 'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int', 'min_wr_latency_ns': 'int', 'max_wr_latency_ns': 'int', 'avg_wr_latency_ns': 'int', 'min_flush_latency_ns': 'int', - 'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int' } } + 'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int', + 'avg_rd_queue_depth': 'number', 'avg_wr_queue_depth': 'number' } } ## # @BlockDeviceStats: diff --git a/qmp-commands.hx b/qmp-commands.hx index 3e50f7fbb1..9d8b42f59a 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2635,6 +2635,12 @@ Each json-object contain the following: - "avg_flush_latency_ns": average latency of flush operations in the defined interval, in nanoseconds (json-int) + - "avg_rd_queue_depth": average number of pending read + operations in the defined interval + (json-number) + - "avg_wr_queue_depth": average number of pending write + operations in the defined interval + (json-number). - "parent": Contains recursively the statistics of the underlying protocol (e.g. the host file for a qcow2 image). If there is no underlying protocol, this field is omitted diff --git a/util/timed-average.c b/util/timed-average.c index 98a1170204..a2dfb4834d 100644 --- a/util/timed-average.c +++ b/util/timed-average.c @@ -120,8 +120,10 @@ void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, * expiration time if that's the case. * * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) within the current + * window will be stored here */ -static void check_expirations(TimedAverage *ta) +static void check_expirations(TimedAverage *ta, uint64_t *elapsed) { int64_t now = qemu_clock_get_ns(ta->clock_type); int i; @@ -143,6 +145,12 @@ static void check_expirations(TimedAverage *ta) } else { ta->current = 1; } + + /* Calculate the elapsed time within the current window */ + if (elapsed) { + int64_t remaining = ta->windows[ta->current].expiration - now; + *elapsed = ta->period - remaining; + } } /* Account a value @@ -153,7 +161,7 @@ static void check_expirations(TimedAverage *ta) void timed_average_account(TimedAverage *ta, uint64_t value) { int i; - check_expirations(ta); + check_expirations(ta, NULL); /* Do the accounting in both windows at the same time */ for (i = 0; i < 2; i++) { @@ -180,7 +188,7 @@ void timed_average_account(TimedAverage *ta, uint64_t value) uint64_t timed_average_min(TimedAverage *ta) { TimedAverageWindow *w; - check_expirations(ta); + check_expirations(ta, NULL); w = current_window(ta); return w->min < UINT64_MAX ? w->min : 0; } @@ -193,7 +201,7 @@ uint64_t timed_average_min(TimedAverage *ta) uint64_t timed_average_avg(TimedAverage *ta) { TimedAverageWindow *w; - check_expirations(ta); + check_expirations(ta, NULL); w = current_window(ta); return w->count > 0 ? w->sum / w->count : 0; } @@ -205,6 +213,19 @@ uint64_t timed_average_avg(TimedAverage *ta) */ uint64_t timed_average_max(TimedAverage *ta) { - check_expirations(ta); + check_expirations(ta, NULL); return current_window(ta)->max; } + +/* Get the sum of all accounted values + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here + * @ret: the sum of all accounted values + */ +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed) +{ + TimedAverageWindow *w; + check_expirations(ta, elapsed); + w = current_window(ta); + return w->sum; +} -- cgit v1.2.3-55-g7522 From aece5edc96f211eec6febdafc9bbbb99315a2efd Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 28 Oct 2015 17:33:18 +0200 Subject: block: Update copyright of the accounting code Signed-off-by: Alberto Garcia Message-id: 80a2278e3ec2dafd5daab20a7cb2d6a9b83371e4.1446044838.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/accounting.c | 1 + include/block/accounting.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/block/accounting.c b/block/accounting.c index 05a5c5fdf3..185025ec1e 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/include/block/accounting.h b/include/block/accounting.h index 482926b8ec..0f46cb4ec1 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal -- cgit v1.2.3-55-g7522