From 5905fbc9c94ccd744c1b249472eafcc2d827548a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:19 +0200 Subject: block: fix I/O throttling accounting blind spot I/O throttling relies on bdrv_acct_done() which is called when a request completes. This leaves a blind spot since we only charge for completed requests, not submitted requests. For example, if there is 1 operation remaining in this time slice the guest could submit 3 operations and they will all be submitted successfully since they don't actually get accounted for until they complete. Originally we probably thought this is okay since the requests will be accounted when the time slice is extended. In practice it causes fluctuations since the guest can exceed its I/O limit and it will be punished for this later on. Account for I/O upon submission so that I/O limits are enforced properly. Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- include/block/block_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block_int.h b/include/block/block_int.h index 0986a2d6ac..83941d8672 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -256,7 +256,7 @@ struct BlockDriverState { int64_t slice_start; int64_t slice_end; BlockIOLimit io_limits; - BlockIOBaseValue io_base; + BlockIOBaseValue slice_submitted; CoQueue throttled_reqs; QEMUTimer *block_timer; bool io_limits_enabled; -- cgit v1.2.3-55-g7522 From ae29d6c64bd8d55873a2cb1df50ae4321b497447 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 5 Apr 2013 11:32:20 +0200 Subject: block: keep I/O throttling slice time constant It is not necessary to adjust the slice time at runtime. We already extend the current slice in order to carry over accounting into the next slice. Changing the actual slice time value introduces oscillations. The guest may experience large changes in throughput or IOPS from one moment to the next when slice times are adjusted. Reported-by: BenoƮt Canet Signed-off-by: Stefan Hajnoczi Tested-By: Benoit Canet Signed-off-by: Kevin Wolf --- block.c | 19 +++++++++---------- blockdev.c | 1 - include/block/block_int.h | 1 - 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 25976b556a..00eca275ba 100644 --- a/block.c +++ b/block.c @@ -140,7 +140,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs) bs->slice_start = 0; bs->slice_end = 0; - bs->slice_time = 0; } static void bdrv_block_timer(void *opaque) @@ -1432,7 +1431,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; /* i/o timing parameters */ - bs_dest->slice_time = bs_src->slice_time; bs_dest->slice_start = bs_src->slice_start; bs_dest->slice_end = bs_src->slice_end; bs_dest->slice_submitted = bs_src->slice_submitted; @@ -3749,6 +3747,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, bool is_write, double elapsed_time, uint64_t *wait) { uint64_t bps_limit = 0; + uint64_t extension; double bytes_limit, bytes_base, bytes_res; double slice_time, wait_time; @@ -3796,8 +3795,10 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, * info can be kept until the timer fire, so it is increased and tuned * based on the result of experiment. */ - bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; - bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; + extension = wait_time * NANOSECONDS_PER_SECOND; + extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * + BLOCK_IO_SLICE_TIME; + bs->slice_end += extension; if (wait) { *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; } @@ -3848,8 +3849,8 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, wait_time = 0; } - bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10; - bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME; + /* Exceeded current slice, extend it by another slice time */ + bs->slice_end += BLOCK_IO_SLICE_TIME; if (wait) { *wait = wait_time * BLOCK_IO_SLICE_TIME * 10; } @@ -3868,12 +3869,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, now = qemu_get_clock_ns(vm_clock); if ((bs->slice_start < now) && (bs->slice_end > now)) { - bs->slice_end = now + bs->slice_time; + bs->slice_end = now + BLOCK_IO_SLICE_TIME; } else { - bs->slice_time = 5 * BLOCK_IO_SLICE_TIME; bs->slice_start = now; - bs->slice_end = now + bs->slice_time; - + bs->slice_end = now + BLOCK_IO_SLICE_TIME; memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); } diff --git a/blockdev.c b/blockdev.c index 8cdc9ce16a..6dc999d802 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1069,7 +1069,6 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, } bs->io_limits = io_limits; - bs->slice_time = BLOCK_IO_SLICE_TIME; if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) { bdrv_io_limits_enable(bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 83941d8672..9aa98b5d12 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -252,7 +252,6 @@ struct BlockDriverState { unsigned int copy_on_read_in_flight; /* the time for latest disk I/O */ - int64_t slice_time; int64_t slice_start; int64_t slice_end; BlockIOLimit io_limits; -- cgit v1.2.3-55-g7522