summaryrefslogtreecommitdiffstats
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorJens Axboe2016-11-09 20:38:14 +0100
committerJens Axboe2016-11-10 21:53:40 +0100
commit87760e5eef359788047d6fd54fc12eec74ce0d27 (patch)
tree0c394ea517cc093d8fe837ad5a7201d0d30c7afe /block/blk-core.c
parentblk-wbt: add general throttling mechanism (diff)
downloadkernel-qcow2-linux-87760e5eef359788047d6fd54fc12eec74ce0d27.tar.gz
kernel-qcow2-linux-87760e5eef359788047d6fd54fc12eec74ce0d27.tar.xz
kernel-qcow2-linux-87760e5eef359788047d6fd54fc12eec74ce0d27.zip
block: hook up writeback throttling
Enable throttling of buffered writeback to make it a lot more smooth, and has way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at the time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration in the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. Unlike CoDel, blk-wb allows the scale count to to negative. This happens if we primarily have writes going on. Unlike positive scale counts, this doesn't change the size of the monitoring window. When the heavy writers finish, blk-bw quickly snaps back to it's stable state of a zero scale count. The patch registers a sysfs entry, 'wb_lat_usec'. This sets the latency target to me met. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. Generally, a user would not have to touch this setting. We don't enable WBT on devices that are managed with CFQ, and have a non-root block cgroup attached. If we have a proportional share setup on this particular disk, then the wbt throttling will interfere with that. We don't have a strong need for wbt for that case, since we will rely on CFQ doing that for us. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c17
1 files changed, 15 insertions, 2 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 216372b01624..59f8129a4295 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
#include "blk.h"
#include "blk-mq.h"
+#include "blk-wbt.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -882,6 +883,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
fail:
blk_free_flush_queue(q->fq);
+ wbt_exit(q);
return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1344,6 +1346,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
+ wbt_requeue(q->rq_wb, &rq->issue_stat);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1436,6 +1439,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
+ wbt_done(q->rq_wb, &req->issue_stat);
+
/*
* Request may not have originated from ll_rw_blk. if not,
* it didn't come out of our reserved rq pools
@@ -1663,6 +1668,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
int el_ret, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
+ unsigned int wb_acct;
/*
* low level driver can indicate that it wants pages above a
@@ -1715,17 +1721,22 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
get_rq:
+ wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+
/*
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
if (IS_ERR(req)) {
+ __wbt_done(q->rq_wb, wb_acct);
bio->bi_error = PTR_ERR(req);
bio_endio(bio);
goto out_unlock;
}
+ wbt_track(&req->issue_stat, wb_acct);
+
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
@@ -2467,6 +2478,7 @@ void blk_start_request(struct request *req)
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
blk_stat_set_issue_time(&req->issue_stat);
req->rq_flags |= RQF_STATS;
+ wbt_issue(req->q->rq_wb, &req->issue_stat);
}
/*
@@ -2708,9 +2720,10 @@ void blk_finish_request(struct request *req, int error)
blk_account_io_done(req);
- if (req->end_io)
+ if (req->end_io) {
+ wbt_done(req->q->rq_wb, &req->issue_stat);
req->end_io(req, error);
- else {
+ } else {
if (blk_bidi_rq(req))
__blk_put_request(req->next_rq->q, req->next_rq);