Estimate a disk's average read and write bandwidth with a simple exponential moving average and export the result through the request queue's new 'average_perf' sysfs attribute. Weight-based blk-throttling can use the estimated bandwidth to calculate per-cgroup bandwidth.
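
The estimator works as follows: blk_update_bandwidth() samples the per-disk sector counters at most once every HZ / 5 (200ms), converts the delta to sectors/s, and moves avg_bw one eighth of the gap toward the new measurement, using cmpxchg() on bw_timestamp so only one CPU updates the average per window. The userspace sketch below (not part of the patch; ewma_update() is only an illustrative name) models how the estimate converges from the 10GB/s seed toward a disk's real sustained rate:

/*
 * Illustration only, not part of the patch: a userspace model of the
 * shift-by-3 moving average used by blk_update_bandwidth(). Each sample
 * pulls the estimate 1/8 of the way toward the measured bandwidth, so a
 * sustained change is tracked within a few dozen samples while short
 * bursts are smoothed out. ewma_update() is just an illustrative name.
 */
#include <stdio.h>

static unsigned long long ewma_update(unsigned long long avg,
                                      unsigned long long sample)
{
        if (avg < sample)
                avg += (sample - avg) >> 3;
        else if (avg > sample)
                avg -= (avg - sample) >> 3;
        return avg;
}

int main(void)
{
        /* same 10GB/s seed as blk_alloc_queue_node(), in sectors/s */
        unsigned long long avg = 10ULL * 1024 * 1024 * 2;
        /* a disk sustaining about 200MB/s, in sectors/s */
        unsigned long long sample = 200ULL * 1024 * 2;
        int i;

        for (i = 0; i < 50; i++) {
                avg = ewma_update(avg, sample);
                printf("sample %2d: %llu sectors/s (~%llu MB/s)\n",
                       i, avg, avg * 512 >> 20);
        }
        return 0;
}

The output shows the estimate falling from the 10GB/s seed to roughly the sustained 200MB/s over the 50 samples. Note that measurement windows longer than one second are discarded (the now - last > HZ check), so a long idle gap does not drag the average down when I/O resumes.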
Signed-off-by: Shaohua Li <[email protected]>
---
 block/blk-core.c       | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-sysfs.c      | 13 +++++++++++++
 include/linux/blkdev.h |  4 ++++
 3 files changed, 66 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 33e2f62..8c85bb0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -753,6 +753,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (blkcg_init_queue(q))
 		goto fail_ref;
 
+	/*
+	 * assign a big initial bandwidth (10GB/s), so blk-throttle doesn't start
+	 * slowly
+	 */
+	q->avg_bw[READ] = 10 * 1024 * 1024 * 2;
+	q->avg_bw[WRITE] = 10 * 1024 * 1024 * 2;
 	return q;
 
 fail_ref:
@@ -1909,6 +1915,46 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 	return 0;
 }
 
+static void blk_update_bandwidth(struct request_queue *q,
+				 struct hd_struct *p)
+{
+	unsigned long now = jiffies;
+	unsigned long last = q->bw_timestamp;
+	sector_t bw;
+	sector_t read_sect, write_sect, tmp_sect;
+
+	if (time_before(now, last + HZ / 5))
+		return;
+
+	if (cmpxchg(&q->bw_timestamp, last, now) != last)
+		return;
+
+	tmp_sect = part_stat_read(p, sectors[READ]);
+	read_sect = tmp_sect - q->last_sects[READ];
+	q->last_sects[READ] = tmp_sect;
+	tmp_sect = part_stat_read(p, sectors[WRITE]);
+	write_sect = tmp_sect - q->last_sects[WRITE];
+	q->last_sects[WRITE] = tmp_sect;
+
+	if (now - last > HZ)
+		return;
+	if (now == last)
+		return;
+
+	bw = read_sect * HZ;
+	sector_div(bw, now - last);
+	if (q->avg_bw[READ] < bw)
+		q->avg_bw[READ] += (bw - q->avg_bw[READ]) >> 3;
+	if (q->avg_bw[READ] > bw)
+		q->avg_bw[READ] -= (q->avg_bw[READ] - bw) >> 3;
+	bw = write_sect * HZ;
+	sector_div(bw, now - last);
+	if (q->avg_bw[WRITE] < bw)
+		q->avg_bw[WRITE] += (bw - q->avg_bw[WRITE]) >> 3;
+	if (q->avg_bw[WRITE] > bw)
+		q->avg_bw[WRITE] -= (q->avg_bw[WRITE] - bw) >> 3;
+}
+
 static noinline_for_stack bool
 generic_make_request_checks(struct bio *bio)
 {
@@ -1981,6 +2027,9 @@ generic_make_request_checks(struct bio *bio)
 	 */
 	create_io_context(GFP_ATOMIC, q->node);
 
+	blk_update_bandwidth(q,
+		part->partno ? &part_to_disk(part)->part0 : part);
+
 	if (!blkcg_bio_issue_check(q, bio))
 		return false;
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e140cc4..419f6bd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -348,6 +348,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_avg_perf_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu %llu\n",
+		(unsigned long long)q->avg_bw[READ] * 512,
+		(unsigned long long)q->avg_bw[WRITE] * 512);
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -479,6 +486,11 @@ static struct queue_sysfs_entry queue_poll_entry = {
 	.store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_avg_perf_entry = {
+	.attr = {.name = "average_perf", .mode = S_IRUGO },
+	.show = queue_avg_perf_show,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -504,6 +516,7 @@ static struct attribute *default_attrs[] = {
 	&queue_iostats_entry.attr,
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
+	&queue_avg_perf_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c70e358..7e6b8ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -464,6 +464,10 @@ struct request_queue {
 	struct bio_set		*bio_split;
 
 	bool			mq_sysfs_init_done;
+
+	unsigned long		bw_timestamp;
+	sector_t		avg_bw[2];
+	sector_t		last_sects[2];
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
2.4.6
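
For reference, the new attribute appears as /sys/block/<disk>/queue/average_perf and reports the read and write estimates in bytes/s (queue_avg_perf_show() multiplies the sectors/s values by 512). A minimal reader sketch, assuming "sda" as a stand-in device name:

/*
 * Example only: read the two bandwidth estimates exported by the new
 * average_perf attribute. "sda" is a placeholder device name.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/block/sda/queue/average_perf", "r");
        unsigned long long rbw, wbw;

        if (!f)
                return 1;
        if (fscanf(f, "%llu %llu", &rbw, &wbw) == 2)
                printf("read %llu MB/s, write %llu MB/s\n",
                       rbw >> 20, wbw >> 20);
        fclose(f);
        return 0;
}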

