We know total bandwidth of a disk and can calculate cgroup's bandwidth
percentage against disk bandwidth according to its weight. We can easily
calculate cgroup bandwidth.

Signed-off-by: Shaohua Li <[email protected]>
---
 block/blk-throttle.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 134 insertions(+), 1 deletion(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 2149a1d..b3f847d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -12,6 +12,9 @@
 #include <linux/blk-cgroup.h>
 #include "blk.h"
 
+#define MAX_WEIGHT (1000)
+#define WEIGHT_RATIO_SHIFT (12)
+#define WEIGHT_RATIO (1 << WEIGHT_RATIO_SHIFT)
 /* Max dispatch from a group in 1 round */
 static int throtl_grp_quantum = 8;
 
@@ -74,6 +77,10 @@ struct throtl_service_queue {
        unsigned int            nr_pending;     /* # queued in the tree */
        unsigned long           first_pending_disptime; /* disptime of the 
first tg */
        struct timer_list       pending_timer;  /* fires on 
first_pending_disptime */
+
+       unsigned int            weight;
+       unsigned int            children_weight;
+       unsigned int            ratio;
 };
 
 enum tg_state_flags {
@@ -152,6 +159,9 @@ struct throtl_data
 
        /* Work for dispatching throttled bios */
        struct work_struct dispatch_work;
+
+       bool bw_based;
+       bool weight_based;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -203,6 +213,15 @@ static struct throtl_data *sq_to_td(struct 
throtl_service_queue *sq)
                return container_of(sq, struct throtl_data, service_queue);
 }
 
+static inline uint64_t queue_bandwidth(struct throtl_data *td, int rw)
+{
+       uint64_t bw = td->queue->avg_bw[rw] * 512;
+
+       /* give extra bw, so cgroup can dispatch enough IO */
+       bw += bw >> 3;
+       return bw;
+}
+
 /**
  * throtl_log - log debug message via blktrace
  * @sq: the service_queue being reported
@@ -371,6 +390,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
        sq->parent_sq = &td->service_queue;
        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
                sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
+       sq->parent_sq->children_weight += sq->weight;
        tg->td = td;
 }
 
@@ -386,7 +406,8 @@ static void tg_update_has_rules(struct throtl_grp *tg)
 
        for (rw = READ; rw <= WRITE; rw++)
                tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
-                                   (tg->bps[rw] != -1 || tg->iops[rw] != -1);
+                                   (tg->bps[rw] != -1 || tg->iops[rw] != -1 ||
+                                    tg->service_queue.weight);
 }
 
 static void throtl_pd_online(struct blkg_policy_data *pd)
@@ -401,6 +422,10 @@ static void throtl_pd_online(struct blkg_policy_data *pd)
 static void throtl_pd_free(struct blkg_policy_data *pd)
 {
        struct throtl_grp *tg = pd_to_tg(pd);
+       struct throtl_service_queue *sq = &tg->service_queue;
+
+       if (sq->parent_sq)
+               sq->parent_sq->children_weight -= sq->weight;
 
        del_timer_sync(&tg->service_queue.pending_timer);
        kfree(tg);
@@ -898,6 +923,48 @@ static void start_parent_slice_with_credit(struct 
throtl_grp *child_tg,
 
 }
 
+static void tg_update_bps(struct throtl_grp *tg)
+{
+       struct throtl_service_queue *sq, *parent_sq;
+
+       sq = &tg->service_queue;
+       parent_sq = sq->parent_sq;
+
+       if (!tg->td->weight_based || !parent_sq)
+               return;
+       sq->ratio = max_t(unsigned int,
+               parent_sq->ratio * sq->weight / parent_sq->children_weight,
+               1);
+
+       tg->bps[READ] = max_t(uint64_t,
+               (queue_bandwidth(tg->td, READ) * sq->ratio) >>
+                       WEIGHT_RATIO_SHIFT,
+               1024);
+       tg->bps[WRITE] = max_t(uint64_t,
+               (queue_bandwidth(tg->td, WRITE) * sq->ratio) >>
+                       WEIGHT_RATIO_SHIFT,
+               1024);
+}
+
+static void tg_update_ratio(struct throtl_grp *tg)
+{
+       struct throtl_data *td = tg->td;
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+
+       blkg_for_each_descendant_pre(blkg, pos_css, td->queue->root_blkg) {
+               struct throtl_service_queue *sq;
+
+               tg = blkg_to_tg(blkg);
+               sq = &tg->service_queue;
+
+               if (!sq->parent_sq)
+                       continue;
+
+               tg_update_bps(tg);
+       }
+}
+
 static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 {
        struct throtl_service_queue *sq = &tg->service_queue;
@@ -1202,12 +1269,65 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
                v = -1;
 
        tg = blkg_to_tg(ctx.blkg);
+       if (tg->td->weight_based) {
+               ret = -EBUSY;
+               goto out_finish;
+       }
 
        if (is_u64)
                *(u64 *)((void *)tg + of_cft(of)->private) = v;
        else
                *(unsigned int *)((void *)tg + of_cft(of)->private) = v;
+       tg->td->bw_based = true;
+
+       tg_conf_updated(tg);
+       ret = 0;
+out_finish:
+       blkg_conf_finish(&ctx);
+       return ret ?: nbytes;
+}
+
+static ssize_t tg_set_weight(struct kernfs_open_file *of,
+                          char *buf, size_t nbytes, loff_t off)
+{
+       struct blkcg *blkcg = css_to_blkcg(of_css(of));
+       struct blkg_conf_ctx ctx;
+       struct throtl_grp *tg;
+       int ret;
+       u64 v;
+       int old_weight;
+
+       ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+       if (ret)
+               return ret;
 
+       ret = -EINVAL;
+       if (sscanf(ctx.body, "%llu", &v) != 1)
+               goto out_finish;
+       if (v > MAX_WEIGHT)
+               v = MAX_WEIGHT;
+       if (v == 0)
+               v = 1;
+
+       tg = blkg_to_tg(ctx.blkg);
+       if (tg->td->bw_based) {
+               ret = -EBUSY;
+               goto out_finish;
+       }
+       tg->td->weight_based = true;
+
+       old_weight = tg->service_queue.weight;
+
+       tg->service_queue.weight = v;
+       if (tg->service_queue.parent_sq) {
+               struct throtl_service_queue *psq = tg->service_queue.parent_sq;
+               if (v > old_weight)
+                       psq->children_weight += v - old_weight;
+               else if (v < old_weight)
+                       psq->children_weight -= old_weight - v;
+       }
+
+       tg_update_ratio(tg);
        tg_conf_updated(tg);
        ret = 0;
 out_finish:
@@ -1229,6 +1349,12 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file 
*of,
 
 static struct cftype throtl_legacy_files[] = {
        {
+               .name = "throttle.weight",
+               .private = offsetof(struct throtl_grp, service_queue.weight),
+               .seq_show = tg_print_conf_uint,
+               .write = tg_set_weight,
+       },
+       {
                .name = "throttle.read_bps_device",
                .private = offsetof(struct throtl_grp, bps[READ]),
                .seq_show = tg_print_conf_u64,
@@ -1313,6 +1439,10 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
                return ret;
 
        tg = blkg_to_tg(ctx.blkg);
+       if (tg->td->weight_based) {
+               ret = -EBUSY;
+               goto out_finish;
+       }
 
        v[0] = tg->bps[READ];
        v[1] = tg->bps[WRITE];
@@ -1358,6 +1488,7 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
        tg->bps[WRITE] = v[1];
        tg->iops[READ] = v[2];
        tg->iops[WRITE] = v[3];
+       tg->td->bw_based = true;
 
        tg_conf_updated(tg);
        ret = 0;
@@ -1415,6 +1546,7 @@ bool blk_throtl_bio(struct request_queue *q, struct 
blkcg_gq *blkg,
 
        sq = &tg->service_queue;
 
+       tg_update_bps(tg);
        while (true) {
                /* throtl is FIFO - if bios are already queued, should queue */
                if (sq->nr_queued[rw])
@@ -1563,6 +1695,7 @@ int blk_throtl_init(struct request_queue *q)
 
        INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
        throtl_service_queue_init(&td->service_queue);
+       td->service_queue.ratio = WEIGHT_RATIO;
 
        q->td = td;
        td->queue = q;
-- 
2.4.6

Reply via email to