If a cgroup is inactive for some time, it should be excluded from
bandwidth calculation.

Signed-off-by: Shaohua Li <[email protected]>
---
 block/blk-throttle.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 67 insertions(+), 4 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b3f847d..5c11270 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -15,6 +15,9 @@
 #define MAX_WEIGHT (1000)
 #define WEIGHT_RATIO_SHIFT (12)
 #define WEIGHT_RATIO (1 << WEIGHT_RATIO_SHIFT)
+/* must be less than the interval at which we update bandwidth */
+#define CGCHECK_TIME (msecs_to_jiffies(20))
+
 /* Max dispatch from a group in 1 round */
 static int throtl_grp_quantum = 8;
 
@@ -81,6 +84,9 @@ struct throtl_service_queue {
        unsigned int            weight;
        unsigned int            children_weight;
        unsigned int            ratio;
+
+       unsigned long active_timestamp;
+       bool active;
 };
 
 enum tg_state_flags {
@@ -162,6 +168,7 @@ struct throtl_data
 
        bool bw_based;
        bool weight_based;
+       unsigned long last_check_timestamp;
 };
 
 static void throtl_pending_timer_fn(unsigned long arg);
@@ -390,7 +397,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
        sq->parent_sq = &td->service_queue;
        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
                sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
-       sq->parent_sq->children_weight += sq->weight;
        tg->td = td;
 }
 
@@ -424,7 +430,7 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
        struct throtl_grp *tg = pd_to_tg(pd);
        struct throtl_service_queue *sq = &tg->service_queue;
 
-       if (sq->parent_sq)
+       if (sq->active && sq->parent_sq)
                sq->parent_sq->children_weight -= sq->weight;
 
        del_timer_sync(&tg->service_queue.pending_timer);
@@ -930,7 +936,7 @@ static void tg_update_bps(struct throtl_grp *tg)
        sq = &tg->service_queue;
        parent_sq = sq->parent_sq;
 
-       if (!tg->td->weight_based || !parent_sq)
+       if (!tg->td->weight_based || !parent_sq || !sq->active)
                return;
        sq->ratio = max_t(unsigned int,
                parent_sq->ratio * sq->weight / parent_sq->children_weight,
@@ -965,6 +971,26 @@ static void tg_update_ratio(struct throtl_grp *tg)
        }
 }
 
+static void tg_update_active_time(struct throtl_grp *tg)
+{ /* Mark @tg's service queue and each ancestor queue as recently active. */
+       struct throtl_service_queue *sq = &tg->service_queue;
+       bool update_ratio = false; /* set once any queue is re-activated */
+       unsigned long now = jiffies; /* one snapshot shared by the whole walk */
+
+       while (sq->parent_sq) { /* a queue without a parent is never stamped */
+               sq->active_timestamp = now;
+               if (!sq->active) { /* was inactive: count its weight in the parent again */
+                       sq->parent_sq->children_weight += sq->weight;
+                       sq->active = true;
+                       update_ratio = true;
+               }
+               sq = sq->parent_sq;
+       }; /* NOTE(review): the ';' after '}' is a redundant empty statement */
+
+       if (update_ratio) /* some children_weight changed during the walk */
+               tg_update_ratio(tg);
+}
+
 static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 {
        struct throtl_service_queue *sq = &tg->service_queue;
@@ -984,6 +1010,8 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 
        throtl_charge_bio(tg, bio);
 
+       tg_update_active_time(tg);
+
        /*
         * If our parent is another tg, we just need to transfer @bio to
         * the parent using throtl_add_bio_tg().  If our parent is
@@ -1319,7 +1347,7 @@ static ssize_t tg_set_weight(struct kernfs_open_file *of,
        old_weight = tg->service_queue.weight;
 
        tg->service_queue.weight = v;
-       if (tg->service_queue.parent_sq) {
+       if (tg->service_queue.active && tg->service_queue.parent_sq) {
                struct throtl_service_queue *psq = tg->service_queue.parent_sq;
                if (v > old_weight)
                        psq->children_weight += v - old_weight;
@@ -1524,6 +1552,39 @@ static struct blkcg_policy blkcg_policy_throtl = {
        .pd_free_fn             = throtl_pd_free,
 };
 
+static void detect_inactive_cg(struct throtl_grp *tg)
+{ /* Refresh @tg's activity, then (rate-limited) deactivate groups seen idle. */
+       struct throtl_data *td = tg->td;
+       struct throtl_service_queue *sq = &tg->service_queue;
+       unsigned long now = jiffies;
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+       bool update_ratio = false; /* set once any queue is deactivated */
+
+       tg_update_active_time(tg); /* @tg itself counts as active right now */
+
+       if (time_before(now, td->last_check_timestamp)) /* next scan not due yet */
+               return;
+       td->last_check_timestamp = now + CGCHECK_TIME; /* earliest time of the next scan */
+
+       blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+               tg = blkg_to_tg(blkg); /* the parameter is reused as the loop cursor */
+               sq = &tg->service_queue;
+               if (sq->parent_sq &&
+                   time_before(sq->active_timestamp + CGCHECK_TIME, now) && /* stamp older than CGCHECK_TIME */
+                   !(sq->nr_queued[READ] || sq->nr_queued[WRITE])) { /* and nothing queued */
+                       if (sq->active && sq->parent_sq) { /* parent_sq was already tested above */
+                               sq->active = false;
+                               sq->parent_sq->children_weight -= sq->weight; /* stop counting its weight */
+                               update_ratio = true;
+                       }
+               }
+       }
+
+       if (update_ratio)
+               tg_update_ratio(tg); /* NOTE(review): tg is the last group visited, not the argument - confirm */
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                    struct bio *bio)
 {
@@ -1546,6 +1607,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
        sq = &tg->service_queue;
 
+       detect_inactive_cg(tg);
        tg_update_bps(tg);
        while (true) {
                /* throtl is FIFO - if bios are already queued, should queue */
@@ -1696,6 +1758,7 @@ int blk_throtl_init(struct request_queue *q)
        INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
        throtl_service_queue_init(&td->service_queue);
        td->service_queue.ratio = WEIGHT_RATIO;
+       td->service_queue.active = true;
 
        q->td = td;
        td->queue = q;
-- 
2.4.6

Reply via email to