commit:     41cf3e1a269f2ff1d94992251fbc4e65e0c35417
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Nov 29 18:03:46 2014 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Nov 29 18:03:46 2014 +0000
URL:        http://sources.gentoo.org/gitweb/?p=proj/linux-patches.git;a=commit;h=41cf3e1a
Bump BFQ patchset to v7r6-3.16 --- ...-cgroups-kconfig-build-bits-for-v7r6-3.16.patch | 6 +- ...ck-introduce-the-v7r6-I-O-sched-for-3.17.patch1 | 421 ++++++++++++++++++--- ...add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch | 194 ++++++---- 3 files changed, 474 insertions(+), 147 deletions(-) diff --git a/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r5-3.16.patch b/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r6-3.16.patch similarity index 97% rename from 5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r5-3.16.patch rename to 5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r6-3.16.patch index 088bd05..7f6a5f4 100644 --- a/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r5-3.16.patch +++ b/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r6-3.16.patch @@ -1,7 +1,7 @@ -From 6519e5beef1063a86d3fc917cff2592cb599e824 Mon Sep 17 00:00:00 2001 +From 92ef290b97a50b9d60eb928166413140cd7a4802 Mon Sep 17 00:00:00 2001 From: Paolo Valente <paolo.vale...@unimore.it> Date: Thu, 22 May 2014 11:59:35 +0200 -Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r5-3.16 +Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r6-3.16 Update Kconfig.iosched and do the related Makefile changes to include kernel configuration options for BFQ. Also add the bfqio controller @@ -100,5 +100,5 @@ index 98c4f9b..13b010d 100644 SUBSYS(perf_event) #endif -- -2.0.3 +2.1.2 diff --git a/5002_BFQ-2-block-introduce-the-v7r5-I-O-sched-for-3.16.patch1 b/5002_BFQ-2-block-introduce-the-v7r6-I-O-sched-for-3.17.patch1 similarity index 92% rename from 5002_BFQ-2-block-introduce-the-v7r5-I-O-sched-for-3.16.patch1 rename to 5002_BFQ-2-block-introduce-the-v7r6-I-O-sched-for-3.17.patch1 index 6f630ba..7ae3298 100644 --- a/5002_BFQ-2-block-introduce-the-v7r5-I-O-sched-for-3.16.patch1 +++ b/5002_BFQ-2-block-introduce-the-v7r6-I-O-sched-for-3.17.patch1 @@ -1,9 +1,9 @@ -From c56e6c5db41f7137d3e0b38063ef0c944eec1898 Mon Sep 17 00:00:00 2001 +From e4fcd78909604194d930e38874a9313090b80348 Mon Sep 17 00:00:00 2001 From: Paolo Valente <paolo.vale...@unimore.it> Date: Thu, 9 May 2013 19:10:02 +0200 -Subject: [PATCH 2/3] block: introduce the BFQ-v7r5 I/O sched for 3.16 +Subject: [PATCH 2/3] block: introduce the BFQ-v7r6 I/O sched for 3.16 -Add the BFQ-v7r5 I/O scheduler to 3.16. +Add the BFQ-v7r6 I/O scheduler to 3.16. The general structure is borrowed from CFQ, as much of the code for handling I/O contexts. Over time, several useful features have been ported from CFQ as well (details in the changelog in README.BFQ). A @@ -56,12 +56,12 @@ until it expires. 
Signed-off-by: Paolo Valente <paolo.vale...@unimore.it> Signed-off-by: Arianna Avanzini <avanzini.aria...@gmail.com> --- - block/bfq-cgroup.c | 930 +++++++++++++ + block/bfq-cgroup.c | 930 ++++++++++++ block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 3617 +++++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1207 +++++++++++++++++ - block/bfq.h | 742 +++++++++++ - 5 files changed, 6532 insertions(+) + block/bfq-iosched.c | 3887 +++++++++++++++++++++++++++++++++++++++++++++++++++ + block/bfq-sched.c | 1207 ++++++++++++++++ + block/bfq.h | 773 ++++++++++ + 5 files changed, 6833 insertions(+) create mode 100644 block/bfq-cgroup.c create mode 100644 block/bfq-ioc.c create mode 100644 block/bfq-iosched.c @@ -1048,10 +1048,10 @@ index 0000000..7f6b000 +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 -index 0000000..0a0891b +index 0000000..b919b03 --- /dev/null +++ b/block/bfq-iosched.c -@@ -0,0 +1,3617 @@ +@@ -0,0 +1,3887 @@ +/* + * Budget Fair Queueing (BFQ) disk scheduler. + * @@ -1625,6 +1625,220 @@ index 0000000..0a0891b + return dur; +} + ++/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ ++static inline void bfq_reset_burst_list(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_queue *item; ++ struct hlist_node *n; ++ ++ hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node) ++ hlist_del_init(&item->burst_list_node); ++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); ++ bfqd->burst_size = 1; ++} ++ ++/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */ ++static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ /* Increment burst size to take into account also bfqq */ ++ bfqd->burst_size++; ++ ++ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { ++ struct bfq_queue *pos, *bfqq_item; ++ struct hlist_node *n; ++ ++ /* ++ * Enough queues have been activated shortly after each ++ * other to consider this burst as large. ++ */ ++ bfqd->large_burst = true; ++ ++ /* ++ * We can now mark all queues in the burst list as ++ * belonging to a large burst. ++ */ ++ hlist_for_each_entry(bfqq_item, &bfqd->burst_list, ++ burst_list_node) ++ bfq_mark_bfqq_in_large_burst(bfqq_item); ++ bfq_mark_bfqq_in_large_burst(bfqq); ++ ++ /* ++ * From now on, and until the current burst finishes, any ++ * new queue being activated shortly after the last queue ++ * was inserted in the burst can be immediately marked as ++ * belonging to a large burst. So the burst list is not ++ * needed any more. Remove it. ++ */ ++ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list, ++ burst_list_node) ++ hlist_del_init(&pos->burst_list_node); ++ } else /* burst not yet large: add bfqq to the burst list */ ++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); ++} ++ ++/* ++ * If many queues happen to become active shortly after each other, then, ++ * to help the processes associated to these queues get their job done as ++ * soon as possible, it is usually better to not grant either weight-raising ++ * or device idling to these queues. In this comment we describe, firstly, ++ * the reasons why this fact holds, and, secondly, the next function, which ++ * implements the main steps needed to properly mark these queues so that ++ * they can then be treated in a different way. 
++ * ++ * As for the terminology, we say that a queue becomes active, i.e., ++ * switches from idle to backlogged, either when it is created (as a ++ * consequence of the arrival of an I/O request), or, if already existing, ++ * when a new request for the queue arrives while the queue is idle. ++ * Bursts of activations, i.e., activations of different queues occurring ++ * shortly after each other, are typically caused by services or applications ++ * that spawn or reactivate many parallel threads/processes. Examples are ++ * systemd during boot or git grep. ++ * ++ * These services or applications benefit mostly from a high throughput: ++ * the quicker the requests of the activated queues are cumulatively served, ++ * the sooner the target job of these queues gets completed. As a consequence, ++ * weight-raising any of these queues, which also implies idling the device ++ * for it, is almost always counterproductive: in most cases it just lowers ++ * throughput. ++ * ++ * On the other hand, a burst of activations may be also caused by the start ++ * of an application that does not consist in a lot of parallel I/O-bound ++ * threads. In fact, with a complex application, the burst may be just a ++ * consequence of the fact that several processes need to be executed to ++ * start-up the application. To start an application as quickly as possible, ++ * the best thing to do is to privilege the I/O related to the application ++ * with respect to all other I/O. Therefore, the best strategy to start as ++ * quickly as possible an application that causes a burst of activations is ++ * to weight-raise all the queues activated during the burst. This is the ++ * exact opposite of the best strategy for the other type of bursts. ++ * ++ * In the end, to take the best action for each of the two cases, the two ++ * types of bursts need to be distinguished. Fortunately, this seems ++ * relatively easy to do, by looking at the sizes of the bursts. In ++ * particular, we found a threshold such that bursts with a larger size ++ * than that threshold are apparently caused only by services or commands ++ * such as systemd or git grep. For brevity, hereafter we call just 'large' ++ * these bursts. BFQ *does not* weight-raise queues whose activations occur ++ * in a large burst. In addition, for each of these queues BFQ performs or ++ * does not perform idling depending on which choice boosts the throughput ++ * most. The exact choice depends on the device and request pattern at ++ * hand. ++ * ++ * Turning back to the next function, it implements all the steps needed ++ * to detect the occurrence of a large burst and to properly mark all the ++ * queues belonging to it (so that they can then be treated in a different ++ * way). This goal is achieved by maintaining a special "burst list" that ++ * holds, temporarily, the queues that belong to the burst in progress. The ++ * list is then used to mark these queues as belonging to a large burst if ++ * the burst does become large. The main steps are the following. ++ * ++ * . when the very first queue is activated, the queue is inserted into the ++ * list (as it could be the first queue in a possible burst) ++ * ++ * . if the current burst has not yet become large, and a queue Q that does ++ * not yet belong to the burst is activated shortly after the last time ++ * at which a new queue entered the burst list, then the function appends ++ * Q to the burst list ++ * ++ * . 
if, as a consequence of the previous step, the burst size reaches ++ * the large-burst threshold, then ++ * ++ * . all the queues in the burst list are marked as belonging to a ++ * large burst ++ * ++ * . the burst list is deleted; in fact, the burst list already served ++ * its purpose (keeping temporarily track of the queues in a burst, ++ * so as to be able to mark them as belonging to a large burst in the ++ * previous sub-step), and now is not needed any more ++ * ++ * . the device enters a large-burst mode ++ * ++ * . if a queue Q that does not belong to the burst is activated while ++ * the device is in large-burst mode and shortly after the last time ++ * at which a queue either entered the burst list or was marked as ++ * belonging to the current large burst, then Q is immediately marked ++ * as belonging to a large burst. ++ * ++ * . if a queue Q that does not belong to the burst is activated a while ++ * later, i.e., not shortly after, than the last time at which a queue ++ * either entered the burst list or was marked as belonging to the ++ * current large burst, then the current burst is deemed as finished and: ++ * ++ * . the large-burst mode is reset if set ++ * ++ * . the burst list is emptied ++ * ++ * . Q is inserted in the burst list, as Q may be the first queue ++ * in a possible new burst (then the burst list contains just Q ++ * after this step). ++ */ ++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ bool idle_for_long_time) ++{ ++ /* ++ * If bfqq happened to be activated in a burst, but has been idle ++ * for at least as long as an interactive queue, then we assume ++ * that, in the overall I/O initiated in the burst, the I/O ++ * associated to bfqq is finished. So bfqq does not need to be ++ * treated as a queue belonging to a burst anymore. Accordingly, ++ * we reset bfqq's in_large_burst flag if set, and remove bfqq ++ * from the burst list if it's there. We do not decrement instead ++ * burst_size, because the fact that bfqq does not need to belong ++ * to the burst list any more does not invalidate the fact that ++ * bfqq may have been activated during the current burst. ++ */ ++ if (idle_for_long_time) { ++ hlist_del_init(&bfqq->burst_list_node); ++ bfq_clear_bfqq_in_large_burst(bfqq); ++ } ++ ++ /* ++ * If bfqq is already in the burst list or is part of a large ++ * burst, then there is nothing else to do. ++ */ ++ if (!hlist_unhashed(&bfqq->burst_list_node) || ++ bfq_bfqq_in_large_burst(bfqq)) ++ return; ++ ++ /* ++ * If bfqq's activation happens late enough, then the current ++ * burst is finished, and related data structures must be reset. ++ * ++ * In this respect, consider the special case where bfqq is the very ++ * first queue being activated. In this case, last_ins_in_burst is ++ * not yet significant when we get here. But it is easy to verify ++ * that, whether or not the following condition is true, bfqq will ++ * end up being inserted into the burst list. In particular the ++ * list will happen to contain only bfqq. And this is exactly what ++ * has to happen, as bfqq may be the first queue in a possible ++ * burst. ++ */ ++ if (time_is_before_jiffies(bfqd->last_ins_in_burst + ++ bfqd->bfq_burst_interval)) { ++ bfqd->large_burst = false; ++ bfq_reset_burst_list(bfqd, bfqq); ++ return; ++ } ++ ++ /* ++ * If we get here, then bfqq is being activated shortly after the ++ * last queue. So, if the current burst is also large, we can mark ++ * bfqq as belonging to this large burst immediately. 
++ */ ++ if (bfqd->large_burst) { ++ bfq_mark_bfqq_in_large_burst(bfqq); ++ return; ++ } ++ ++ /* ++ * If we get here, then a large-burst state has not yet been ++ * reached, but bfqq is being activated shortly after the last ++ * queue. Then we add bfqq to the burst. ++ */ ++ bfq_add_to_burst(bfqd, bfqq); ++} ++ +static void bfq_add_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); @@ -1632,7 +1846,7 @@ index 0000000..0a0891b + struct bfq_data *bfqd = bfqq->bfqd; + struct request *next_rq, *prev; + unsigned long old_wr_coeff = bfqq->wr_coeff; -+ int idle_for_long_time = 0; ++ bool interactive = false; + + bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); + bfqq->queued[rq_is_sync(rq)]++; @@ -1655,11 +1869,35 @@ index 0000000..0a0891b + bfq_rq_pos_tree_add(bfqd, bfqq); + + if (!bfq_bfqq_busy(bfqq)) { -+ int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && ++ bool soft_rt, ++ idle_for_long_time = time_is_before_jiffies( ++ bfqq->budget_timeout + ++ bfqd->bfq_wr_min_idle_time); ++ ++ if (bfq_bfqq_sync(bfqq)) { ++ bool already_in_burst = ++ !hlist_unhashed(&bfqq->burst_list_node) || ++ bfq_bfqq_in_large_burst(bfqq); ++ bfq_handle_burst(bfqd, bfqq, idle_for_long_time); ++ /* ++ * If bfqq was not already in the current burst, ++ * then, at this point, bfqq either has been ++ * added to the current burst or has caused the ++ * current burst to terminate. In particular, in ++ * the second case, bfqq has become the first ++ * queue in a possible new burst. ++ * In both cases last_ins_in_burst needs to be ++ * moved forward. ++ */ ++ if (!already_in_burst) ++ bfqd->last_ins_in_burst = jiffies; ++ } ++ ++ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && ++ !bfq_bfqq_in_large_burst(bfqq) && + time_is_before_jiffies(bfqq->soft_rt_next_start); -+ idle_for_long_time = time_is_before_jiffies( -+ bfqq->budget_timeout + -+ bfqd->bfq_wr_min_idle_time); ++ interactive = !bfq_bfqq_in_large_burst(bfqq) && ++ idle_for_long_time; + entity->budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + @@ -1682,9 +1920,9 @@ index 0000000..0a0891b + * If the queue is not being boosted and has been idle + * for enough time, start a weight-raising period + */ -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { ++ if (old_wr_coeff == 1 && (interactive || soft_rt)) { + bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ if (idle_for_long_time) ++ if (interactive) + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + else + bfqq->wr_cur_max_time = @@ -1694,11 +1932,12 @@ index 0000000..0a0891b + jiffies, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } else if (old_wr_coeff > 1) { -+ if (idle_for_long_time) ++ if (interactive) + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else if (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt) { ++ else if (bfq_bfqq_in_large_burst(bfqq) || ++ (bfqq->wr_cur_max_time == ++ bfqd->bfq_wr_rt_max_time && ++ !soft_rt)) { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", @@ -1787,8 +2026,7 @@ index 0000000..0a0891b + } + + if (bfqd->low_latency && -+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || -+ idle_for_long_time)) ++ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) + bfqq->last_wr_start_finish = jiffies; +} + @@ -2291,9 +2529,7 @@ index 0000000..0a0891b + return rq; +} + -+/* -+ * Must be called with the queue_lock held. -+ */ ++/* Must be called with the queue_lock held. 
*/ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; @@ -2896,16 +3132,26 @@ index 0000000..0a0891b + * long comment, we try to briefly describe all the details and motivations + * behind the components of this logical expression. + * -+ * First, the expression may be true only for sync queues. Besides, if -+ * bfqq is also being weight-raised, then the expression always evaluates -+ * to true, as device idling is instrumental for preserving low-latency -+ * guarantees (see [1]). Otherwise, the expression evaluates to true only -+ * if bfqq has a non-null idle window and at least one of the following -+ * two conditions holds. The first condition is that the device is not -+ * performing NCQ, because idling the device most certainly boosts the -+ * throughput if this condition holds and bfqq has been granted a non-null -+ * idle window. The second compound condition is made of the logical AND of -+ * two components. ++ * First, the expression is false if bfqq is not sync, or if: bfqq happened ++ * to become active during a large burst of queue activations, and the ++ * pattern of requests bfqq contains boosts the throughput if bfqq is ++ * expired. In fact, queues that became active during a large burst benefit ++ * only from throughput, as discussed in the comments to bfq_handle_burst. ++ * In this respect, expiring bfqq certainly boosts the throughput on NCQ- ++ * capable flash-based devices, whereas, on rotational devices, it boosts ++ * the throughput only if bfqq contains random requests. ++ * ++ * On the opposite end, if (a) bfqq is sync, (b) the above burst-related ++ * condition does not hold, and (c) bfqq is being weight-raised, then the ++ * expression always evaluates to true, as device idling is instrumental ++ * for preserving low-latency guarantees (see [1]). If, instead, conditions ++ * (a) and (b) do hold, but (c) does not, then the expression evaluates to ++ * true only if: (1) bfqq is I/O-bound and has a non-null idle window, and ++ * (2) at least one of the following two conditions holds. ++ * The first condition is that the device is not performing NCQ, because ++ * idling the device most certainly boosts the throughput if this condition ++ * holds and bfqq is I/O-bound and has been granted a non-null idle window. ++ * The second compound condition is made of the logical AND of two components. + * + * The first component is true only if there is no weight-raised busy + * queue. This guarantees that the device is not idled for a sync non- @@ -3022,6 +3268,12 @@ index 0000000..0a0891b +#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \ + bfqd->busy_in_flight_queues == \ + bfqd->const_seeky_busy_in_flight_queues) ++ ++#define cond_for_expiring_in_burst (bfq_bfqq_in_large_burst(bfqq) && \ ++ bfqd->hw_tag && \ ++ (blk_queue_nonrot(bfqd->queue) || \ ++ bfq_bfqq_constantly_seeky(bfqq))) ++ +/* + * Condition for expiring a non-weight-raised queue (and hence not idling + * the device). 
@@ -3033,9 +3285,9 @@ index 0000000..0a0891b + cond_for_seeky_on_ncq_hdd)))) + + return bfq_bfqq_sync(bfqq) && -+ (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && ++ !cond_for_expiring_in_burst && + (bfqq->wr_coeff > 1 || -+ (bfq_bfqq_idle_window(bfqq) && ++ (bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_idle_window(bfqq) && + !cond_for_expiring_non_wr) + ); +} @@ -3179,10 +3431,12 @@ index 0000000..0a0891b + if (entity->ioprio_changed) + bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); + /* -+ * If too much time has elapsed from the beginning -+ * of this weight-raising, stop it. ++ * If the queue was activated in a burst, or ++ * too much time has elapsed from the beginning ++ * of this weight-raising, then end weight raising. + */ -+ if (time_is_before_jiffies(bfqq->last_wr_start_finish + ++ if (bfq_bfqq_in_large_burst(bfqq) || ++ time_is_before_jiffies(bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time)) { + bfqq->last_wr_start_finish = jiffies; + bfq_log_bfqq(bfqd, bfqq, @@ -3387,6 +3641,17 @@ index 0000000..0a0891b + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqd->in_service_queue == bfqq); + ++ if (bfq_bfqq_sync(bfqq)) ++ /* ++ * The fact that this queue is being destroyed does not ++ * invalidate the fact that this queue may have been ++ * activated during the current burst. As a consequence, ++ * although the queue does not exist anymore, and hence ++ * needs to be removed from the burst list if there, ++ * the burst size has not to be decremented. ++ */ ++ hlist_del_init(&bfqq->burst_list_node); ++ + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); @@ -3540,6 +3805,7 @@ index 0000000..0a0891b +{ + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); ++ INIT_HLIST_NODE(&bfqq->burst_list_node); + + atomic_set(&bfqq->ref, 0); + bfqq->bfqd = bfqd; @@ -4298,6 +4564,7 @@ index 0000000..0a0891b + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); ++ INIT_HLIST_HEAD(&bfqd->burst_list); + + bfqd->hw_tag = -1; + @@ -4318,6 +4585,9 @@ index 0000000..0a0891b + bfqd->bfq_failed_cooperations = 7000; + bfqd->bfq_requests_within_timer = 120; + ++ bfqd->bfq_large_burst_thresh = 11; ++ bfqd->bfq_burst_interval = msecs_to_jiffies(500); ++ + bfqd->low_latency = true; + + bfqd->bfq_wr_coeff = 20; @@ -4653,7 +4923,7 @@ index 0000000..0a0891b + device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; + + elv_register(&iosched_bfq); -+ pr_info("BFQ I/O-scheduler version: v7r5"); ++ pr_info("BFQ I/O-scheduler version: v7r6"); + + return 0; +} @@ -5884,12 +6154,12 @@ index 0000000..c4831b7 +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 -index 0000000..a83e69d +index 0000000..0378c86 --- /dev/null +++ b/block/bfq.h -@@ -0,0 +1,742 @@ +@@ -0,0 +1,773 @@ +/* -+ * BFQ-v7r5 for 3.16.0: data structures and common functions prototypes. ++ * BFQ-v7r6 for 3.16.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe <ax...@kernel.dk> @@ -6086,6 +6356,7 @@ index 0000000..a83e69d + * @dispatched: number of requests on the dispatch list or inside driver. + * @flags: status flags. + * @bfqq_list: node for active/idle bfqq list inside our bfqd. ++ * @burst_list_node: node for the device's burst list. 
+ * @seek_samples: number of seeks sampled + * @seek_total: sum of the distances of the seeks sampled + * @seek_mean: mean seek distance @@ -6146,6 +6417,8 @@ index 0000000..a83e69d + + struct list_head bfqq_list; + ++ struct hlist_node burst_list_node; ++ + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; @@ -6298,22 +6571,38 @@ index 0000000..a83e69d + * again idling to a queue which was marked as + * non-I/O-bound (see the definition of the + * IO_bound flag for further details). -+ * @bfq_wr_coeff: Maximum factor by which the weight of a weight-raised -+ * queue is multiplied -+ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) -+ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes ++ * @last_ins_in_burst: last time at which a queue entered the current ++ * burst of queues being activated shortly after ++ * each other; for more details about this and the ++ * following parameters related to a burst of ++ * activations, see the comments to the function ++ * @bfq_handle_burst. ++ * @bfq_burst_interval: reference time interval used to decide whether a ++ * queue has been activated shortly after ++ * @last_ins_in_burst. ++ * @burst_size: number of queues in the current burst of queue activations. ++ * @bfq_large_burst_thresh: maximum burst size above which the current ++ * queue-activation burst is deemed as 'large'. ++ * @large_burst: true if a large queue-activation burst is in progress. ++ * @burst_list: head of the burst list (as for the above fields, more details ++ * in the comments to the function bfq_handle_burst). ++ * @low_latency: if set to true, low-latency heuristics are enabled. ++ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised ++ * queue is multiplied. ++ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies). ++ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes. + * @bfq_wr_min_idle_time: minimum idle period after which weight-raising -+ * may be reactivated for a queue (in jiffies) ++ * may be reactivated for a queue (in jiffies). + * @bfq_wr_min_inter_arr_async: minimum period between request arrivals + * after which weight-raising may be + * reactivated for an already busy queue -+ * (in jiffies) ++ * (in jiffies). + * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, -+ * sectors per seconds ++ * sectors per seconds. + * @RT_prod: cached value of the product R*T used for computing the maximum -+ * duration of the weight raising automatically -+ * @device_speed: device-speed class for the low-latency heuristic -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions ++ * duration of the weight raising automatically. ++ * @device_speed: device-speed class for the low-latency heuristic. ++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions. + * + * All the fields are protected by the @queue lock. + */ @@ -6377,6 +6666,13 @@ index 0000000..a83e69d + unsigned int bfq_failed_cooperations; + unsigned int bfq_requests_within_timer; + ++ unsigned long last_ins_in_burst; ++ unsigned long bfq_burst_interval; ++ int burst_size; ++ unsigned long bfq_large_burst_thresh; ++ bool large_burst; ++ struct hlist_head burst_list; ++ + bool low_latency; + + /* parameters of the low_latency heuristics */ @@ -6406,6 +6702,10 @@ index 0000000..a83e69d + * having consumed at most 2/10 of + * its budget + */ ++ BFQ_BFQQ_FLAG_in_large_burst, /* ++ * bfqq activated in a large burst, ++ * see comments to bfq_handle_burst. 
++ */ + BFQ_BFQQ_FLAG_constantly_seeky, /* + * bfqq has proved to be slow and + * seeky until budget timeout @@ -6441,6 +6741,7 @@ index 0000000..a83e69d +BFQ_BFQQ_FNS(sync); +BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(IO_bound); ++BFQ_BFQQ_FNS(in_large_burst); +BFQ_BFQQ_FNS(constantly_seeky); +BFQ_BFQQ_FNS(coop); +BFQ_BFQQ_FNS(split_coop); @@ -6561,15 +6862,15 @@ index 0000000..a83e69d +} + +static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, -+ int is_sync) ++ bool is_sync) +{ -+ return bic->bfqq[!!is_sync]; ++ return bic->bfqq[is_sync]; +} + +static inline void bic_set_bfqq(struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, int is_sync) ++ struct bfq_queue *bfqq, bool is_sync) +{ -+ bic->bfqq[!!is_sync] = bfqq; ++ bic->bfqq[is_sync] = bfqq; +} + +static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) @@ -6631,5 +6932,5 @@ index 0000000..a83e69d + +#endif /* _BFQ_H */ -- -2.0.3 +2.1.2 diff --git a/5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r5-for-3.16.0.patch b/5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch similarity index 87% rename from 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r5-for-3.16.0.patch rename to 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch index e606f5d..53e7c76 100644 --- a/5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r5-for-3.16.0.patch +++ b/5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch @@ -1,7 +1,7 @@ -From 5b290be286aa74051b4b77a216032b771ceadd23 Mon Sep 17 00:00:00 2001 +From 5428334e0390ccad40fa21dd046eb163025a4f74 Mon Sep 17 00:00:00 2001 From: Mauro Andreolini <mauro.andreol...@unimore.it> -Date: Wed, 18 Jun 2014 17:38:07 +0200 -Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r5 for +Date: Sun, 19 Oct 2014 01:15:59 +0200 +Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r6 for 3.16.0 A set of processes may happen to perform interleaved reads, i.e.,requests @@ -34,13 +34,13 @@ Signed-off-by: Mauro Andreolini <mauro.andreol...@unimore.it> Signed-off-by: Arianna Avanzini <avanzini.aria...@gmail.com> Signed-off-by: Paolo Valente <paolo.vale...@unimore.it> --- - block/bfq-iosched.c | 736 ++++++++++++++++++++++++++++++++++++---------------- + block/bfq-iosched.c | 743 +++++++++++++++++++++++++++++++++++++--------------- block/bfq-sched.c | 28 -- - block/bfq.h | 46 +++- - 3 files changed, 556 insertions(+), 254 deletions(-) + block/bfq.h | 54 +++- + 3 files changed, 573 insertions(+), 252 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 0a0891b..d1d8e67 100644 +index b919b03..bbfb4e1 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -571,6 +571,57 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) @@ -64,7 +64,9 @@ index 0a0891b..d1d8e67 100644 + bfq_mark_bfqq_IO_bound(bfqq); + else + bfq_clear_bfqq_IO_bound(bfqq); ++ /* Assuming that the flag in_large_burst is already correctly set */ + if (bic->wr_time_left && bfqq->bfqd->low_latency && ++ !bfq_bfqq_in_large_burst(bfqq) && + bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { + /* + * Start a weight raising period with the duration given by @@ -85,9 +87,7 @@ index 0a0891b..d1d8e67 100644 + bic->wr_time_left = 0; +} + -+/* -+ * Must be called with the queue_lock held. -+ */ ++/* Must be called with the queue_lock held. 
*/ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; @@ -98,23 +98,35 @@ index 0a0891b..d1d8e67 100644 + return process_refs; +} + - static void bfq_add_request(struct request *rq) - { - struct bfq_queue *bfqq = RQ_BFQQ(rq); -@@ -602,8 +653,11 @@ static void bfq_add_request(struct request *rq) + /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ + static inline void bfq_reset_burst_list(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +@@ -815,7 +866,7 @@ static void bfq_add_request(struct request *rq) + bfq_rq_pos_tree_add(bfqd, bfqq); if (!bfq_bfqq_busy(bfqq)) { - int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && +- bool soft_rt, ++ bool soft_rt, coop_or_in_burst, + idle_for_long_time = time_is_before_jiffies( + bfqq->budget_timeout + + bfqd->bfq_wr_min_idle_time); +@@ -839,11 +890,12 @@ static void bfq_add_request(struct request *rq) + bfqd->last_ins_in_burst = jiffies; + } + ++ coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) || ++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh; + soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && +- !bfq_bfqq_in_large_burst(bfqq) && ++ !coop_or_in_burst && time_is_before_jiffies(bfqq->soft_rt_next_start); -- idle_for_long_time = time_is_before_jiffies( -+ idle_for_long_time = bfq_bfqq_cooperations(bfqq) < -+ bfqd->bfq_coop_thresh && -+ time_is_before_jiffies( - bfqq->budget_timeout + - bfqd->bfq_wr_min_idle_time); +- interactive = !bfq_bfqq_in_large_burst(bfqq) && +- idle_for_long_time; ++ interactive = !coop_or_in_burst && idle_for_long_time; entity->budget = max_t(unsigned long, bfqq->max_budget, -@@ -624,11 +678,20 @@ static void bfq_add_request(struct request *rq) + bfq_serv_to_charge(next_rq, bfqq)); + +@@ -862,11 +914,20 @@ static void bfq_add_request(struct request *rq) if (!bfqd->low_latency) goto add_bfqq_busy; @@ -132,28 +144,22 @@ index 0a0891b..d1d8e67 100644 + * requests have not been redirected to a shared queue) + * start a weight-raising period. */ -- if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) && +- if (old_wr_coeff == 1 && (interactive || soft_rt)) { ++ if (old_wr_coeff == 1 && (interactive || soft_rt) && + (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) { bfqq->wr_coeff = bfqd->bfq_wr_coeff; - if (idle_for_long_time) + if (interactive) bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -@@ -642,9 +705,11 @@ static void bfq_add_request(struct request *rq) +@@ -880,7 +941,7 @@ static void bfq_add_request(struct request *rq) } else if (old_wr_coeff > 1) { - if (idle_for_long_time) + if (interactive) bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else if (bfqq->wr_cur_max_time == -- bfqd->bfq_wr_rt_max_time && -- !soft_rt) { -+ else if (bfq_bfqq_cooperations(bfqq) >= -+ bfqd->bfq_coop_thresh || -+ (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt)) { - bfqq->wr_coeff = 1; - bfq_log_bfqq(bfqd, bfqq, - "wrais ending at %lu, rais_max_time %u", -@@ -660,18 +725,18 @@ static void bfq_add_request(struct request *rq) +- else if (bfq_bfqq_in_large_burst(bfqq) || ++ else if (coop_or_in_burst || + (bfqq->wr_cur_max_time == + bfqd->bfq_wr_rt_max_time && + !soft_rt)) { +@@ -899,18 +960,18 @@ static void bfq_add_request(struct request *rq) /* * * The remaining weight-raising time is lower @@ -184,7 +190,7 @@ index 0a0891b..d1d8e67 100644 * * In addition, the application is now meeting * the requirements for being deemed soft rt. 
-@@ -706,6 +771,7 @@ static void bfq_add_request(struct request *rq) +@@ -945,6 +1006,7 @@ static void bfq_add_request(struct request *rq) bfqd->bfq_wr_rt_max_time; } } @@ -192,7 +198,7 @@ index 0a0891b..d1d8e67 100644 if (old_wr_coeff != bfqq->wr_coeff) entity->ioprio_changed = 1; add_bfqq_busy: -@@ -918,90 +984,35 @@ static void bfq_end_wr(struct bfq_data *bfqd) +@@ -1156,90 +1218,35 @@ static void bfq_end_wr(struct bfq_data *bfqd) spin_unlock_irq(bfqd->queue->queue_lock); } @@ -297,7 +303,7 @@ index 0a0891b..d1d8e67 100644 if (RB_EMPTY_ROOT(root)) return NULL; -@@ -1020,7 +1031,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) +@@ -1258,7 +1265,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) * next_request position). */ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); @@ -306,7 +312,7 @@ index 0a0891b..d1d8e67 100644 return __bfqq; if (blk_rq_pos(__bfqq->next_rq) < sector) -@@ -1031,7 +1042,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) +@@ -1269,7 +1276,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) return NULL; __bfqq = rb_entry(node, struct bfq_queue, pos_node); @@ -315,7 +321,7 @@ index 0a0891b..d1d8e67 100644 return __bfqq; return NULL; -@@ -1040,14 +1051,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) +@@ -1278,14 +1285,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) /* * bfqd - obvious * cur_bfqq - passed in so that we don't decide that the current queue @@ -334,7 +340,7 @@ index 0a0891b..d1d8e67 100644 { struct bfq_queue *bfqq; -@@ -1067,7 +1076,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, +@@ -1305,7 +1310,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, * working closely on the same area of the disk. In that case, * we can group them together and don't waste time idling. */ @@ -343,7 +349,7 @@ index 0a0891b..d1d8e67 100644 if (bfqq == NULL || bfqq == cur_bfqq) return NULL; -@@ -1094,6 +1103,305 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, +@@ -1332,6 +1337,307 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, return bfqq; } @@ -508,6 +514,8 @@ index 0a0891b..d1d8e67 100644 + bfqq->bic->wr_time_left = 0; + bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); + bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); ++ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); ++ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); + bfqq->bic->cooperations++; + bfqq->bic->failed_cooperations = 0; +} @@ -649,13 +657,11 @@ index 0a0891b..d1d8e67 100644 /* * If enough samples have been computed, return the current max budget * stored in bfqd, which is dynamically updated according to the -@@ -1237,63 +1545,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +@@ -1475,61 +1781,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) return rq; } --/* -- * Must be called with the queue_lock held. -- */ +-/* Must be called with the queue_lock held. 
*/ -static int bfqq_process_refs(struct bfq_queue *bfqq) -{ - int process_refs, io_refs; @@ -713,7 +719,7 @@ index 0a0891b..d1d8e67 100644 static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; -@@ -2011,7 +2262,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) +@@ -2263,7 +2514,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) */ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) { @@ -722,7 +728,7 @@ index 0a0891b..d1d8e67 100644 struct request *next_rq; enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; -@@ -2021,17 +2272,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +@@ -2273,17 +2524,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); @@ -740,7 +746,7 @@ index 0a0891b..d1d8e67 100644 if (bfq_may_expire_for_budg_timeout(bfqq) && !timer_pending(&bfqd->idle_slice_timer) && !bfq_bfqq_must_idle(bfqq)) -@@ -2070,10 +2310,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +@@ -2322,10 +2562,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) bfq_clear_bfqq_wait_request(bfqq); del_timer(&bfqd->idle_slice_timer); } @@ -752,7 +758,7 @@ index 0a0891b..d1d8e67 100644 } } -@@ -2082,40 +2319,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +@@ -2334,40 +2571,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * in flight (possibly waiting for a completion) or is idling for a * new request, then keep it. */ @@ -800,25 +806,25 @@ index 0a0891b..d1d8e67 100644 jiffies_to_msecs(bfqq->wr_cur_max_time), bfqq->wr_coeff, bfqq->entity.weight, bfqq->entity.orig_weight); -@@ -2124,11 +2351,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, +@@ -2376,12 +2603,16 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, entity->orig_weight * bfqq->wr_coeff); if (entity->ioprio_changed) bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); + /* - * If too much time has elapsed from the beginning -- * of this weight-raising, stop it. + * If the queue was activated in a burst, or + * too much time has elapsed from the beginning +- * of this weight-raising, then end weight raising. + * of this weight-raising period, or the queue has + * exceeded the acceptable number of cooperations, -+ * stop it. ++ * then end weight raising. 
*/ -- if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || -+ time_is_before_jiffies(bfqq->last_wr_start_finish + + if (bfq_bfqq_in_large_burst(bfqq) || ++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || + time_is_before_jiffies(bfqq->last_wr_start_finish + bfqq->wr_cur_max_time)) { bfqq->last_wr_start_finish = jiffies; - bfq_log_bfqq(bfqd, bfqq, -@@ -2136,11 +2367,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, +@@ -2390,11 +2621,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, bfqq->last_wr_start_finish, jiffies_to_msecs(bfqq->wr_cur_max_time)); bfq_bfqq_end_wr(bfqq); @@ -835,7 +841,7 @@ index 0a0891b..d1d8e67 100644 } /* -@@ -2377,6 +2610,25 @@ static inline void bfq_init_icq(struct io_cq *icq) +@@ -2642,6 +2875,25 @@ static inline void bfq_init_icq(struct io_cq *icq) struct bfq_io_cq *bic = icq_to_bic(icq); bic->ttime.last_end_request = jiffies; @@ -861,7 +867,7 @@ index 0a0891b..d1d8e67 100644 } static void bfq_exit_icq(struct io_cq *icq) -@@ -2390,6 +2642,13 @@ static void bfq_exit_icq(struct io_cq *icq) +@@ -2655,6 +2907,13 @@ static void bfq_exit_icq(struct io_cq *icq) } if (bic->bfqq[BLK_RW_SYNC]) { @@ -875,7 +881,7 @@ index 0a0891b..d1d8e67 100644 bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); bic->bfqq[BLK_RW_SYNC] = NULL; } -@@ -2678,6 +2937,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, +@@ -2944,6 +3203,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) return; @@ -886,7 +892,7 @@ index 0a0891b..d1d8e67 100644 enable_idle = bfq_bfqq_idle_window(bfqq); if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -@@ -2725,6 +2988,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2991,6 +3254,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || !BFQQ_SEEKY(bfqq)) bfq_update_idle_window(bfqd, bfqq, bic); @@ -894,7 +900,7 @@ index 0a0891b..d1d8e67 100644 bfq_log_bfqq(bfqd, bfqq, "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -@@ -2785,13 +3049,49 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3051,13 +3315,49 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, static void bfq_insert_request(struct request_queue *q, struct request *rq) { struct bfq_data *bfqd = q->elevator->elevator_data; @@ -945,7 +951,7 @@ index 0a0891b..d1d8e67 100644 rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &bfqq->fifo); -@@ -2956,18 +3256,6 @@ static void bfq_put_request(struct request *rq) +@@ -3222,18 +3522,6 @@ static void bfq_put_request(struct request *rq) } } @@ -964,7 +970,7 @@ index 0a0891b..d1d8e67 100644 /* * Returns NULL if a new bfqq should be allocated, or the old bfqq if this * was the last process referring to said bfqq. 
-@@ -2976,6 +3264,9 @@ static struct bfq_queue * +@@ -3242,6 +3530,9 @@ static struct bfq_queue * bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); @@ -974,7 +980,7 @@ index 0a0891b..d1d8e67 100644 if (bfqq_process_refs(bfqq) == 1) { bfqq->pid = current->pid; bfq_clear_bfqq_coop(bfqq); -@@ -3004,6 +3295,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3270,6 +3561,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, struct bfq_queue *bfqq; struct bfq_group *bfqg; unsigned long flags; @@ -982,9 +988,21 @@ index 0a0891b..d1d8e67 100644 might_sleep_if(gfp_mask & __GFP_WAIT); -@@ -3022,24 +3314,14 @@ new_queue: +@@ -3287,25 +3579,26 @@ new_queue: + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); bic_set_bfqq(bic, bfqq, is_sync); ++ if (split && is_sync) { ++ if ((bic->was_in_burst_list && bfqd->large_burst) || ++ bic->saved_in_large_burst) ++ bfq_mark_bfqq_in_large_burst(bfqq); ++ else { ++ bfq_clear_bfqq_in_large_burst(bfqq); ++ if (bic->was_in_burst_list) ++ hlist_add_head(&bfqq->burst_list_node, ++ &bfqd->burst_list); ++ } ++ } } else { - /* - * If the queue was seeky for too long, break it apart. @@ -1009,7 +1027,7 @@ index 0a0891b..d1d8e67 100644 } bfqq->allocated[rw]++; -@@ -3050,6 +3332,26 @@ new_queue: +@@ -3316,6 +3609,26 @@ new_queue: rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; @@ -1076,10 +1094,10 @@ index c4831b7..546a254 100644 { if (bfqd->in_service_bic != NULL) { diff --git a/block/bfq.h b/block/bfq.h -index a83e69d..ebbd040 100644 +index 0378c86..93a2d24 100644 --- a/block/bfq.h +++ b/block/bfq.h -@@ -215,18 +215,21 @@ struct bfq_group; +@@ -216,18 +216,21 @@ struct bfq_group; * idle @bfq_queue with no outstanding requests, then * the task associated with the queue it is deemed as * soft real-time (see the comments to the function @@ -1107,7 +1125,7 @@ index a83e69d..ebbd040 100644 * All the fields are protected by the queue lock of the containing bfqd. 
*/ struct bfq_queue { -@@ -264,6 +267,7 @@ struct bfq_queue { +@@ -267,6 +270,7 @@ struct bfq_queue { unsigned int requests_within_timer; pid_t pid; @@ -1115,7 +1133,7 @@ index a83e69d..ebbd040 100644 /* weight-raising fields */ unsigned long wr_cur_max_time; -@@ -293,12 +297,34 @@ struct bfq_ttime { +@@ -296,12 +300,42 @@ struct bfq_ttime { * @icq: associated io_cq structure * @bfqq: array of two process queues, the sync and the async * @ttime: associated @bfq_ttime struct @@ -1130,6 +1148,11 @@ index a83e69d..ebbd040 100644 + * window + * @saved_IO_bound: same purpose as the previous two fields for the I/O + * bound classification of a queue ++ * @saved_in_large_burst: same purpose as the previous fields for the ++ * value of the field keeping the queue's belonging ++ * to a large burst ++ * @was_in_burst_list: true if the queue belonged to a burst list ++ * before its merge with another cooperating queue + * @cooperations: counter of consecutive successful queue merges underwent + * by any of the process' @bfq_queues + * @failed_cooperations: counter of consecutive failed queue merges of any @@ -1142,15 +1165,18 @@ index a83e69d..ebbd040 100644 int ioprio; + + unsigned int wr_time_left; -+ unsigned int saved_idle_window; -+ unsigned int saved_IO_bound; ++ bool saved_idle_window; ++ bool saved_IO_bound; ++ ++ bool saved_in_large_burst; ++ bool was_in_burst_list; + + unsigned int cooperations; + unsigned int failed_cooperations; }; enum bfq_device_speed { -@@ -511,7 +537,7 @@ enum bfqq_state_flags { +@@ -537,7 +571,7 @@ enum bfqq_state_flags { BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ @@ -1159,7 +1185,7 @@ index a83e69d..ebbd040 100644 * bfqq has timed-out at least once * having consumed at most 2/10 of * its budget -@@ -520,12 +546,13 @@ enum bfqq_state_flags { +@@ -550,12 +584,13 @@ enum bfqq_state_flags { * bfqq has proved to be slow and * seeky until budget timeout */ @@ -1175,7 +1201,7 @@ index a83e69d..ebbd040 100644 }; #define BFQ_BFQQ_FNS(name) \ -@@ -554,6 +581,7 @@ BFQ_BFQQ_FNS(IO_bound); +@@ -585,6 +620,7 @@ BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(constantly_seeky); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); @@ -1184,5 +1210,5 @@ index a83e69d..ebbd040 100644 #undef BFQ_BFQQ_FNS -- -2.0.3 +2.1.2