From: Pavel Begunkov <[email protected]>

Commit 4bc6339a583cec650b05 ("block: move blk_stat_add() to
__blk_mq_end_request()") moved blk_stat_add(), so now it's called after
blk_update_request(), which zeroes rq->__data_len. Without length,
blk_stat_add() can't calculate stat bucket and returns error,
effectively disabling hybrid polling.

__blk_mq_end_request() is a right place to call blk_stat_add(), as it's
guaranteed to be called for each request. Yet, calculating time there
won't provide sufficient accuracy/precision for finer tuned hybrid
polling, because a path from __blk_mq_complete_request() to
__blk_mq_end_request() adds unpredictable overhead.

Add io_end_time_ns field in struct request, save time as soon as
possible (at __blk_mq_complete_request()) and reuse later.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 block/blk-mq.c         | 13 ++++++++++---
 block/blk-stat.c       |  4 ++--
 block/blk-stat.h       |  2 +-
 include/linux/blkdev.h | 11 +++++++++++
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32b8ad3d341b..8f6b6bfe0ccb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -330,6 +330,7 @@ static struct request *blk_mq_rq_ctx_init(struct 
blk_mq_alloc_data *data,
        else
                rq->start_time_ns = 0;
        rq->io_start_time_ns = 0;
+       rq->io_end_time_ns = 0;
        rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
        rq->nr_integrity_segments = 0;
@@ -532,14 +533,17 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
-       u64 now = 0;
+       u64 now = rq->io_end_time_ns;
 
-       if (blk_mq_need_time_stamp(rq))
+       /* called directly bypassing __blk_mq_complete_request */
+       if (blk_mq_need_time_stamp(rq) && !now) {
                now = ktime_get_ns();
+               rq->io_end_time_ns = now;
+       }
 
        if (rq->rq_flags & RQF_STATS) {
                blk_mq_poll_stats_start(rq->q);
-               blk_stat_add(rq, now);
+               blk_stat_add(rq);
        }
 
        if (rq->internal_tag != -1)
@@ -579,6 +583,9 @@ static void __blk_mq_complete_request(struct request *rq)
        bool shared = false;
        int cpu;
 
+       if (blk_mq_need_time_stamp(rq))
+               rq->io_end_time_ns = ktime_get_ns();
+
        WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
        /*
         * Most of single queue controllers, there is only one irq vector
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 940f15d600f8..9b9b30927ea8 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -48,7 +48,7 @@ void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value)
        stat->nr_samples++;
 }
 
-void blk_stat_add(struct request *rq, u64 now)
+void blk_stat_add(struct request *rq)
 {
        struct request_queue *q = rq->q;
        struct blk_stat_callback *cb;
@@ -56,7 +56,7 @@ void blk_stat_add(struct request *rq, u64 now)
        int bucket;
        u64 value;
 
-       value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
+       value = blk_rq_io_time(rq);
 
        blk_throtl_stat_add(rq, value);
 
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 17b47a86eefb..2653818cee36 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -65,7 +65,7 @@ struct blk_stat_callback {
 struct blk_queue_stats *blk_alloc_queue_stats(void);
 void blk_free_queue_stats(struct blk_queue_stats *);
 
-void blk_stat_add(struct request *rq, u64 now);
+void blk_stat_add(struct request *rq);
 
 /* record time/size info in request but not add a callback */
 void blk_stat_enable_accounting(struct request_queue *q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 592669bcc536..2a8d4b68d707 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -198,6 +198,9 @@ struct request {
        u64 start_time_ns;
        /* Time that I/O was submitted to the device. */
        u64 io_start_time_ns;
+       /* Time that I/O was reported completed by the device. */
+       u64 io_end_time_ns;
+
 
 #ifdef CONFIG_BLK_WBT
        unsigned short wbt_flags;
@@ -385,6 +388,14 @@ static inline int blkdev_reset_zones_ioctl(struct 
block_device *bdev,
 
 #endif /* CONFIG_BLK_DEV_ZONED */
 
+static inline u64 blk_rq_io_time(struct request *rq)
+{
+       u64 start = rq->io_start_time_ns;
+       u64 end = rq->io_end_time_ns;
+
+       return (end - start) ? end - start : 0;
+}
+
 struct request_queue {
        /*
         * Together with queue_head for cacheline sharing
-- 
2.21.0

Reply via email to