From: Ming Lin <min...@ssi.samsung.com>

In commit b49a087("block: remove split code in
blkdev_issue_{discard,write_same}"), discard_granularity and alignment
checks were removed. Ideally, with bio late splitting, the upper layers
shouldn't need to depend on device's limits.

Christoph reported a discard regression on the HGST Ultrastar SN100 NVMe
device when mkfs.xfs. We have not found the root cause yet.

This patch re-adds discard_granularity and alignment checks by reverting
the related changes in commit b49a087. The good thing is now we can
remove the 2G discard size cap and just use UINT_MAX to avoid bi_size
overflow.

Reviewed-by: Christoph Hellwig <h...@lst.de>
Tested-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Ming Lin <min...@ssi.samsung.com>
---
 block/blk-lib.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index bd40292..9ebf653 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -26,13 +26,6 @@ static void bio_batch_end_io(struct bio *bio)
        bio_put(bio);
 }
 
-/*
- * Ensure that max discard sectors doesn't overflow bi_size and hopefully
- * it is of the proper granularity as long as the granularity is a power
- * of two.
- */
-#define MAX_BIO_SECTORS ((1U << 31) >> 9)
-
 /**
  * blkdev_issue_discard - queue a discard
  * @bdev:      blockdev to issue discard for
@@ -50,6 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t 
sector,
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q = bdev_get_queue(bdev);
        int type = REQ_WRITE | REQ_DISCARD;
+       unsigned int granularity;
+       int alignment;
        struct bio_batch bb;
        struct bio *bio;
        int ret = 0;
@@ -61,6 +56,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t 
sector,
        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
 
+       /* Zero-sector (unknown) and one-sector granularities are the same.  */
+       granularity = max(q->limits.discard_granularity >> 9, 1U);
+       alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
+
        if (flags & BLKDEV_DISCARD_SECURE) {
                if (!blk_queue_secdiscard(q))
                        return -EOPNOTSUPP;
@@ -74,7 +73,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t 
sector,
        blk_start_plug(&plug);
        while (nr_sects) {
                unsigned int req_sects;
-               sector_t end_sect;
+               sector_t end_sect, tmp;
 
                bio = bio_alloc(gfp_mask, 1);
                if (!bio) {
@@ -82,8 +81,22 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t 
sector,
                        break;
                }
 
-               req_sects = min_t(sector_t, nr_sects, MAX_BIO_SECTORS);
+               /* Make sure bi_size doesn't overflow */
+               req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
+
+               /*
+                * If splitting a request, and the next starting sector would be
+                * misaligned, stop the discard at the previous aligned sector.
+                */
                end_sect = sector + req_sects;
+               tmp = end_sect;
+               if (req_sects < nr_sects &&
+                   sector_div(tmp, granularity) != alignment) {
+                       end_sect = end_sect - alignment;
+                       sector_div(end_sect, granularity);
+                       end_sect = end_sect * granularity + alignment;
+                       req_sects = end_sect - sector;
+               }
 
                bio->bi_iter.bi_sector = sector;
                bio->bi_end_io = bio_batch_end_io;
-- 
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to