From: Yu Kuai <yuku...@huawei.com>

behind write rely on bitmap, because the number of IO are recorded in
bitmap->behind_writes, and callers rely on bitmap_wait_behind_writes()
to wait for IO to be done.

However, currently callers doesn't check if bitmap is enabeld before
calling into behind methods. Hence if behind write start without bitmap,
readers will not wait for slow write IO to be done and old data can be
read in some corner cases.

Signed-off-by: Yu Kuai <yuku...@huawei.com>
---
 drivers/md/md-bitmap.c |  6 ------
 drivers/md/raid1.c     | 45 ++++++++++++++++++++++++++----------------
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index a079cbe2e6f1..5bd75fa6ee1d 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2049,9 +2049,6 @@ static void bitmap_start_behind_write(struct mddev *mddev)
        struct bitmap *bitmap = mddev->bitmap;
        int bw;
 
-       if (!bitmap)
-               return;
-
        atomic_inc(&bitmap->behind_writes);
        bw = atomic_read(&bitmap->behind_writes);
        if (bw > bitmap->behind_writes_used)
@@ -2065,9 +2062,6 @@ static void bitmap_end_behind_write(struct mddev *mddev)
 {
        struct bitmap *bitmap = mddev->bitmap;
 
-       if (!bitmap)
-               return;
-
        if (atomic_dec_and_test(&bitmap->behind_writes))
                wake_up(&bitmap->behind_wait);
        pr_debug("dec write-behind count %d/%lu\n",
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a46ce996ea9c..015eabb502ec 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1366,7 +1366,8 @@ static void raid1_read_request(struct mddev *mddev, 
struct bio *bio,
                                    (unsigned long long)r1_bio->sector,
                                    mirror->rdev->bdev);
 
-       if (test_bit(WriteMostly, &mirror->rdev->flags)) {
+       if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+           md_bitmap_enabled(mddev, false)) {
                /*
                 * Reading from a write-mostly device must take care not to
                 * over-take any writes that are 'behind'
@@ -1452,6 +1453,30 @@ static bool wait_blocked_rdev(struct mddev *mddev, 
struct bio *bio)
        return true;
 }
 
+static void raid1_start_write_behind(struct mddev *mddev, struct r1bio *r1_bio,
+                                    struct bio *bio)
+{
+       unsigned long max_write_behind = mddev->bitmap_info.max_write_behind;
+       struct md_bitmap_stats stats;
+       int err;
+
+       /* behind write rely on bitmap, see bitmap_operations */
+       if (!md_bitmap_enabled(mddev, false))
+               return;
+
+       err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+       if (err)
+               return;
+
+       /* Don't do behind IO if reader is waiting, or there are too many. */
+       if (!stats.behind_wait && stats.behind_writes < max_write_behind)
+               alloc_behind_master_bio(r1_bio, bio);
+
+       if (test_bit(R1BIO_BehindIO, &r1_bio->state))
+               mddev->bitmap_ops->start_behind_write(mddev);
+
+}
+
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                                int max_write_sectors)
 {
@@ -1612,22 +1637,8 @@ static void raid1_write_request(struct mddev *mddev, 
struct bio *bio,
                        continue;
 
                if (first_clone) {
-                       unsigned long max_write_behind =
-                               mddev->bitmap_info.max_write_behind;
-                       struct md_bitmap_stats stats;
-                       int err;
-
-                       /* do behind I/O ?
-                        * Not if there are too many, or cannot
-                        * allocate memory, or a reader on WriteMostly
-                        * is waiting for behind writes to flush */
-                       err = mddev->bitmap_ops->get_stats(mddev->bitmap, 
&stats);
-                       if (!err && write_behind && !stats.behind_wait &&
-                           stats.behind_writes < max_write_behind)
-                               alloc_behind_master_bio(r1_bio, bio);
-
-                       if (test_bit(R1BIO_BehindIO, &r1_bio->state))
-                               mddev->bitmap_ops->start_behind_write(mddev);
+                       if (write_behind)
+                               raid1_start_write_behind(mddev, r1_bio, bio);
                        first_clone = 0;
                }
 
-- 
2.39.2


Reply via email to