The dm core splits REQ_PREFLUSH bios that have data into two bios.
First, a REQ_PREFLUSH bio without data is submitted to all underlying
dm devices. Once that flush completes, the REQ_PREFLUSH flag is
cleared and the same bio is resubmitted. If there are multiple
underlying devices, this approach is essential for providing correct
REQ_PREFLUSH semantics.
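
For reference, the submission side of this split looks roughly as
follows (a simplified sketch of the dm_split_and_process_bio() logic,
not verbatim code):

        if (bio->bi_opf & REQ_PREFLUSH) {
                /*
                 * Phase 1: send an empty flush clone to every target.
                 * The data payload is held back until the flush
                 * completes.
                 */
                __send_empty_flush(&ci);
                /* dm_io_complete() reissues the data payload later. */
                goto out;
        }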

Splitting a bio into an empty flush bio and a non-flush data bio is
not necessary if there is only a single underlying device. Hence, do
not split REQ_PREFLUSH bios if there is only one underlying device.

This also preserves the order of REQ_PREFLUSH writes if there is only
one underlying device: because the data payload is no longer deferred
until the empty flush completes, write bios queued after the
REQ_PREFLUSH bio but before it is processed can no longer overtake it.
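
The reordering risk with splitting comes from the completion path,
which looks roughly as follows (a simplified sketch of the
__dm_io_complete() logic, not verbatim code):

        if (bio_is_flush_with_data(bio)) {
                /*
                 * The empty preflush is done; reissue the payload
                 * without REQ_PREFLUSH. Requeueing here, instead of
                 * keeping the bio's original queue position, is what
                 * lets later writes overtake it.
                 */
                bio->bi_opf &= ~REQ_PREFLUSH;
                queue_io(md, bio);
        }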

Cc: Mike Snitzer <[email protected]>
Cc: Damien Le Moal <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---

Changes compared to v1:
 - Made the patch description more detailed.
 - Removed the reference to write pipelining from the patch description.

 drivers/md/dm.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 66dd5f6ce778..d0791eef21f7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -490,9 +490,13 @@ u64 dm_start_time_ns_from_clone(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
 
-static inline bool bio_is_flush_with_data(struct bio *bio)
+static inline bool bio_is_flush_with_data(struct mapped_device *md,
+                                         struct bio *bio)
 {
-       return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
+       guard(rcu)();
+
+       return bio->bi_opf & REQ_PREFLUSH && bio->bi_iter.bi_size &&
+              ((struct dm_table *)rcu_dereference(md->map))->num_targets > 1;
 }
 
 static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
@@ -501,7 +505,7 @@ static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
         * If REQ_PREFLUSH set, don't account payload, it will be
         * submitted (and accounted) after this flush completes.
         */
-       if (bio_is_flush_with_data(bio))
+       if (bio_is_flush_with_data(io->md, bio))
                return 0;
        if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
                return io->sectors;
@@ -976,7 +980,7 @@ static void __dm_io_complete(struct dm_io *io, bool first_stage)
        if (requeued)
                return;
 
-       if (bio_is_flush_with_data(bio)) {
+       if (bio_is_flush_with_data(md, bio)) {
                /*
                 * Preflush done for flush with data, reissue
                 * without REQ_PREFLUSH.
@@ -1715,7 +1719,7 @@ static void dm_queue_poll_io(struct bio *bio, struct dm_io *io)
 }
 
 /*
- * Select the correct strategy for processing a non-flush bio.
+ * Select the correct strategy for processing a bio.
  */
 static blk_status_t __split_and_process_bio(struct clone_info *ci)
 {
@@ -1996,7 +2000,7 @@ static void dm_split_and_process_bio(struct mapped_device *md,
        }
        init_clone_info(&ci, io, map, bio, is_abnormal);
 
-       if (bio->bi_opf & REQ_PREFLUSH) {
+       if (bio->bi_opf & REQ_PREFLUSH && ci.map->num_targets > 1) {
                __send_empty_flush(&ci);
                /* dm_io_complete submits any data associated with flush */
                goto out;
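
Note for reviewers: guard(rcu)() relies on the scope-based cleanup
infrastructure from <linux/cleanup.h> and drops the RCU read lock
automatically when bio_is_flush_with_data() returns. The new check is
equivalent to this open-coded form (a sketch, for illustration only):

        bool ret;

        rcu_read_lock();
        ret = (bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size &&
              ((struct dm_table *)rcu_dereference(md->map))->num_targets > 1;
        rcu_read_unlock();
        return ret;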
