On Fri, 12 Sep 2025, Bart Van Assche wrote:

> On 9/12/25 12:56 PM, Mikulas Patocka wrote:
> > If the table has only linear targets and there is just one underlying
> > device, we can optimize REQ_PREFLUSH with data - we don't have to split
> > it into two bios - a flush and a write. We can pass it to the linear
> > target directly.
> 
> Thanks! Since this patch works fine on my test setup:
> 
> Tested-by: Bart Van Assche <[email protected]>
> 
> > @@ -1996,11 +1993,19 @@ static void dm_split_and_process_bio(str
> >     }
> >     init_clone_info(&ci, io, map, bio, is_abnormal);
> >   - if (bio->bi_opf & REQ_PREFLUSH) {
> > +   if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) {
> > +           if (map->flush_bypasses_map) {
> 
> How map->flush_bypasses_map makes submitting REQ_PREFLUSH bios that have
> data safe is nontrivial so it's probably a good idea to explain this in
> a comment.
> 
> > +                   struct list_head *devices = dm_table_get_devices(map);
> > +                   if (devices->next == devices->prev)
> > +                           goto send_preflush_with_data;
> 
> Should it be explained in a comment that devices->next == devices->prev
> checks whether there are either no elements or one element in the list
> and that we know that the devices list is not empty?
> 
> > +           }
> > +           if (bio->bi_iter.bi_size)
> > +                   io->requeue_flush_with_data = true;
> >             __send_empty_flush(&ci);
> >             /* dm_io_complete submits any data associated with flush */
> >             goto out;
> >     }
> > +send_preflush_with_data:
> >             if (static_branch_unlikely(&zoned_enabled) &&
> >         (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
> 
> Isn't it recommended to leave a blank line above goto labels rather than
> below?
> 
> Thanks,
> 
> Bart.

OK. Here I added the comment and I am sending version 4 of the patch.

Mikulas



From: Mikulas Patocka <[email protected]>

If the table has only linear targets and there is just one underlying
device, we can optimize REQ_PREFLUSH with data - we don't have to split
it into two bios - a flush and a write. We can pass it to the linear
target directly.

Signed-off-by: Mikulas Patocka <[email protected]>
Tested-by: Bart Van Assche <[email protected]>

---
 drivers/md/dm-core.h |    1 +
 drivers/md/dm.c      |   31 +++++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c      2025-09-15 17:30:25.000000000 +0200
+++ linux-2.6/drivers/md/dm.c   2025-09-15 17:35:47.000000000 +0200
@@ -490,18 +490,13 @@ u64 dm_start_time_ns_from_clone(struct b
 }
 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
 
-static inline bool bio_is_flush_with_data(struct bio *bio)
-{
-       return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
-}
-
 static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
 {
        /*
         * If REQ_PREFLUSH set, don't account payload, it will be
         * submitted (and accounted) after this flush completes.
         */
-       if (bio_is_flush_with_data(bio))
+       if (io->requeue_flush_with_data)
                return 0;
        if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
                return io->sectors;
@@ -590,6 +585,7 @@ static struct dm_io *alloc_io(struct map
        io = container_of(tio, struct dm_io, tio);
        io->magic = DM_IO_MAGIC;
        io->status = BLK_STS_OK;
+       io->requeue_flush_with_data = false;
 
        /* one ref is for submission, the other is for completion */
        atomic_set(&io->io_count, 2);
@@ -976,11 +972,12 @@ static void __dm_io_complete(struct dm_i
        if (requeued)
                return;
 
-       if (bio_is_flush_with_data(bio)) {
+       if (unlikely(io->requeue_flush_with_data)) {
                /*
                 * Preflush done for flush with data, reissue
                 * without REQ_PREFLUSH.
                 */
+               io->requeue_flush_with_data = false;
                bio->bi_opf &= ~REQ_PREFLUSH;
                queue_io(md, bio);
        } else {
@@ -1996,12 +1993,30 @@ static void dm_split_and_process_bio(str
        }
        init_clone_info(&ci, io, map, bio, is_abnormal);
 
-       if (bio->bi_opf & REQ_PREFLUSH) {
+       if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) {
+               /*
+                * The "flush_bypasses_map" is set on targets where it is safe
+                * to skip the map function and submit bios directly to the
+                * underlying block devices - currently, it is set for dm-linear
+                * and dm-stripe.
+                *
+                * If we have just one underlying device (i.e. there is one
+                * linear target or multiple linear targets pointing to the same
+                * device), we can send the flush with data directly to it.
+                */
+               if (map->flush_bypasses_map) {
+                       struct list_head *devices = dm_table_get_devices(map);
+                       if (devices->next == devices->prev)
+                               goto send_preflush_with_data;
+               }
+               if (bio->bi_iter.bi_size)
+                       io->requeue_flush_with_data = true;
                __send_empty_flush(&ci);
                /* dm_io_complete submits any data associated with flush */
                goto out;
        }
 
+send_preflush_with_data:
        if (static_branch_unlikely(&zoned_enabled) &&
            (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
                error = __send_zone_reset_all(&ci);
Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h 2025-09-15 17:30:25.000000000 +0200
+++ linux-2.6/drivers/md/dm-core.h      2025-09-15 17:30:25.000000000 +0200
@@ -291,6 +291,7 @@ struct dm_io {
        struct dm_io *next;
        struct dm_stats_aux stats_aux;
        blk_status_t status;
+       bool requeue_flush_with_data;
        atomic_t io_count;
        struct mapped_device *md;
 


Reply via email to