On Fri, 12 Sep 2025, Bart Van Assche wrote:
> On 9/12/25 12:56 PM, Mikulas Patocka wrote:
> > If the table has only linear targets and there is just one underlying
> > device, we can optimize REQ_PREFLUSH with data - we don't have to split
> > it to two bios - a flush and a write. We can pass it to the linear target
> > directly.
>
> Thanks! Since this patch works fine on my test setup:
>
> Tested-by: Bart Van Assche <[email protected]>
>
> > @@ -1996,11 +1993,19 @@ static void dm_split_and_process_bio(str
> >  	}
> >  	init_clone_info(&ci, io, map, bio, is_abnormal);
> > -	if (bio->bi_opf & REQ_PREFLUSH) {
> > +	if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) {
> > +		if (map->flush_bypasses_map) {
>
> How map->flush_bypasses_map makes submitting REQ_PREFLUSH bios that have
> data safe is nontrivial so it's probably a good idea to explain this in
> a comment.
>
> > +			struct list_head *devices = dm_table_get_devices(map);
> > +			if (devices->next == devices->prev)
> > +				goto send_preflush_with_data;
>
> Should it be explained in a comment that devices->next == devices->prev
> checks whether there are either no elements or one element in the list
> and that we know that the devices list is not empty?
>
> > +		}
> > +		if (bio->bi_iter.bi_size)
> > +			io->requeue_flush_with_data = true;
> >  		__send_empty_flush(&ci);
> >  		/* dm_io_complete submits any data associated with flush */
> >  		goto out;
> >  	}
> > +send_preflush_with_data:
> >  	if (static_branch_unlikely(&zoned_enabled) &&
> >  	    (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
>
> Isn't it recommended to leave a blank line above goto labels rather than
> below?
>
> Thanks,
>
> Bart.
OK, I added the comment; here is version 4 of the patch.
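Regarding the devices->next == devices->prev question: for a struct
list_head, next == prev holds both for an empty list and for a list with
exactly one entry (in the single-entry case both pointers reference that
entry). A table whose targets all set flush_bypasses_map has at least one
underlying device, so the test effectively means "exactly one device"
(the kernel's list_is_singular() expresses the same condition for a
non-empty list). A minimal user-space sketch of the pointer behaviour
(not kernel code, just an illustration):

/*
 * Sketch: on a circular doubly linked list, head->next == head->prev
 * distinguishes "zero or one entries" from "two or more entries".
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void init_list_head(struct list_head *head)
{
	head->next = head;
	head->prev = head;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head head, a, b;

	init_list_head(&head);
	printf("empty:       next == prev -> %d\n", head.next == head.prev); /* 1 */

	list_add_tail(&a, &head);
	printf("one entry:   next == prev -> %d\n", head.next == head.prev); /* 1 */

	list_add_tail(&b, &head);
	printf("two entries: next == prev -> %d\n", head.next == head.prev); /* 0 */

	return 0;
}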
Mikulas
From: Mikulas Patocka <[email protected]>
If the table has only linear targets and there is just one underlying
device, we can optimize REQ_PREFLUSH with data - we don't have to split
it into two bios (a flush and a write); we can pass it to the linear
target directly.
Signed-off-by: Mikulas Patocka <[email protected]>
Tested-by: Bart Van Assche <[email protected]>
---
 drivers/md/dm-core.h |    1 +
 drivers/md/dm.c      |   31 +++++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c 2025-09-15 17:30:25.000000000 +0200
+++ linux-2.6/drivers/md/dm.c 2025-09-15 17:35:47.000000000 +0200
@@ -490,18 +490,13 @@ u64 dm_start_time_ns_from_clone(struct b
 }
 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
 
-static inline bool bio_is_flush_with_data(struct bio *bio)
-{
-	return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
-}
-
 static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
 {
 	/*
 	 * If REQ_PREFLUSH set, don't account payload, it will be
 	 * submitted (and accounted) after this flush completes.
 	 */
-	if (bio_is_flush_with_data(bio))
+	if (io->requeue_flush_with_data)
 		return 0;
 	if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
 		return io->sectors;
@@ -590,6 +585,7 @@ static struct dm_io *alloc_io(struct map
 	io = container_of(tio, struct dm_io, tio);
 	io->magic = DM_IO_MAGIC;
 	io->status = BLK_STS_OK;
+	io->requeue_flush_with_data = false;
 
 	/* one ref is for submission, the other is for completion */
 	atomic_set(&io->io_count, 2);
@@ -976,11 +972,12 @@ static void __dm_io_complete(struct dm_i
 	if (requeued)
 		return;
 
-	if (bio_is_flush_with_data(bio)) {
+	if (unlikely(io->requeue_flush_with_data)) {
 		/*
 		 * Preflush done for flush with data, reissue
 		 * without REQ_PREFLUSH.
 		 */
+		io->requeue_flush_with_data = false;
 		bio->bi_opf &= ~REQ_PREFLUSH;
 		queue_io(md, bio);
 	} else {
@@ -1996,12 +1993,30 @@ static void dm_split_and_process_bio(str
 	}
 	init_clone_info(&ci, io, map, bio, is_abnormal);
 
-	if (bio->bi_opf & REQ_PREFLUSH) {
+	if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) {
+		/*
+		 * The "flush_bypasses_map" is set on targets where it is safe
+		 * to skip the map function and submit bios directly to the
+		 * underlying block devices - currently, it is set for dm-linear
+		 * and dm-stripe.
+		 *
+		 * If we have just one underlying device (i.e. there is one
+		 * linear target or multiple linear targets pointing to the same
+		 * device), we can send the flush with data directly to it.
+		 */
+		if (map->flush_bypasses_map) {
+			struct list_head *devices = dm_table_get_devices(map);
+			if (devices->next == devices->prev)
+				goto send_preflush_with_data;
+		}
+		if (bio->bi_iter.bi_size)
+			io->requeue_flush_with_data = true;
 		__send_empty_flush(&ci);
 		/* dm_io_complete submits any data associated with flush */
 		goto out;
 	}
 
+send_preflush_with_data:
 	if (static_branch_unlikely(&zoned_enabled) &&
 	    (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
 		error = __send_zone_reset_all(&ci);
Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h 2025-09-15 17:30:25.000000000 +0200
+++ linux-2.6/drivers/md/dm-core.h 2025-09-15 17:30:25.000000000 +0200
@@ -291,6 +291,7 @@ struct dm_io {
 	struct dm_io *next;
 	struct dm_stats_aux stats_aux;
 	blk_status_t status;
+	bool requeue_flush_with_data;
 	atomic_t io_count;
 	struct mapped_device *md;