On Tue, 24 Jun 2025 07:33:49 +0000 Dongsheng Yang <dongsheng.y...@linux.dev> wrote:
> This patch introduces *backing_dev.{c,h}*, a self-contained layer that > handles all interaction with the *backing block device* where cache > write-back and cache-miss reads are serviced. Isolating this logic > keeps the core dm-pcache code free of low-level bio plumbing. > > * Device setup / teardown > - Opens the target with `dm_get_device()`, stores `bdev`, file and > size, and initialises a dedicated `bioset`. > - Gracefully releases resources via `backing_dev_stop()`. > > * Request object (`struct pcache_backing_dev_req`) > - Two request flavours: > - REQ-type – cloned from an upper `struct bio` issued to > dm-pcache; trimmed and re-targeted to the backing LBA. > - KMEM-type – maps an arbitrary kernel memory buffer > into a freshly built bio. > - Private completion callback (`end_req`) propagates status to the > upper layer and handles resource recycling. > > * Submission & completion path > - Lock-protected submit queue + worker (`req_submit_work`) let pcache > push many requests asynchronously, at the same time, allow caller > to submit backing_dev_req in atomic context. > - End-io handler moves finished requests to a completion list processed > by `req_complete_work`, ensuring callbacks run in process context. > - Direct-submit option for non-atomic context. > > * Flush > - `backing_dev_flush()` issues a flush to persist backing-device data.
> > Signed-off-by: Dongsheng Yang <dongsheng.y...@linux.dev> > --- > drivers/md/dm-pcache/backing_dev.c | 292 +++++++++++++++++++++++++++++ > drivers/md/dm-pcache/backing_dev.h | 88 +++++++++ > 2 files changed, 380 insertions(+) > create mode 100644 drivers/md/dm-pcache/backing_dev.c > create mode 100644 drivers/md/dm-pcache/backing_dev.h > > diff --git a/drivers/md/dm-pcache/backing_dev.c > b/drivers/md/dm-pcache/backing_dev.c > new file mode 100644 > index 000000000000..590c6415319d > --- /dev/null > +++ b/drivers/md/dm-pcache/backing_dev.c > @@ -0,0 +1,292 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +#include <linux/blkdev.h> > + > +#include "../dm-core.h" > +#include "pcache_internal.h" > +#include "cache_dev.h" > +#include "backing_dev.h" > +#include "cache.h" > +#include "dm_pcache.h" > + > +static void backing_dev_exit(struct pcache_backing_dev *backing_dev) > +{ > + kmem_cache_destroy(backing_dev->backing_req_cache); > +} > + > +static void req_submit_fn(struct work_struct *work); > +static void req_complete_fn(struct work_struct *work); > +static int backing_dev_init(struct dm_pcache *pcache) > +{ > + struct pcache_backing_dev *backing_dev = &pcache->backing_dev; > + int ret; > + > + backing_dev->backing_req_cache = KMEM_CACHE(pcache_backing_dev_req, 0); > + if (!backing_dev->backing_req_cache) { > + ret = -ENOMEM; return -ENOMEM; and drop the err label. > + goto err; > + } > + > + INIT_LIST_HEAD(&backing_dev->submit_list); > + INIT_LIST_HEAD(&backing_dev->complete_list); > + spin_lock_init(&backing_dev->submit_lock); > + spin_lock_init(&backing_dev->complete_lock); > + INIT_WORK(&backing_dev->req_submit_work, req_submit_fn); > + INIT_WORK(&backing_dev->req_complete_work, req_complete_fn); > + > + return 0; > +err: > + return ret; > +} > +static void req_complete_fn(struct work_struct *work) > +{ > + struct pcache_backing_dev *backing_dev = container_of(work, struct > pcache_backing_dev, req_complete_work); Very long line. Wrap it somewhere. 
> + struct pcache_backing_dev_req *backing_req; > + LIST_HEAD(tmp_list); > + > + spin_lock_irq(&backing_dev->complete_lock); > + list_splice_init(&backing_dev->complete_list, &tmp_list); > + spin_unlock_irq(&backing_dev->complete_lock); > + > + while (!list_empty(&tmp_list)) { > + backing_req = list_first_entry(&tmp_list, > + struct pcache_backing_dev_req, > node); > + list_del_init(&backing_req->node); > + backing_dev_req_end(backing_req); > + } > +} > + > +static void backing_dev_bio_end(struct bio *bio) > +{ > + struct pcache_backing_dev_req *backing_req = bio->bi_private; > + struct pcache_backing_dev *backing_dev = backing_req->backing_dev; > + unsigned long flags; > + > + backing_req->ret = bio->bi_status; > + > + spin_lock_irqsave(&backing_dev->complete_lock, flags); > + list_move_tail(&backing_req->node, &backing_dev->complete_list); > + queue_work(BACKING_DEV_TO_PCACHE(backing_dev)->task_wq, > &backing_dev->req_complete_work); > + spin_unlock_irqrestore(&backing_dev->complete_lock, flags); > +} > + > +static void req_submit_fn(struct work_struct *work) > +{ > + struct pcache_backing_dev *backing_dev = container_of(work, struct > pcache_backing_dev, req_submit_work); Very long line. 
Wrap after = > + struct pcache_backing_dev_req *backing_req; > + LIST_HEAD(tmp_list); > + > + spin_lock(&backing_dev->submit_lock); > + list_splice_init(&backing_dev->submit_list, &tmp_list); > + spin_unlock(&backing_dev->submit_lock); > + > + while (!list_empty(&tmp_list)) { > + backing_req = list_first_entry(&tmp_list, > + struct pcache_backing_dev_req, > node); > + list_del_init(&backing_req->node); > + submit_bio_noacct(&backing_req->bio); > + } > +} > + > +static void bio_map(struct bio *bio, void *base, size_t size) > +{ > + struct page *page; > + unsigned int offset; > + unsigned int len; > + > + if (!is_vmalloc_addr(base)) { > + page = virt_to_page(base); > + offset = offset_in_page(base); > + > + BUG_ON(!bio_add_page(bio, page, size, offset)); BUG_ON(!bio_add_page(bio, virt_to_page(base), size, offset_in_page(base))); Seems readable enough. Obviously that depends on whether those local variables get more usage in later patches. > + return; > + } > + > + flush_kernel_vmap_range(base, size); > + while (size) { > + page = vmalloc_to_page(base); > + offset = offset_in_page(base); > + len = min_t(size_t, PAGE_SIZE - offset, size); > + > + BUG_ON(!bio_add_page(bio, page, len, offset)); > + size -= len; > + base += len; > + } > +} > + > +static struct pcache_backing_dev_req *kmem_type_req_create(struct > pcache_backing_dev *backing_dev, > + struct > pcache_backing_dev_req_opts *opts) > +{ > + struct pcache_backing_dev_req *backing_req; > + struct bio *backing_bio; > + u32 n_vecs = get_n_vecs(opts->kmem.data, opts->kmem.len); > + > + backing_req = kmem_cache_zalloc(backing_dev->backing_req_cache, > opts->gfp_mask); > + if (!backing_req) > + return NULL; > + > + if (n_vecs > BACKING_DEV_REQ_INLINE_BVECS) { > + backing_req->kmem.bvecs = kmalloc_array(n_vecs, sizeof(struct > bio_vec), opts->gfp_mask); > + if (!backing_req->kmem.bvecs) > + goto err_free_req; > + } else { > + backing_req->kmem.bvecs = backing_req->kmem.inline_bvecs; > + } > + > + backing_req->type = 
BACKING_DEV_REQ_TYPE_KMEM; > + > + bio_init(&backing_req->bio, backing_dev->dm_dev->bdev, > backing_req->kmem.bvecs, > + n_vecs, opts->kmem.opf); Odd alignment. Align second line under & > + > + backing_bio = &backing_req->bio; > + bio_map(backing_bio, opts->kmem.data, opts->kmem.len); > + > + backing_bio->bi_iter.bi_sector = (opts->kmem.backing_off) >> > SECTOR_SHIFT; > + backing_bio->bi_private = backing_req; > + backing_bio->bi_end_io = backing_dev_bio_end; > + > + backing_req->backing_dev = backing_dev; > + INIT_LIST_HEAD(&backing_req->node); > + backing_req->end_req = opts->end_fn; > + backing_req->priv_data = opts->priv_data; Bit of a mixture of formatting between aligned = and not. Pick one style. I prefer never forcing alignment but others do like it. I'm fine with that too, just not a mix. > + > + return backing_req; > + > +err_free_req: > + kmem_cache_free(backing_dev->backing_req_cache, backing_req); > + return NULL; > +} > + > +struct pcache_backing_dev_req *backing_dev_req_create(struct > pcache_backing_dev *backing_dev, > + struct > pcache_backing_dev_req_opts *opts) > +{ > + if (opts->type == BACKING_DEV_REQ_TYPE_REQ) > + return req_type_req_create(backing_dev, opts); > + else if (opts->type == BACKING_DEV_REQ_TYPE_KMEM) returned in earlier branch so go with simpler if (opts->type..) Or use a switch statement if you expect to get more entries in this over time. > + return kmem_type_req_create(backing_dev, opts); > + > + return NULL; > +} > + > +void backing_dev_flush(struct pcache_backing_dev *backing_dev) > +{ > + blkdev_issue_flush(backing_dev->dm_dev->bdev); > +}