For devices which do not support copy offload, copy emulation is added.
Copy emulation is implemented by reading from the source ranges into
memory and writing to the corresponding destination ranges
asynchronously.
For zoned devices we maintain a linked list of read submissions and try
to submit the corresponding writes in the same order.
Emulation is also used if copy offload fails or completes only
partially.

Signed-off-by: Nitesh Shetty <nj.she...@samsung.com>
Signed-off-by: Vincent Fu <vincent...@samsung.com>
Signed-off-by: Anuj Gupta <anuj2...@samsung.com>
---
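
For reference, a minimal hypothetical caller sketch (not part of this
patch): it assumes the struct range_entry layout used in this series,
byte-granular src/dst/len/comp_len fields, and that passing a NULL
end_io makes blkdev_issue_copy() wait for completion. The helper name
copy_one_range is made up for illustration.

#include <linux/blkdev.h>

static int copy_one_range(struct block_device *src_bdev,
			  struct block_device *dst_bdev,
			  sector_t src_off, sector_t dst_off, sector_t len)
{
	/* Single copy range; comp_len is updated with the completed length. */
	struct range_entry range = {
		.src = src_off,		/* source offset, in bytes */
		.dst = dst_off,		/* destination offset, in bytes */
		.len = len,		/* number of bytes to copy */
		.comp_len = 0,
	};

	/*
	 * Offload is attempted first; blk_copy_emulate() takes over when
	 * offload is unsupported, fails, or completes only partially.
	 */
	return blkdev_issue_copy(src_bdev, dst_bdev, &range, 1,
				 NULL, NULL, GFP_KERNEL);
}
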
 block/blk-lib.c        | 241 ++++++++++++++++++++++++++++++++++++++++-
 block/blk-map.c        |   4 +-
 include/linux/blkdev.h |   3 +
 3 files changed, 245 insertions(+), 3 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2ce3c872ca49..43b1d0ef5732 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -428,6 +428,239 @@ static inline int blk_copy_sanity_check(struct block_device *src_bdev,
        return 0;
 }
 
+static void *blk_alloc_buf(sector_t req_size, sector_t *alloc_size,
+               gfp_t gfp_mask)
+{
+       int min_size = PAGE_SIZE;
+       void *buf;
+
+       while (req_size >= min_size) {
+               buf = kvmalloc(req_size, gfp_mask);
+               if (buf) {
+                       *alloc_size = req_size;
+                       return buf;
+               }
+               /* retry half the requested size */
+               req_size >>= 1;
+       }
+
+       return NULL;
+}
+
+static void blk_copy_emulate_write_end_io(struct bio *bio)
+{
+       struct copy_ctx *ctx = bio->bi_private;
+       struct cio *cio = ctx->cio;
+       sector_t clen;
+       int ri = ctx->range_idx;
+
+       if (bio->bi_status) {
+               cio->io_err = blk_status_to_errno(bio->bi_status);
+               clen = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+                       cio->ranges[ri].dst;
+               cio->ranges[ri].comp_len = min_t(sector_t, clen,
+                               cio->ranges[ri].comp_len);
+       }
+       kvfree(page_address(bio->bi_io_vec[0].bv_page));
+       bio_map_kern_endio(bio);
+       if (atomic_dec_and_test(&ctx->refcount))
+               kfree(ctx);
+       if (atomic_dec_and_test(&cio->refcount)) {
+               if (cio->endio) {
+                       cio->endio(cio->private, cio->io_err);
+                       kfree(cio);
+               } else
+                       blk_wake_io_task(cio->waiter);
+       }
+}
+
+static void blk_copy_emulate_read_end_io(struct bio *read_bio)
+{
+       struct copy_ctx *ctx = read_bio->bi_private;
+       struct cio *cio = ctx->cio;
+       sector_t clen;
+       int ri = ctx->range_idx;
+       unsigned long flags;
+
+       if (read_bio->bi_status) {
+               cio->io_err = blk_status_to_errno(read_bio->bi_status);
+               goto err_rw_bio;
+       }
+
+       /*
+        * For zoned devices, check whether the completed bio is the first
+        * entry in the linked list. If it is, start the worker to submit
+        * the write bios. If not, only update the status of the bio in ctx;
+        * once the worker gets scheduled, it will submit writes for all
+        * consecutive REQ_COPY_READ_COMPLETE bios.
+        */
+       if (bdev_is_zoned(ctx->write_bio->bi_bdev)) {
+               spin_lock_irqsave(&cio->list_lock, flags);
+               ctx->status = REQ_COPY_READ_COMPLETE;
+               if (ctx == list_first_entry(&cio->list,
+                                       struct copy_ctx, list)) {
+                       spin_unlock_irqrestore(&cio->list_lock, flags);
+                       schedule_work(&ctx->dispatch_work);
+                       goto free_read_bio;
+               }
+               spin_unlock_irqrestore(&cio->list_lock, flags);
+       } else
+               schedule_work(&ctx->dispatch_work);
+
+free_read_bio:
+       kfree(read_bio);
+
+       return;
+
+err_rw_bio:
+       clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+                                       cio->ranges[ri].src;
+       cio->ranges[ri].comp_len = min_t(sector_t, clen,
+                                       cio->ranges[ri].comp_len);
+       __free_page(read_bio->bi_io_vec[0].bv_page);
+       bio_map_kern_endio(read_bio);
+       if (atomic_dec_and_test(&ctx->refcount))
+               kfree(ctx);
+       if (atomic_dec_and_test(&cio->refcount)) {
+               if (cio->endio) {
+                       cio->endio(cio->private, cio->io_err);
+                       kfree(cio);
+               } else
+                       blk_wake_io_task(cio->waiter);
+       }
+}
+
+/*
+ * If native copy offload feature is absent, this function tries to emulate,
+ * by copying data from source to a temporary buffer and from buffer to
+ * destination device.
+ */
+static int blk_copy_emulate(struct block_device *src_bdev,
+               struct block_device *dst_bdev, struct range_entry *ranges,
+               int nr, cio_iodone_t end_io, void *private, gfp_t gfp_mask)
+{
+       struct request_queue *sq = bdev_get_queue(src_bdev);
+       struct request_queue *dq = bdev_get_queue(dst_bdev);
+       struct bio *read_bio, *write_bio;
+       void *buf = NULL;
+       struct copy_ctx *ctx;
+       struct cio *cio;
+       sector_t src, dst, offset, buf_len, req_len, rem = 0;
+       int ri = 0, ret = 0;
+       unsigned long flags;
+       sector_t max_src_hw_len = min_t(unsigned int, queue_max_hw_sectors(sq),
+                       queue_max_segments(sq) << (PAGE_SHIFT - SECTOR_SHIFT))
+                       << SECTOR_SHIFT;
+       sector_t max_dst_hw_len = min_t(unsigned int, queue_max_hw_sectors(dq),
+                       queue_max_segments(dq) << (PAGE_SHIFT - SECTOR_SHIFT))
+                       << SECTOR_SHIFT;
+       sector_t max_hw_len = min_t(unsigned int,
+                       max_src_hw_len, max_dst_hw_len);
+
+       cio = kzalloc(sizeof(struct cio), GFP_KERNEL);
+       if (!cio)
+               return -ENOMEM;
+       cio->ranges = ranges;
+       atomic_set(&cio->refcount, 1);
+       cio->waiter = current;
+       cio->endio = end_io;
+       cio->private = private;
+
+       if (bdev_is_zoned(dst_bdev)) {
+               INIT_LIST_HEAD(&cio->list);
+               spin_lock_init(&cio->list_lock);
+       }
+
+       for (ri = 0; ri < nr; ri++) {
+               offset = ranges[ri].comp_len;
+               src = ranges[ri].src + offset;
+               dst = ranges[ri].dst + offset;
+               /* If IO fails, we truncate comp_len */
+               ranges[ri].comp_len = ranges[ri].len;
+
+               for (rem = ranges[ri].len - offset; rem > 0; rem -= buf_len) {
+                       req_len = min_t(int, max_hw_len, rem);
+
+                       buf = blk_alloc_buf(req_len, &buf_len, gfp_mask);
+                       if (!buf) {
+                               ret = -ENOMEM;
+                               goto err_alloc_buf;
+                       }
+
+                       ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
+                       if (!ctx) {
+                               ret = -ENOMEM;
+                               goto err_ctx;
+                       }
+
+                       read_bio = bio_map_kern(sq, buf, buf_len, gfp_mask);
+                       if (IS_ERR(read_bio)) {
+                               ret = PTR_ERR(read_bio);
+                               goto err_read_bio;
+                       }
+
+                       write_bio = bio_map_kern(dq, buf, buf_len, gfp_mask);
+                       if (IS_ERR(write_bio)) {
+                               ret = PTR_ERR(write_bio);
+                               goto err_write_bio;
+                       }
+
+                       ctx->cio = cio;
+                       ctx->range_idx = ri;
+                       ctx->write_bio = write_bio;
+                       atomic_set(&ctx->refcount, 1);
+
+                       read_bio->bi_iter.bi_sector = src >> SECTOR_SHIFT;
+                       read_bio->bi_iter.bi_size = buf_len;
+                       read_bio->bi_opf = REQ_OP_READ | REQ_SYNC;
+                       bio_set_dev(read_bio, src_bdev);
+                       read_bio->bi_end_io = blk_copy_emulate_read_end_io;
+                       read_bio->bi_private = ctx;
+
+                       write_bio->bi_iter.bi_size = buf_len;
+                       write_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+                       bio_set_dev(write_bio, dst_bdev);
+                       write_bio->bi_end_io = blk_copy_emulate_write_end_io;
+                       write_bio->bi_iter.bi_sector = dst >> SECTOR_SHIFT;
+                       write_bio->bi_private = ctx;
+
+                       if (bdev_is_zoned(dst_bdev)) {
+                               INIT_WORK(&ctx->dispatch_work,
+                                       blk_zoned_copy_dispatch_work_fn);
+                               INIT_LIST_HEAD(&ctx->list);
+                               spin_lock_irqsave(&cio->list_lock, flags);
+                               ctx->status = REQ_COPY_READ_PROGRESS;
+                               list_add_tail(&ctx->list, &cio->list);
+                               spin_unlock_irqrestore(&cio->list_lock, flags);
+                       } else
+                               INIT_WORK(&ctx->dispatch_work,
+                                       blk_copy_dispatch_work_fn);
+
+                       atomic_inc(&cio->refcount);
+                       submit_bio(read_bio);
+
+                       src += buf_len;
+                       dst += buf_len;
+               }
+       }
+
+       /* Wait for completion of all IOs */
+       return cio_await_completion(cio);
+
+err_write_bio:
+       bio_put(read_bio);
+err_read_bio:
+       kfree(ctx);
+err_ctx:
+       kvfree(buf);
+err_alloc_buf:
+       ranges[ri].comp_len -= min_t(sector_t,
+                       ranges[ri].comp_len, (ranges[ri].len - rem));
+
+       cio->io_err = ret;
+       return cio_await_completion(cio);
+}
+
 static inline bool blk_check_copy_offload(struct request_queue *src_q,
                struct request_queue *dst_q)
 {
@@ -460,15 +693,21 @@ int blkdev_issue_copy(struct block_device *src_bdev,
        struct request_queue *src_q = bdev_get_queue(src_bdev);
        struct request_queue *dst_q = bdev_get_queue(dst_bdev);
        int ret = -EINVAL;
+       bool offload = false;
 
        ret = blk_copy_sanity_check(src_bdev, dst_bdev, ranges, nr);
        if (ret)
                return ret;
 
-       if (blk_check_copy_offload(src_q, dst_q))
+       offload = blk_check_copy_offload(src_q, dst_q);
+       if (offload)
                ret = blk_copy_offload(src_bdev, dst_bdev, ranges, nr,
                                end_io, private, gfp_mask);
 
+       if (ret || !offload)
+               ret = blk_copy_emulate(src_bdev, dst_bdev, ranges, nr,
+                               end_io, private, gfp_mask);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_issue_copy);
diff --git a/block/blk-map.c b/block/blk-map.c
index 19940c978c73..bcf8db2b75f1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -363,7 +363,7 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 #endif
 }
 
-static void bio_map_kern_endio(struct bio *bio)
+void bio_map_kern_endio(struct bio *bio)
 {
        bio_invalidate_vmalloc_pages(bio);
        bio_uninit(bio);
@@ -380,7 +380,7 @@ static void bio_map_kern_endio(struct bio *bio)
  *     Map the kernel address into a bio suitable for io to a block
  *     device. Returns an error pointer in case of error.
  */
-static struct bio *bio_map_kern(struct request_queue *q, void *data,
+struct bio *bio_map_kern(struct request_queue *q, void *data,
                unsigned int len, gfp_t gfp_mask)
 {
        unsigned long kaddr = (unsigned long)data;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 48e9160b7195..c5621550e5b4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1066,6 +1066,9 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 int blkdev_issue_copy(struct block_device *src_bdev,
                struct block_device *dst_bdev, struct range_entry *ranges,
                int nr, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+               gfp_t gfp_mask);
+void bio_map_kern_endio(struct bio *bio);
 
 #define BLKDEV_ZERO_NOUNMAP    (1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK (1 << 1)  /* don't write explicit zeroes */
-- 
2.35.1.500.gb896f729e2
