Some hardware can support encryption of big blocks of data, but the
original dm-crypt is bio-based and handles only one bio at a time,
which limits its efficiency for big-block encryption.

This patch introduces a request-based method to handle big blocks,
where one request can contain more than one bio at a time for dm-crypt.
For now a config macro enables the request-based method, so that the
original code path can still run successfully.

Signed-off-by: Baolin Wang <baolin.w...@linaro.org>
---
 drivers/md/Kconfig    |    6 +
 drivers/md/dm-crypt.c |  831 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 835 insertions(+), 2 deletions(-)
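
[Note for reviewers, not part of the patch: a minimal sketch of the core
idea, with error handling, IV generation and the separate output-page
allocation for writes elided (it encrypts in place here for brevity),
and with names chosen for illustration only. Because a request carries
several bios, the whole request can be mapped to one scatterlist and
handed to the crypto engine in a single call, instead of issuing one
cipher request per bio as the bio-based path does.]

    /* Illustrative only: encrypt a whole cloned request in one call. */
    static int sketch_encrypt_request(struct request *clone,
                                      struct ablkcipher_request *areq,
                                      struct scatterlist *sg, u8 *iv)
    {
            unsigned int nents;

            /* One mapping covers every bio in the request. */
            nents = blk_rq_map_sg(clone->q, clone, sg);
            if (!nents)
                    return -EINVAL;

            /* Hand all of the request's data to the cipher at once. */
            ablkcipher_request_set_crypt(areq, sg, sg,
                                         blk_rq_bytes(clone), iv);
            return crypto_ablkcipher_encrypt(areq);
    }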

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index d5415ee..aea1db0 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -266,6 +266,12 @@ config DM_CRYPT
 
          If unsure, say N.
 
+config DM_REQ_CRYPT
+       bool "Crypt target support with request"
+       depends on BLK_DEV_DM
+       select CRYPTO
+       select CRYPTO_CBC
+
 config DM_SNAPSHOT
        tristate "Snapshot target"
        depends on BLK_DEV_DM
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d60c88d..e21a1ed15 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -28,10 +28,13 @@
 #include <crypto/hash.h>
 #include <crypto/md5.h>
 #include <crypto/algapi.h>
+#include <linux/buffer_head.h>
 
 #include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
+#define DM_MAX_SG_LIST (1024)
+#define BIO_INLINE_VECS        (4)
 
 /*
  * context holding the current state of a multi-part conversion
@@ -64,10 +67,27 @@ struct dm_crypt_io {
        struct rb_node rb_node;
 } CRYPTO_MINALIGN_ATTR;
 
+struct dm_req_crypt_io {
+       struct crypt_config *cc;
+       struct work_struct work;
+       struct request *cloned_request;
+       struct convert_context ctx;
+
+       int error;
+       atomic_t pending;
+       sector_t sector;
+       struct rb_node rb_node;
+
+       bool should_encrypt;
+       bool should_decrypt;
+};
+
 struct dm_crypt_request {
        struct convert_context *ctx;
        struct scatterlist sg_in;
        struct scatterlist sg_out;
+       struct sg_table req_sgt_in;
+       struct sg_table req_sgt_out;
        sector_t iv_sector;
 };
 
@@ -127,6 +147,10 @@ struct crypt_config {
         */
        mempool_t *req_pool;
        mempool_t *page_pool;
+
+       struct kmem_cache *req_crypt_io_pool;
+       mempool_t *req_io_pool;
+
        struct bio_set *bs;
        struct mutex bio_alloc_lock;
 
@@ -184,6 +208,7 @@ struct crypt_config {
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
 static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
+static int req_crypt_write_work(void *data);
 
 /*
  * Use this to access cipher attributes that are the same for each CPU.
@@ -1547,6 +1572,8 @@ static void crypt_dtr(struct dm_target *ti)
                mempool_destroy(cc->page_pool);
        if (cc->req_pool)
                mempool_destroy(cc->req_pool);
+       if (cc->req_io_pool)
+               mempool_destroy(cc->req_io_pool);
 
        if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
                cc->iv_gen_ops->dtr(cc);
@@ -1556,6 +1583,7 @@ static void crypt_dtr(struct dm_target *ti)
 
        kzfree(cc->cipher);
        kzfree(cc->cipher_string);
+       kmem_cache_destroy(cc->req_crypt_io_pool);
 
        /* Must zero key material before freeing */
        kzfree(cc);
@@ -1796,7 +1824,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad;
        }
 
-       cc->bs = bioset_create(MIN_IOS, 0);
+       cc->req_crypt_io_pool = KMEM_CACHE(dm_req_crypt_io, 0);
+       if (!cc->req_crypt_io_pool) {
+               ti->error = "Cannot allocate req_crypt_io_pool";
+               goto bad;
+       }
+
+       cc->req_io_pool = mempool_create_slab_pool(MIN_IOS,
+                                                  cc->req_crypt_io_pool);
+       if (!cc->req_io_pool) {
+               ti->error = "Cannot allocate request io mempool";
+               goto bad;
+       }
+
+       cc->bs = bioset_create(BIO_MAX_PAGES, 0);
        if (!cc->bs) {
                ti->error = "Cannot allocate crypt bioset";
                goto bad;
@@ -1880,7 +1920,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        init_waitqueue_head(&cc->write_thread_wait);
        cc->write_tree = RB_ROOT;
 
+#ifndef CONFIG_DM_REQ_CRYPT
        cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+#else
+       cc->write_thread = kthread_create(req_crypt_write_work,
+                                         cc, "req_dmcrypt_write");
+#endif
        if (IS_ERR(cc->write_thread)) {
                ret = PTR_ERR(cc->write_thread);
                cc->write_thread = NULL;
@@ -2045,14 +2090,796 @@ static int crypt_iterate_devices(struct dm_target *ti,
        return fn(ti, cc->dev, cc->start, ti->len, data);
 }
 
+/*
+ * If bio->bi_bdev is a partition, remap the location.
+ */
+static inline void req_crypt_blk_partition_remap(struct bio *bio)
+{
+       struct block_device *bdev = bio->bi_bdev;
+
+       if (bio_sectors(bio) && bdev != bdev->bd_contains) {
+               struct hd_struct *p = bdev->bd_part;
+               /* Check for integer overflow, should never happen. */
+               if (p->start_sect > (UINT_MAX - bio->bi_iter.bi_sector))
+                       return;
+
+               bio->bi_iter.bi_sector += p->start_sect;
+               bio->bi_bdev = bdev->bd_contains;
+       }
+}
+
+static void req_crypt_dispatch_io(struct dm_req_crypt_io *io)
+{
+       struct request *clone = io->cloned_request;
+       struct request *rq = dm_get_orig_rq(clone);
+
+       dm_dispatch_clone_request(clone, rq);
+}
+
+static void req_crypt_free_resource(struct dm_req_crypt_io *io)
+{
+       struct crypt_config *cc = io->cc;
+       struct ablkcipher_request *req = io->ctx.req;
+       struct dm_crypt_request *dmreq = dmreq_of_req(cc, req);
+
+       if (dmreq->req_sgt_out.orig_nents > 0)
+               sg_free_table(&dmreq->req_sgt_out);
+
+       if (dmreq->req_sgt_in.orig_nents > 0)
+               sg_free_table(&dmreq->req_sgt_in);
+
+       mempool_free(req, cc->req_pool);
+       mempool_free(io, cc->req_io_pool);
+}
+
+static void req_crypt_inc_pending(struct dm_req_crypt_io *io)
+{
+       atomic_inc(&io->pending);
+}
+
+static void req_crypt_dec_pending_encrypt(struct dm_req_crypt_io *io)
+{
+       struct request *clone = io->cloned_request;
+       int error = io->error;
+
+       atomic_dec(&io->pending);
+
+       if (error < 0) {
+               dm_kill_unmapped_request(clone, error);
+               req_crypt_free_resource(io);
+       }
+}
+
+static void req_crypt_dec_pending_decrypt(struct dm_req_crypt_io *io)
+{
+       struct request *clone = io->cloned_request;
+       int error = io->error;
+
+       atomic_dec(&io->pending);
+
+       dm_end_request(clone, error);
+       req_crypt_free_resource(io);
+}
+
+/*
+ * This callback is called by the worker queue to pass through writes that
+ * need no encryption, and it uses the dm functions to complete the bios
+ * and requests.
+ */
+static void req_crypt_write_plain(struct dm_req_crypt_io *io)
+{
+       io->error = 0;
+       req_crypt_dispatch_io(io);
+}
+
+/*
+ * This callback is called by the worker queue to pass through reads that
+ * need no decryption, and it uses the dm functions to complete the bios
+ * and requests.
+ */
+static void req_crypt_read_plain(struct dm_req_crypt_io *io)
+{
+       struct crypt_config *cc = io->cc;
+       struct request *clone = io->cloned_request;
+
+       dm_end_request(clone, 0);
+       mempool_free(io, cc->req_io_pool);
+}
+
+#define req_crypt_io_from_node(node) rb_entry((node), struct dm_req_crypt_io, rb_node)
+static int req_crypt_write_work(void *data)
+{
+       struct crypt_config *cc = data;
+       struct dm_req_crypt_io *io;
+
+       while (1) {
+               struct rb_root write_tree;
+               struct blk_plug plug;
+               DECLARE_WAITQUEUE(wait, current);
+
+               spin_lock_irq(&cc->write_thread_wait.lock);
+
+continue_locked:
+               if (!RB_EMPTY_ROOT(&cc->write_tree))
+                       goto pop_from_list;
+
+               __set_current_state(TASK_INTERRUPTIBLE);
+               __add_wait_queue(&cc->write_thread_wait, &wait);
+
+               spin_unlock_irq(&cc->write_thread_wait.lock);
+
+               if (unlikely(kthread_should_stop())) {
+                       set_task_state(current, TASK_RUNNING);
+                       remove_wait_queue(&cc->write_thread_wait, &wait);
+                       break;
+               }
+
+               schedule();
+
+               set_task_state(current, TASK_RUNNING);
+               spin_lock_irq(&cc->write_thread_wait.lock);
+               __remove_wait_queue(&cc->write_thread_wait, &wait);
+               goto continue_locked;
+
+pop_from_list:
+               write_tree = cc->write_tree;
+               cc->write_tree = RB_ROOT;
+               spin_unlock_irq(&cc->write_thread_wait.lock);
+
+               BUG_ON(rb_parent(write_tree.rb_node));
+
+               blk_start_plug(&plug);
+               do {
+                       io = req_crypt_io_from_node(rb_first(&write_tree));
+                       rb_erase(&io->rb_node, &write_tree);
+                       req_crypt_dispatch_io(io);
+               } while (!RB_EMPTY_ROOT(&write_tree));
+               blk_finish_plug(&plug);
+       }
+
+       return 0;
+}
+
+static void req_crypt_write_io_submit(struct dm_req_crypt_io *io, int async)
+{
+       struct crypt_config *cc = io->cc;
+       unsigned long flags;
+       sector_t sector;
+       struct rb_node **rbp, *parent;
+
+       if (io->error < 0)
+               return;
+
+       if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
+               req_crypt_dispatch_io(io);
+               return;
+       }
+
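+       /*
+        * Queue the io on the rb-tree, sorted by sector, so that the
+        * dedicated write thread can dispatch writes in sector order.
+        */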
+       spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+       rbp = &cc->write_tree.rb_node;
+       parent = NULL;
+       sector = io->sector;
+
+       while (*rbp) {
+               parent = *rbp;
+               if (sector < req_crypt_io_from_node(parent)->sector)
+                       rbp = &(*rbp)->rb_left;
+               else
+                       rbp = &(*rbp)->rb_right;
+       }
+
+       rb_link_node(&io->rb_node, parent, rbp);
+       rb_insert_color(&io->rb_node, &cc->write_tree);
+
+       wake_up_locked(&cc->write_thread_wait);
+       spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+}
+
+/*
+ * Cipher complete callback; this is triggered by the Linux crypto API once
+ * the operation is done. It signals the waiting thread that the crypto
+ * operation is complete.
+ */
+static void req_crypt_cipher_complete(struct crypto_async_request *req,
+                                      int err)
+{
+       struct dm_crypt_request *dmreq = req->data;
+       struct convert_context *ctx = dmreq->ctx;
+       struct dm_req_crypt_io *io =
+               container_of(ctx, struct dm_req_crypt_io, ctx);
+       struct crypt_config *cc = io->cc;
+
+       if (err == -EINPROGRESS)
+               return;
+
+       io->error = err;
+       atomic_dec(&io->ctx.cc_pending);
+       complete(&io->ctx.restart);
+
+       if (!err && cc->iv_gen_ops && cc->iv_gen_ops->post)
+               err = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
+}
+
+static int req_crypt_alloc_req(struct crypt_config *cc,
+                               struct convert_context *ctx)
+{
+       /* TODO: reconsider how the key index is selected here */
+       unsigned int key_index = ctx->cc_sector & (cc->tfms_count - 1);
+       struct dm_crypt_request *dmreq;
+
+       ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+       if (!ctx->req)
+               return -ENOMEM;
+
+       dmreq = dmreq_of_req(cc, ctx->req);
+       dmreq->req_sgt_in.orig_nents = 0;
+       dmreq->req_sgt_out.orig_nents = 0;
+
+       crypto_ablkcipher_clear_flags(cc->tfms[key_index], ~0);
+       ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
+
+       /*
+        * Use CRYPTO_TFM_REQ_MAY_BACKLOG so the cipher driver internally
+        * backlogs requests if its request queue is full.
+        */
+       ablkcipher_request_set_callback(ctx->req,
+           CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+           req_crypt_cipher_complete, dmreq_of_req(cc, ctx->req));
+
+       return 0;
+}
+
+/*
+ * Free the pages that were allocated for the write operation, and also
+ * free the bvecs if any were allocated.
+ */
+static void req_crypt_free_pages(struct crypt_config *cc,
+                                 struct request *clone)
+{
+       struct req_iterator iter;
+       struct bio_vec bvec;
+       struct bio *bio_t;
+       int nr_iovecs = 0;
+
+       rq_for_each_segment(bvec, clone, iter) {
+               if (bvec.bv_offset == 0 && bvec.bv_page)
+                       mempool_free(bvec.bv_page, cc->page_pool);
+               bvec.bv_page = NULL;
+       }
+
+       __rq_for_each_bio(bio_t, clone) {
+               nr_iovecs = bio_t->bi_max_vecs;
+               if (nr_iovecs > BIO_INLINE_VECS) {
+                       BIO_BUG_ON(BIO_POOL_IDX(bio_t) >= BIOVEC_NR_POOLS);
+                       bvec_free(cc->bs->bvec_pool, bio_t->bi_io_vec,
+                                 BIO_POOL_IDX(bio_t));
+               }
+       }
+}
+
+/*
+ * Allocate the pages for write operation.
+ */
+static int req_crypt_alloc_pages(struct crypt_config *cc,
+                                 struct request *clone)
+{
+       gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+       struct page *page = NULL;
+       struct bio_vec *bvl = NULL;
+       struct bio_vec *bv = NULL;
+       struct bio *bio_t = NULL;
+       unsigned long idx = BIO_POOL_NONE;
+       struct bio_vec bvec;
+       struct bvec_iter biter;
+       int nr_iovecs = 0, i = 0, remaining_size = 0;
+
+       /*
+        * Cloning the request does not copy the bi_vcnt and bi_max_vecs
+        * of each bio, so set them here.
+        */
+       __rq_for_each_bio(bio_t, clone) {
+               nr_iovecs = 0;
+               bio_for_each_segment(bvec, bio_t, biter)
+                       nr_iovecs++;
+               bio_t->bi_vcnt = bio_t->bi_max_vecs = nr_iovecs;
+       }
+
+       /*
+        * Cloning the original request also clones its bios, but the
+        * cloned bios are not given copies of the pages the original
+        * bios point to; they still point at the same pages. Allocate
+        * new pages here so the clone bios can carry the data handed
+        * to the crypto engine.
+        */
+       __rq_for_each_bio(bio_t, clone) {
+               nr_iovecs = bio_t->bi_max_vecs;
+               if (nr_iovecs > BIO_INLINE_VECS)
+                       bvl = bvec_alloc(GFP_NOIO, nr_iovecs,
+                                        &idx, cc->bs->bvec_pool);
+               else if (nr_iovecs)
+                       bvl = bio_t->bi_inline_vecs;
+
+               if (!bvl)
+                       return -ENOMEM;
+
+               memcpy(bvl, bio_t->bi_io_vec,
+                      nr_iovecs * sizeof(struct bio_vec));
+               bio_t->bi_max_vecs = nr_iovecs;
+               bio_t->bi_io_vec = bvl;
+               if (idx < BIO_POOL_NONE) {
+                       bio_t->bi_flags &= ~(BIO_POOL_NONE << BIO_POOL_OFFSET);
+                       bio_t->bi_flags |= idx << BIO_POOL_OFFSET;
+               }
+       }
+
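+       /*
+        * Walk every bvec of the clone and back it with a page from the
+        * mempool, packing consecutive bvecs into the same page while
+        * it still has room.
+        */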
+       __rq_for_each_bio(bio_t, clone) {
+               bio_for_each_segment_all(bv, bio_t, i) {
+                       if (bv->bv_len > remaining_size) {
+                               page = NULL;
+                               while (page == NULL) {
+                                       page = mempool_alloc(cc->page_pool,
+                                                            gfp_mask);
+                                       if (!page) {
+                                               DMERR("%s page alloc failed",
+                                                     __func__);
+                                               congestion_wait(BLK_RW_ASYNC,
+                                                               HZ/100);
+                                       }
+                               }
+
+                               bv->bv_page = page;
+                               bv->bv_offset = 0;
+                               remaining_size = PAGE_SIZE - bv->bv_len;
+                               if (remaining_size < 0)
+                                       BUG();
+                       } else {
+                               bv->bv_page = page;
+                               bv->bv_offset = PAGE_SIZE - remaining_size;
+                               remaining_size = remaining_size - bv->bv_len;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Compute in advance how many sg entries are needed to map one request
+ * with a scatterlist.
+ */
+static unsigned int req_crypt_clone_sg_entry(struct request *clone)
+{
+       struct request_queue *q = clone->q;
+       struct bio_vec bvec, bvprv = { NULL };
+       struct bio *bio_t = NULL;
+       struct bvec_iter biter;
+       unsigned int nbytes, sg_length, sg_cnt = 0;
+
+       __rq_for_each_bio(bio_t, clone) {
+               sg_length = 0;
+               bio_for_each_segment(bvec, bio_t, biter) {
+                       nbytes = bvec.bv_len;
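+                       /*
+                        * Mirror the merge rules of blk_rq_map_sg(): a
+                        * new sg entry is needed when the queue's segment
+                        * size limit would be exceeded, when two bvecs are
+                        * not physically mergeable, or when a segment
+                        * boundary would be crossed.
+                        */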
+                       if (sg_length + nbytes > queue_max_segment_size(q)) {
+                               sg_length = 0;
+                               sg_cnt++;
+                               goto next;
+                       }
+
+                       if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
+                               sg_length = 0;
+                               sg_cnt++;
+                               goto next;
+                       }
+
+                       if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bvec)) {
+                               sg_length = 0;
+                               sg_cnt++;
+                               goto next;
+                       }
+
+                       sg_length += nbytes;
+next:
+                       memcpy(&bvprv, &bvec, sizeof(struct bio_vec));
+               }
+       }
+
+       return sg_cnt;
+}
+
+static int req_crypt_convert_block(struct crypt_config *cc,
+                                  struct request *clone,
+                                  struct convert_context *ctx)
+{
+       struct ablkcipher_request *req = ctx->req;
+       struct dm_crypt_request *dmreq = dmreq_of_req(cc, req);
+       u8 *iv = iv_of_dmreq(cc, dmreq);
+       struct scatterlist *req_sg_in = NULL;
+       struct scatterlist *req_sg_out = NULL;
+       unsigned int total_sg_len_req_in = 0;
+       unsigned int total_sg_len_req_out = 0;
+       unsigned int total_bytes_in_req = 0;
+       unsigned int sg_in_max = 0, sg_out_max = 0;
+       int ret;
+
+       dmreq->iv_sector = ctx->cc_sector;
+       dmreq->ctx = ctx;
+       atomic_set(&ctx->cc_pending, 1);
+
+       /*
+        * Calculate how many sg entries are needed for this clone.
+        */
+       sg_in_max = req_crypt_clone_sg_entry(clone) + 1;
+       if (sg_in_max > DM_MAX_SG_LIST || sg_in_max <= 0) {
+               DMERR("%s too many sg entries or none: %d\n",
+                     __func__, sg_in_max);
+               return -EINVAL;
+       } else if (sg_in_max == 2) {
+               req_sg_in = &dmreq->sg_in;
+       }
+
+       if (!req_sg_in) {
+               ret = sg_alloc_table(&dmreq->req_sgt_in,
+                                    sg_in_max, GFP_KERNEL);
+               if (ret) {
+                       DMERR("%s sg in allocation failed\n", __func__);
+                       return -ENOMEM;
+               }
+
+               req_sg_in = dmreq->req_sgt_in.sgl;
+       }
+
+       total_sg_len_req_in = blk_rq_map_sg(clone->q, clone, req_sg_in);
+       if (!total_sg_len_req_in ||
+           total_sg_len_req_in > sg_in_max) {
+               DMERR("%s in sg map error %d\n", __func__, total_sg_len_req_in);
+               return -EINVAL;
+       }
+
+       total_bytes_in_req = clone->__data_len;
+
+       if (rq_data_dir(clone) == READ)
+               goto set_crypt;
+
+       ret = req_crypt_alloc_pages(cc, clone);
+       if (ret < 0) {
+               DMERR("%s alloc request pages failed\n", __func__);
+               return -ENOMEM;
+       }
+
+       sg_out_max = req_crypt_clone_sg_entry(clone) + 1;
+       if (sg_out_max > DM_MAX_SG_LIST || sg_out_max <= 0) {
+               DMERR("%s too many sg entries or none: %d\n",
+                     __func__, sg_out_max);
+               return -EINVAL;
+       } else if (sg_out_max == 2) {
+               req_sg_out = &dmreq->sg_out;
+       }
+
+       if (!req_sg_out) {
+               ret = sg_alloc_table(&dmreq->req_sgt_out,
+                                    sg_out_max, GFP_KERNEL);
+               if (ret) {
+                       DMERR("%s sg out allocation failed\n", __func__);
+                       return -ENOMEM;
+               }
+
+               req_sg_out = dmreq->req_sgt_out.sgl;
+       }
+
+       total_sg_len_req_out = blk_rq_map_sg(clone->q, clone, req_sg_out);
+       if (!total_sg_len_req_out ||
+           total_sg_len_req_out > sg_out_max) {
+               DMERR("%s out sg map error %d\n",
+                     __func__, total_sg_len_req_out);
+               return -EINVAL;
+       }
+
+set_crypt:
+       if (cc->iv_gen_ops) {
+               ret = cc->iv_gen_ops->generator(cc, iv, dmreq);
+               if (ret < 0) {
+                       DMERR("%s generator iv error %d\n", __func__, ret);
+                       return ret;
+               }
+       }
+
+       atomic_inc(&ctx->cc_pending);
+
+       if (rq_data_dir(clone) == WRITE) {
+               ablkcipher_request_set_crypt(req, req_sg_in,
+                       req_sg_out, total_bytes_in_req, iv);
+
+               ret = crypto_ablkcipher_encrypt(req);
+       } else {
+               ablkcipher_request_set_crypt(req, req_sg_in,
+                       req_sg_in, total_bytes_in_req, iv);
+
+               ret = crypto_ablkcipher_decrypt(req);
+       }
+
+       if (!ret && cc->iv_gen_ops && cc->iv_gen_ops->post)
+               ret = cc->iv_gen_ops->post(cc, iv, dmreq);
+
+       return ret;
+}
+
+static void req_crypt_write_convert(struct dm_req_crypt_io *io)
+{
+       struct request *clone = io->cloned_request;
+       struct bio *bio_src = NULL;
+       struct crypt_config *cc = io->cc;
+       int crypt_finished;
+       int ret = 0, err = 0;
+
+       req_crypt_inc_pending(io);
+
+       crypt_convert_init(cc, &io->ctx, NULL, NULL, io->sector);
+       req_crypt_alloc_req(cc, &io->ctx);
+
+       ret = req_crypt_convert_block(cc, clone, &io->ctx);
+       switch (ret) {
+       case 0:
+               atomic_dec(&io->ctx.cc_pending);
+               break;
+       case -EBUSY:
+               /*
+                * Let's make this request synchronous by also waiting
+                * for the in-progress case.
+                */
+       case -EINPROGRESS:
+               wait_for_completion_io(&io->ctx.restart);
+               if (io->error) {
+                       err = -EIO;
+                       goto crypt_error;
+               }
+               break;
+       default:
+               err = -EIO;
+               atomic_dec(&io->ctx.cc_pending);
+               break;
+       }
+
+       __rq_for_each_bio(bio_src, clone)
+               blk_queue_bounce(clone->q, &bio_src);
+
+crypt_error:
+       if (err == -EIO)
+               req_crypt_free_pages(cc, clone);
+
+       if (io)
+               io->error = err;
+
+       /* Encryption was already finished, submit io now */
+       crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+       if (crypt_finished)
+               req_crypt_write_io_submit(io, 0);
+       else
+               io->error = -EIO;
+
+       req_crypt_dec_pending_encrypt(io);
+}
+
+static void req_crypt_read_convert(struct dm_req_crypt_io *io)
+{
+       struct crypt_config *cc = io->cc;
+       struct request *clone = io->cloned_request;
+       int ret = 0, err = 0;
+
+       req_crypt_inc_pending(io);
+
+       /* io->sector needs to be initialized */
+       crypt_convert_init(cc, &io->ctx, NULL, NULL, io->sector);
+       req_crypt_alloc_req(cc, &io->ctx);
+
+       ret = req_crypt_convert_block(cc, clone, &io->ctx);
+       switch (ret) {
+       case 0:
+               atomic_dec(&io->ctx.cc_pending);
+               break;
+       case -EBUSY:
+               /*
+                * Let's make this request synchronous by also waiting
+                * for the in-progress case.
+                */
+       case -EINPROGRESS:
+               wait_for_completion_io(&io->ctx.restart);
+               if (io->error)
+                       err = -EIO;
+               break;
+       default:
+               err = -EIO;
+               atomic_dec(&io->ctx.cc_pending);
+               break;
+       }
+
+       if (io)
+               io->error = err;
+
+       if (!atomic_dec_and_test(&io->ctx.cc_pending))
+               DMWARN("%s decryption was not finished\n", __func__);
+
+       req_crypt_dec_pending_decrypt(io);
+}
+
+/* Work queue callback that routes each request to the proper handler. */
+static void req_crypt_work(struct work_struct *work)
+{
+       struct dm_req_crypt_io *io =
+                       container_of(work, struct dm_req_crypt_io, work);
+
+       if (rq_data_dir(io->cloned_request) == WRITE) {
+               if (io->should_encrypt)
+                       req_crypt_write_convert(io);
+               else
+                       req_crypt_write_plain(io);
+       } else if (rq_data_dir(io->cloned_request) == READ) {
+               if (io->should_decrypt)
+                       req_crypt_read_convert(io);
+               else
+                       req_crypt_read_plain(io);
+       } else {
+               DMERR("%s received non-read/write request for clone 0x%p\n",
+                     __func__, io->cloned_request);
+       }
+}
+
+static void req_crypt_queue(struct dm_req_crypt_io *io)
+{
+       struct crypt_config *cc = io->cc;
+
+       INIT_WORK(&io->work, req_crypt_work);
+       queue_work(cc->crypt_queue, &io->work);
+}
+
+static bool req_crypt_should_encrypt(struct dm_req_crypt_io *req)
+{
+       if (!req || !req->cloned_request || !req->cloned_request->bio)
+               return false;
+
+       /* There may be other cases to consider here. */
+       return true;
+}
+
+static bool req_crypt_should_decrypt(struct dm_req_crypt_io *req)
+{
+       if (!req || !req->cloned_request || !req->cloned_request->bio)
+               return false;
+
+       /* There may be other cases to consider here. */
+       return true;
+}
+
+static void crypt_req_io_init(struct dm_req_crypt_io *io,
+                             struct crypt_config *cc,
+                             struct request *clone,
+                             sector_t sector)
+{
+       io->cc = cc;
+       io->sector = sector;
+       io->cloned_request = clone;
+       io->error = 0;
+       io->ctx.req = NULL;
+       atomic_set(&io->pending, 0);
+
+       if (rq_data_dir(clone) == WRITE)
+               io->should_encrypt = req_crypt_should_encrypt(io);
+       else if (rq_data_dir(clone) == READ)
+               io->should_decrypt = req_crypt_should_decrypt(io);
+       else
+               io->should_decrypt = 0;
+}
+
+/*
+ * This function is called with interrupts disabled.
+ * It remaps the clone for the underlying device. For a write request
+ * it defers to the worker queue, which encrypts the data and then
+ * submits the request directly using the elevator. A read request
+ * needs no pre-processing, so it is returned to dm once the mapping
+ * is done.
+ */
+static int req_crypt_map(struct dm_target *ti, struct request *clone,
+                        union map_info *map_context)
+{
+       struct crypt_config *cc = ti->private;
+       int copy_bio_sector_to_req = 0;
+       struct dm_req_crypt_io *req_io;
+       struct bio *bio_src;
+
+       if ((rq_data_dir(clone) != READ) && (rq_data_dir(clone) != WRITE)) {
+               DMERR("%s unknown request.\n", __func__);
+               return -EINVAL;
+       }
+
+       req_io = mempool_alloc(cc->req_io_pool, GFP_NOWAIT);
+       if (!req_io) {
+               DMERR("%s req io allocation failed.\n", __func__);
+               return -ENOMEM;
+       }
+
+       map_context->ptr = req_io;
+
+       /* Get the queue of the underlying original device */
+       clone->q = bdev_get_queue(cc->dev->bdev);
+       clone->rq_disk = cc->dev->bdev->bd_disk;
+
+       __rq_for_each_bio(bio_src, clone) {
+               bio_src->bi_bdev = cc->dev->bdev;
+               /*
+                * If the request is REQ_FLUSH or REQ_DISCARD, just bypass
+                * the crypt queues; the block layer will free the bios of
+                * such a request when completing the bypass.
+                */
+               if (clone->cmd_flags & REQ_DISCARD
+                   || clone->cmd_flags & REQ_FLUSH)
+                       continue;
+
+               bio_set_flag(bio_src, BIO_ENDIO_FREE);
+
+               /*
+                * If this device has partitions, remap block n
+                * of partition p to block n+start(p) of the disk.
+                */
+               req_crypt_blk_partition_remap(bio_src);
+               if (copy_bio_sector_to_req == 0) {
+                       clone->__sector = bio_src->bi_iter.bi_sector;
+                       copy_bio_sector_to_req++;
+               }
+               blk_queue_bounce(clone->q, &bio_src);
+       }
+
+       crypt_req_io_init(req_io, cc, clone,
+                         dm_target_offset(ti, clone->__sector));
+
+       if (rq_data_dir(clone) == READ) {
+               return DM_MAPIO_REMAPPED;
+       } else if (rq_data_dir(clone) == WRITE) {
+               req_crypt_queue(req_io);
+               return DM_MAPIO_SUBMITTED;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * The endio function is called from ksoftirqd context (atomic).
+ * For write operations, the new pages taken from the mempool are
+ * freed and returned. For read operations, decryption is still
+ * required; since this is called in atomic context, the request is
+ * sent to a worker queue to complete the decryption and free the
+ * request once done.
+ */
+static int req_crypt_endio(struct dm_target *ti, struct request *clone,
+                          int error, union map_info *map_context)
+{
+       struct dm_req_crypt_io *req_io = map_context->ptr;
+       struct crypt_config *cc = ti->private;
+       int ret = 0;
+
+       /* For a write request, just free the allocated pages and resources. */
+       if (rq_data_dir(clone) == WRITE) {
+               if (req_io->should_encrypt)
+                       req_crypt_free_pages(cc, clone);
+               req_crypt_free_resource(req_io);
+       } else if (rq_data_dir(clone) == READ) {
+               req_io->error = error;
+               req_crypt_queue(req_io);
+               ret = DM_ENDIO_INCOMPLETE;
+       }
+
+       return ret;
+}
+
 static struct target_type crypt_target = {
        .name   = "crypt",
        .version = {1, 14, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,
-       .map    = crypt_map,
        .status = crypt_status,
+#ifndef CONFIG_DM_REQ_CRYPT
+       .map    = crypt_map,
+#else
+       .map_rq = req_crypt_map,
+       .rq_end_io = req_crypt_endio,
+#endif
        .postsuspend = crypt_postsuspend,
        .preresume = crypt_preresume,
        .resume = crypt_resume,
-- 
1.7.9.5
