Provide an IO extension handler that attaches protection information (PI)
data from the io extension structure to a kiocb, then teach directio how
to attach the pages representing the PI buffer directly to a bio.

Signed-off-by: Darrick J. Wong <darrick.w...@oracle.com>
---
 Documentation/block/data-integrity.txt |   11 ++++
 fs/aio.c                               |   62 +++++++++++++++++++++
 fs/bio-integrity.c                     |   94 +++++++++++++++++++++++++++++++-
 fs/direct-io.c                         |   70 +++++++++++++++++++-----
 include/linux/aio.h                    |   10 +++
 include/linux/bio.h                    |   15 +++++
 include/uapi/linux/aio_abi.h           |    6 ++
 mm/filemap.c                           |    6 ++
 8 files changed, 259 insertions(+), 15 deletions(-)


diff --git a/Documentation/block/data-integrity.txt 
b/Documentation/block/data-integrity.txt
index 2d735b0a..1d1f070 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -282,6 +282,17 @@ will require extra work due to the application tag.
       It is up to the receiver to process them and verify data
       integrity upon completion.
 
+    int bio_integrity_prep_buffer(struct bio *bio, int rw,
+                                 struct bio_integrity_prep_iter *pi);
+
+      This function should be called before submit_bio; it attaches an
+      array of struct page * containing integrity data to an existing bio.
+      Primarily this is intended for AIO/DIO to be able to attach a
+      userspace buffer to a bio.
+
+      The bio_integrity_prep_iter holds the offset into the first page, the
+      length of the PI buffer, the number of pages, and the array of pages
+      as returned by get_user_pages.  It is advanced past the PI consumed.
 
 5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
     METADATA
diff --git a/fs/aio.c b/fs/aio.c
index 0c40bdc..3f932c3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1379,7 +1379,69 @@ struct io_extension_type {
        int (*destroy_fn)(struct kiocb *);
 };
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/* Release the pinned PI pages and the page array of a kiocb, if any. */
+static int destroy_pi_ext(struct kiocb *req)
+{
+       struct bio_integrity_prep_iter *iter = &req->ki_ioext->ke_pi_iter;
+       unsigned int idx;
+
+       if (iter->pi_userpages != NULL) {
+               for (idx = 0; idx < iter->pi_nrpages; idx++)
+                       page_cache_release(iter->pi_userpages[idx]);
+               kfree(iter->pi_userpages);
+               iter->pi_userpages = NULL;
+       }
+       return 0;
+}
+
+/*
+ * Pin the user PI buffer named by the IO extension into ke_pi_iter; returns
+ * 0 or -errno.  Pinned pages stay recorded for destroy_pi_ext() to release.
+ */
+static int setup_pi_ext(struct kiocb *req, int is_write)
+{
+       struct file *file = req->ki_filp;
+       struct io_extension *ext = &req->ki_ioext->ke_kern;
+       struct bio_integrity_prep_iter *iter = &req->ki_ioext->ke_pi_iter;
+       unsigned long uaddr = (unsigned long)ext->ie_pi_buf;
+       unsigned long start, end;
+       int retval;
+
+       if (!(file->f_flags & O_DIRECT)) {
+               pr_debug("EINVAL: can't use PI without O_DIRECT.\n");
+               return -EINVAL;
+       }
+
+       BUG_ON(iter->pi_userpages);
+       end = (uaddr + ext->ie_pi_buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       start = uaddr >> PAGE_SHIFT;
+       iter->pi_offset = offset_in_page(ext->ie_pi_buf);
+       iter->pi_len = ext->ie_pi_buflen;
+       iter->pi_nrpages = end - start;
+       iter->pi_userpages = kcalloc(iter->pi_nrpages, sizeof(struct page *),
+                                    GFP_NOIO);
+       if (iter->pi_userpages == NULL) {
+               pr_err("%s: no room for page array?\n", __func__);
+               return -ENOMEM;
+       }
+
+       /* Reads store PI into the buffer, so pin for writing unless is_write */
+       retval = get_user_pages_fast(uaddr, iter->pi_nrpages, !is_write,
+                                    iter->pi_userpages);
+       if (retval != iter->pi_nrpages) {
+               pr_err("%s: couldn't map pages?\n", __func__);
+               /* A negative errno must never become the pinned-page count */
+               iter->pi_nrpages = retval < 0 ? 0 : retval;
+               return retval < 0 ? retval : -EFAULT;
+       }
+       req->ki_flags |= KIOCB_DIO_ONLY;
+       return 0;
+}
+#endif
+
 static struct io_extension_type extensions[] = {
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       /* The PI handlers are only built when block integrity is enabled */
+       {IO_EXT_PI, IO_EXT_SIZE(ie_pi_ret), setup_pi_ext, destroy_pi_ext},
+#endif
        {IO_EXT_INVALID, 0, NULL, NULL},
 };
 
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 413312f..3df9aeb 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -138,7 +138,7 @@ int bio_integrity_add_page(struct bio *bio, struct page 
*page,
        struct bio_vec *iv;
 
        if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
-               printk(KERN_ERR "%s: bip_vec full\n", __func__);
+               pr_err("%s: bip_vec full\n", __func__);
                return 0;
        }
 
@@ -250,7 +250,7 @@ static int bio_integrity_tag(struct bio *bio, void 
*tag_buf, unsigned int len,
                                        DIV_ROUND_UP(len, bi->tag_size));
 
        if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
-               printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
+               pr_err("%s: tag too big for bio: %u > %u\n", __func__,
                       nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
                return -1;
        }
@@ -375,6 +375,96 @@ static inline unsigned short 
blk_integrity_tuple_size(struct blk_integrity *bi)
 }
 
 /**
+ * bio_integrity_prep_buffer - Attach caller-supplied PI pages to a bio
+ * @bio:       bio to prepare
+ * @rw:                data direction for the bio
+ * @pi:                pi data to attach to bio
+ *
+ * Description: Attaches the caller's pages described by @pi to @bio as
+ * its integrity payload.  The bio must have target device and start
+ * sector set prior to calling.  The pages should contain integrity
+ * metadata in the WRITE case, and should be ready to receive metadata
+ * in the READ case.  @pi is advanced past the data that was attached.
+ *
+ * Returns 0 on success or a negative errno.  On failure no integrity
+ * payload is left on @bio, so the caller may retry or drop the bio.
+ */
+int bio_integrity_prep_buffer(struct bio *bio, int rw,
+                             struct bio_integrity_prep_iter *pi)
+{
+       struct bio_integrity_payload *bip;
+       struct blk_integrity *bi;
+       unsigned long start, end;
+       unsigned int len, nr_pages;
+       unsigned int bytes, i;
+       unsigned int sectors;
+       int ret;
+
+       bi = bdev_get_integrity(bio->bi_bdev);
+       BUG_ON(bi == NULL);
+       BUG_ON(bio_integrity(bio));
+
+       sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
+
+       /* Work out how much PI this bio needs and how many pages it spans */
+       len = sectors * blk_integrity_tuple_size(bi);
+       end = (pi->pi_offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       start = pi->pi_offset >> PAGE_SHIFT;
+       nr_pages = end - start;
+
+       if (pi->pi_len < len) {
+               pr_err("%s: not enough space left in buffer!\n", __func__);
+               return -ENOMEM;
+       }
+
+       /* Allocate bio integrity payload and integrity vectors */
+       bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+       if (unlikely(bip == NULL)) {
+               pr_err("could not allocate data integrity bioset\n");
+               return -EIO;
+       }
+
+       bip->bip_owns_buf = 0;
+       bip->bip_buf = NULL;
+       bip->bip_iter.bi_size = len;
+       bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
+
+       /* Map it */
+       for (i = 0; i < nr_pages && len > 0; i++) {
+               bytes = PAGE_SIZE - pi->pi_offset;
+               if (bytes > pi->pi_len)
+                       bytes = pi->pi_len;
+               if (bytes > len)
+                       bytes = len;
+
+               ret = bio_integrity_add_page(bio, *pi->pi_userpages,
+                                            bytes, pi->pi_offset);
+               /* A short add would leave bi_size describing missing PI */
+               if (ret < bytes)
+                       goto out_free;
+
+               len -= bytes;
+               pi->pi_len -= bytes;
+               if (pi->pi_offset + bytes == PAGE_SIZE)
+                       pi->pi_userpages++;
+               pi->pi_offset = (pi->pi_offset + bytes) % PAGE_SIZE;
+       }
+
+       /* Install custom I/O completion handler for reads */
+       if ((rw & WRITE) == READ) {
+               bip->bip_end_io = bio->bi_end_io;
+               bio->bi_end_io = bio_integrity_endio;
+       }
+
+       return 0;
+
+out_free:
+       bio_integrity_free(bio);
+       return -EIO;
+}
+EXPORT_SYMBOL(bio_integrity_prep_buffer);
+
+/**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:       bio to prepare
  *
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 160a548..3f591f8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -111,6 +111,10 @@ struct dio_submit {
         */
        unsigned head;                  /* next page to process */
        unsigned tail;                  /* last valid page + 1 */
+
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+       struct bio_integrity_prep_iter  pi_iter;
+#endif
 };
 
 /* dio_state communicated between submission path and end_io */
@@ -221,6 +225,7 @@ static inline struct page *dio_get_page(struct dio *dio,
        return dio->pages[sdio->head++];
 }
 
+
 /**
  * dio_complete() - called when all DIO BIO I/O has been completed
  * @offset: the byte offset in the file of the completed operation
@@ -385,6 +390,22 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
        sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/* Attach the request's PI pages, if any, to sdio->bio. */
+static int dio_prep_pi_buffers(struct dio *dio, struct dio_submit *sdio)
+{
+       if (sdio->pi_iter.pi_userpages && bio_integrity_enabled(sdio->bio))
+               return bio_integrity_prep_buffer(sdio->bio, dio->rw,
+                                                &sdio->pi_iter);
+       return 0;
+}
+#else
+static int dio_prep_pi_buffers(struct dio *dio, struct dio_submit *sdio)
+{
+       return 0;
+}
+#endif
+
 /*
  * In the AIO read case we speculatively dirty the pages before starting IO.
  * During IO completion, any of these pages which happen to have been written
@@ -392,13 +413,18 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
  *
  * bios hold a dio reference between submit_bio and ->end_io.
  */
-static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
+static inline int dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 {
        struct bio *bio = sdio->bio;
        unsigned long flags;
+       int ret = 0;
 
        bio->bi_private = dio;
 
+       ret = dio_prep_pi_buffers(dio, sdio);
+       if (ret)
+               return ret;
+
        spin_lock_irqsave(&dio->bio_lock, flags);
        dio->refcount++;
        spin_unlock_irqrestore(&dio->bio_lock, flags);
@@ -415,6 +441,8 @@ static inline void dio_bio_submit(struct dio *dio, struct 
dio_submit *sdio)
        sdio->bio = NULL;
        sdio->boundary = 0;
        sdio->logical_offset_in_bio = 0;
+
+       return ret;
 }
 
 /*
@@ -736,8 +764,11 @@ static inline int dio_send_cur_page(struct dio *dio, 
struct dio_submit *sdio,
                 * have.
                 */
                if (sdio->final_block_in_bio != sdio->cur_page_block ||
-                   cur_offset != bio_next_offset)
-                       dio_bio_submit(dio, sdio);
+                   cur_offset != bio_next_offset) {
+                       ret = dio_bio_submit(dio, sdio);
+                       if (ret)
+                               goto out;
+               }
        }
 
        if (sdio->bio == NULL) {
@@ -747,7 +778,9 @@ static inline int dio_send_cur_page(struct dio *dio, struct 
dio_submit *sdio,
        }
 
        if (dio_bio_add_page(sdio) != 0) {
-               dio_bio_submit(dio, sdio);
+               ret = dio_bio_submit(dio, sdio);
+               if (ret)
+                       goto out;
                ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
                if (ret == 0) {
                        ret = dio_bio_add_page(sdio);
@@ -823,8 +856,12 @@ out:
         * avoid metadata seeks.
         */
        if (sdio->boundary) {
+               int ret2;
+
                ret = dio_send_cur_page(dio, sdio, map_bh);
-               dio_bio_submit(dio, sdio);
+               ret2 = dio_bio_submit(dio, sdio);
+               if (ret == 0)
+                       ret = ret2;
                page_cache_release(sdio->cur_page);
                sdio->cur_page = NULL;
        }
@@ -1120,7 +1157,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
        unsigned blocksize_mask = (1 << blkbits) - 1;
        ssize_t retval = -EINVAL;
        loff_t end = offset;
-       struct dio *dio;
+       struct dio *dio = NULL;
        struct dio_submit sdio = { 0, };
        unsigned long user_addr;
        size_t bytes;
@@ -1187,8 +1224,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
                                                              end - 1);
                        if (retval) {
                                mutex_unlock(&inode->i_mutex);
-                               kmem_cache_free(dio_cache, dio);
-                               goto out;
+                               goto out_dio;
                        }
                }
        }
@@ -1217,8 +1253,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
                         * We grab i_mutex only for reads so we don't have
                         * to release it here
                         */
-                       kmem_cache_free(dio_cache, dio);
-                       goto out;
+                       goto out_dio;
                }
        }
 
@@ -1228,6 +1263,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
        atomic_inc(&inode->i_dio_count);
 
        retval = 0;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+       sdio.pi_iter = iocb->ki_ioext->ke_pi_iter;
+#endif
        sdio.blkbits = blkbits;
        sdio.blkfactor = i_blkbits - blkbits;
        sdio.block_in_file = offset >> blkbits;
@@ -1315,8 +1353,12 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
                page_cache_release(sdio.cur_page);
                sdio.cur_page = NULL;
        }
-       if (sdio.bio)
-               dio_bio_submit(dio, &sdio);
+       if (sdio.bio) {
+               int ret2;
+               ret2 = dio_bio_submit(dio, &sdio);
+               if (retval == 0)
+                       retval = ret2;
+       }
 
        blk_finish_plug(&plug);
 
@@ -1353,7 +1395,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
                retval = dio_complete(dio, offset, retval, false);
        } else
                BUG_ON(retval != -EIOCBQUEUED);
-
+       return retval;
+out_dio:
+       kmem_cache_free(dio_cache, dio);
 out:
        return retval;
 }
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 60f4364..3f142b8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -6,6 +6,7 @@
 #include <linux/aio_abi.h>
 #include <linux/uio.h>
 #include <linux/rcupdate.h>
+#include <linux/bio.h>
 
 #include <linux/atomic.h>
 
@@ -14,6 +15,8 @@ struct kiocb;
 
 #define KIOCB_KEY              0
 
+#define KIOCB_DIO_ONLY (1)     /* don't try buffered if directio fails */
+
 /*
  * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
  * cancelled or completed (this makes a certain amount of sense because
@@ -29,10 +32,15 @@ struct kiocb;
 
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
+/* per-kiocb extension data */
 struct kio_extension {
        struct io_extension __user *ke_user;
        struct io_extension ke_kern;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+       struct bio_integrity_prep_iter  ke_pi_iter;     /* PI buffers */
+#endif
 };
+
 struct kiocb {
        struct file             *ki_filp;
        struct kioctx           *ki_ctx;        /* NULL for sync ops */
@@ -59,6 +67,8 @@ struct kiocb {
 
        /* Kernel copy of extension descriptors */
        struct kio_extension    *ki_ioext;
+
+       unsigned int            ki_flags;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5a4d39b..4729ab1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -635,6 +635,13 @@ struct biovec_slab {
        struct kmem_cache *slab;
 };
 
+struct bio_integrity_prep_iter {       /* cursor over a PI page array */
+       struct page **pi_userpages;     /* Array of pages holding PI data */
+       size_t pi_nrpages;              /* Number of pages in pi_userpages */
+       size_t pi_offset;               /* Byte offset into the first page */
+       size_t pi_len;                  /* Bytes of PI buffer remaining */
+};
+
 /*
  * a small number of entries is fine, not going to be performance critical.
  * basically we just need to survive
@@ -663,6 +670,8 @@ extern int bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
 extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
 extern int bio_integrity_prep(struct bio *);
+extern int bio_integrity_prep_buffer(struct bio *, int rw,
+                                    struct bio_integrity_prep_iter *);
 extern void bio_integrity_endio(struct bio *, int);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
@@ -693,6 +702,12 @@ static inline void bioset_integrity_free (struct bio_set 
*bs)
        return;
 }
 
+static inline int bio_integrity_prep_buffer(struct bio *bio, int rw,
+                                           struct bio_integrity_prep_iter *pi)
+{
+       return 0;       /* stub: attaches nothing and reports success */
+}
+
 static inline int bio_integrity_prep(struct bio *bio)
 {
        return 0;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index 07ffd1f..d7b8c68 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -74,11 +74,17 @@ struct io_event {
 
 /* IO extension types */
 #define IO_EXT_INVALID (0)
+#define IO_EXT_PI      (1)     /* protection info (checksums, etc) */
 
 /* IO extension descriptor */
 struct io_extension {
        __u64 ie_size;
        __u64 ie_has;
+
+       /* PI stuff */
+       __u64 ie_pi_buf;
+       __u32 ie_pi_buflen;
+       __u32 ie_pi_ret;
 };
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 7a13f6a..d35ddb3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2477,6 +2477,12 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, 
const struct iovec *iov,
                                                        ppos, count, ocount);
                if (written < 0 || written == count)
                        goto out;
+
+               if (iocb->ki_flags & KIOCB_DIO_ONLY) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
                /*
                 * direct-io write to a hole: fall through to buffered I/O
                 * for completing the rest of the request.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to