The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.3
------>
commit 8d5a6070839db6fb61e22aa5a18f1d4f634cbaad
Author: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
Date:   Mon Dec 30 13:39:48 2024 +0800
    vhost-blk: rework iov and bio handling

    Manual page handling is tiresome and error-prone. Let's use iov
    iterators and the bio_iov_iter_get_pages() helper, which automatically
    fills a bio with pages from the iov. As this also pins the pages, pair
    every bio with a bio_release_pages() call on completion.

    While at it, remove VHOST_BLK_SECTOR_BITS and its friends, since they
    merely duplicate the generic SECTOR_SHIFT/SECTOR_SIZE/SECTOR_MASK
    macros.

    v2:
     - fix the bio allocation size formula
     - fix the bio_iov_iter_get_pages() error path to only put pages for
       which we had a successful get
     - add an explicit BUG_ON() to check that each bio starts sector
       aligned

    https://virtuozzo.atlassian.net/browse/PSBM-157752

    Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

    ======
    Patchset description:
    vhost-blk: bounce buffer for unaligned requests

    Andrey Zhadchenko (2):
      vhost-blk: rework iov and bio handling
      vhost-blk: add bounce-buffer for non-aligned requests

    David Howells (1):
      iov_iter: Add a function to extract a page list from an iterator

    Pavel Tikhomirov (1):
      vhost-blk: remove excess vhost_blk_req.use_inline

    Feature: vhost-blk: in-kernel accelerator for virtio-blk guests
---
 drivers/vhost/blk.c | 161 ++++++++++++++--------------------------------------
 1 file changed, 43 insertions(+), 118 deletions(-)
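For readers who haven't used the iterator-based block helpers, the pattern the
patch switches to looks roughly like the sketch below. It is illustrative, not
vhost code: the block-layer calls (bio_alloc() in its post-5.18 four-argument
form, bio_iov_vecs_to_alloc(), bio_iov_iter_get_pages(), bio_release_pages(),
submit_bio()) are real API, while the demo_* functions and their parameters are
hypothetical scaffolding. Unlike the driver, which collects bios in req->bio
and completes the request once all of them finish, the sketch submits each bio
as soon as it is built.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/uio.h>

/*
 * Illustrative sketch: carve an iov_iter into bios.
 * bio_iov_iter_get_pages() pins the user pages and advances the
 * iterator; bio_release_pages() in the completion handler drops the
 * pins, dirtying the pages only for reads (data was written to them).
 */
static void demo_end_io(struct bio *bio)
{
        bio_release_pages(bio, bio_data_dir(bio) == READ);
        bio_put(bio);
}

static int demo_submit_iter(struct block_device *bdev, struct iov_iter *iter,
                            sector_t sector, unsigned int opf)
{
        struct bio *bio;
        int ret;

        do {
                /* bvecs needed by the next bio, capped at BIO_MAX_VECS */
                unsigned int nr = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);

                bio = bio_alloc(bdev, nr, opf, GFP_KERNEL);
                if (!bio)
                        return -ENOMEM;
                bio->bi_iter.bi_sector = sector;
                bio->bi_end_io = demo_end_io;

                ret = bio_iov_iter_get_pages(bio, iter); /* pins pages */
                if (ret) {
                        bio_put(bio); /* nothing pinned for this bio */
                        return ret;
                }

                /* next bio starts right after the bytes just consumed */
                sector += bio->bi_iter.bi_size >> SECTOR_SHIFT;
                submit_bio(bio);
        } while (iov_iter_count(iter));

        return 0;
}

On the v2 note about the allocation size formula: req->bio needs one pointer
per bio, i.e. the total page count divided by BIO_MAX_VECS, rounded up. The
kmalloc() in the hunk below open-codes this as
(nr_pages_total + BIO_MAX_VECS - 1) / BIO_MAX_VECS, which is
DIV_ROUND_UP(nr_pages_total, BIO_MAX_VECS); with BIO_MAX_VECS == 256, a
request spanning 257 pages gets room for two bios.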
diff --git a/drivers/vhost/blk.c b/drivers/vhost/blk.c
index 90d20d0eb722..a289552d6f37 100644
--- a/drivers/vhost/blk.c
+++ b/drivers/vhost/blk.c
@@ -49,22 +49,10 @@ enum {
 
 #define VHOST_MAX_METADATA_IOV 1
 
-#define VHOST_BLK_SECTOR_BITS 9
-#define VHOST_BLK_SECTOR_SIZE (1 << VHOST_BLK_SECTOR_BITS)
-#define VHOST_BLK_SECTOR_MASK (VHOST_BLK_SECTOR_SIZE - 1)
-
-struct req_page_list {
-        struct page **pages;
-        int pages_nr;
-};
-
 #define NR_INLINE 16
 
 struct vhost_blk_req {
-        struct req_page_list inline_pl[NR_INLINE];
-        struct page *inline_page[NR_INLINE];
         struct bio *inline_bio[NR_INLINE];
-        struct req_page_list *pl;
 
         int req_bin;
         bool use_inline;
@@ -137,12 +125,6 @@ static int move_iovec(struct iovec *from, struct iovec *to,
         return len ? -1 : moved_seg;
 }
 
-static inline int iov_num_pages(struct iovec *iov)
-{
-        return (PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
-               ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
-}
-
 static inline int vhost_blk_set_status(struct vhost_blk_req *req, u8 status)
 {
         struct iov_iter iter;
@@ -172,28 +154,14 @@ static void vhost_blk_req_done(struct bio *bio)
                 vhost_vq_work_queue(&req->blk_vq->vq, &req->blk_vq->work);
         }
 
+        bio_release_pages(bio, !req->bi_opf);
         bio_put(bio);
 }
 
-static void vhost_blk_req_umap(struct vhost_blk_req *req)
+static void vhost_blk_req_cleanup(struct vhost_blk_req *req)
 {
-        struct req_page_list *pl;
-        int i, j;
-
-        if (req->pl) {
-                for (i = 0; i < req->iov_nr; i++) {
-                        pl = &req->pl[i];
-
-                        for (j = 0; j < pl->pages_nr; j++) {
-                                if (!req->bi_opf)
-                                        set_page_dirty_lock(pl->pages[j]);
-                                put_page(pl->pages[j]);
-                        }
-                }
-        }
-
         if (!req->use_inline)
-                kfree(req->pl);
+                kfree(req->bio);
 }
 
 static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
@@ -202,7 +170,6 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
         struct bio *bio;
 
         req->use_inline = true;
-        req->pl = NULL;
         req->bio = req->inline_bio;
 
         bio = bio_alloc(bdev, 0, req->bi_opf, GFP_KERNEL);
@@ -219,111 +186,69 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
         return 0;
 }
 
-static struct page **vhost_blk_prepare_req(struct vhost_blk_req *req,
-                                           int total_pages, int iov_nr)
-{
-        int pl_len, page_len, bio_len;
-        void *buf;
-
-        req->use_inline = false;
-        pl_len = iov_nr * sizeof(req->pl[0]);
-        page_len = total_pages * sizeof(struct page *);
-        bio_len = (total_pages + BIO_MAX_VECS - 1) / BIO_MAX_VECS * sizeof(struct bio *);
-
-        buf = kmalloc(pl_len + page_len + bio_len, GFP_KERNEL);
-        if (!buf)
-                return NULL;
-
-        req->pl = buf;
-        req->bio = buf + pl_len + page_len;
-
-        return buf + pl_len;
-}
-
 static int vhost_blk_bio_make(struct vhost_blk_req *req,
                               struct block_device *bdev)
 {
-        int pages_nr_total, i, j, ret;
-        struct iovec *iov = req->iov;
-        int iov_nr = req->iov_nr;
-        struct page **pages, *page;
-        struct bio *bio = NULL;
-        int bio_nr = 0;
+        int nr_pages, nr_pages_total = 0, bio_nr = 0, ret, i;
+        struct iov_iter iter;
+        struct bio *bio;
+        sector_t sector = req->sector;
+        unsigned long pos = 0;
 
         if (unlikely(req->bi_opf == REQ_OP_FLUSH))
                 return vhost_blk_bio_make_simple(req, bdev);
 
-        pages_nr_total = 0;
-        for (i = 0; i < iov_nr; i++)
-                pages_nr_total += iov_num_pages(&iov[i]);
+        iov_iter_init(&iter, req->bi_opf, req->iov, req->iov_nr, req->len);
 
-        if (pages_nr_total > NR_INLINE) {
-                pages = vhost_blk_prepare_req(req, pages_nr_total, iov_nr);
-                if (!pages)
+        nr_pages_total = iov_iter_npages(&iter, INT_MAX);
+        if (nr_pages_total > NR_INLINE * BIO_MAX_VECS) {
+                req->bio = kmalloc(((nr_pages_total + BIO_MAX_VECS - 1) /
+                                    BIO_MAX_VECS) * sizeof(struct bio *),
+                                   GFP_KERNEL);
+                if (!req->bio)
                         return -ENOMEM;
+                req->use_inline = false;
         } else {
                 req->use_inline = true;
-                req->pl = req->inline_pl;
-                pages = req->inline_page;
                 req->bio = req->inline_bio;
         }
 
-        req->iov_nr = 0;
-        for (i = 0; i < iov_nr; i++) {
-                int pages_nr = iov_num_pages(&iov[i]);
-                unsigned long iov_base, iov_len;
-                struct req_page_list *pl;
+        nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+        do {
+                /* We can't handle next bio if its start is not sector aligned */
+                BUG_ON(pos & SECTOR_MASK);
 
-                iov_base = (unsigned long)iov[i].iov_base;
-                iov_len = (unsigned long)iov[i].iov_len;
-
-                ret = get_user_pages_fast(iov_base, pages_nr,
-                                          !req->bi_opf, pages);
-                if (ret != pages_nr)
+                bio = bio_alloc(bdev, nr_pages, req->bi_opf, GFP_KERNEL);
+                if (!bio)
                         goto fail;
 
-                req->iov_nr++;
-                pl = &req->pl[i];
-                pl->pages_nr = pages_nr;
-                pl->pages = pages;
-
-                for (j = 0; j < pages_nr; j++) {
-                        unsigned int off, len, pos;
-
-                        page = pages[j];
-                        off = iov_base & ~PAGE_MASK;
-                        len = PAGE_SIZE - off;
-                        if (len > iov_len)
-                                len = iov_len;
-
-                        while (!bio || !bio_add_page(bio, page, len, off)) {
-                                bio = bio_alloc(bdev, bio_max_segs(pages_nr_total),
-                                                req->bi_opf, GFP_KERNEL);
-                                if (!bio)
-                                        goto fail;
-                                bio->bi_iter.bi_sector = req->sector;
-                                bio->bi_private = req;
-                                bio->bi_end_io = vhost_blk_req_done;
-                                req->bio[bio_nr++] = bio;
-                        }
-
-                        iov_base += len;
-                        iov_len -= len;
-                        pages_nr_total--;
+                bio->bi_iter.bi_sector = sector;
+                bio->bi_private = req;
+                bio->bi_end_io = vhost_blk_req_done;
 
-                        pos = (iov_base & VHOST_BLK_SECTOR_MASK) + iov_len;
-                        req->sector += pos >> VHOST_BLK_SECTOR_BITS;
+                ret = bio_iov_iter_get_pages(bio, &iter);
+                if (unlikely(ret)) {
+                        bio_put(bio);
+                        goto fail;
                 }
+                req->bio[bio_nr++] = bio;
+
+                pos += bio->bi_iter.bi_size;
+                sector = req->sector + (pos >> SECTOR_SHIFT);
+
+                nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+        } while (nr_pages);
 
-                pages += pages_nr;
-        }
         atomic_set(&req->bio_nr, bio_nr);
 
         return 0;
-
 fail:
-        for (i = 0; i < bio_nr; i++)
+        for (i = 0; i < bio_nr; i++) {
+                bio_release_pages(req->bio[i], false);
                 bio_put(req->bio[i]);
-        vhost_blk_req_umap(req);
+        }
+
+        vhost_blk_req_cleanup(req);
+
         return -ENOMEM;
 }
 
@@ -535,7 +460,7 @@ static void vhost_blk_handle_host_kick(struct vhost_work *work)
                 if (!blk)
                         blk = req->blk;
 
-                vhost_blk_req_umap(req);
+                vhost_blk_req_cleanup(req);
 
                 status = req->bio_err == 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
                 ret = vhost_blk_set_status(req, status);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel