On 02/04/2016 02:08 PM, Javier González wrote:
> Since writes are buffered in memory, incoming reads must retrieve
> buffered pages instead of submitting the I/O to the media.
> 
> This patch implements this logic. When a read bio arrives at rrpc, valid
> pages that still reside in the in-memory write buffers of their flash
> blocks are copied from those buffers. If there are any "holes" in the
> bio, a new bio is submitted to the media to retrieve the missing pages,
> and the original bio is updated accordingly.
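
To make sure I'm reading the resulting read path correctly, rrpc_submit_read()
condenses to roughly the following (a paraphrase of the diff below, with error
handling, locking and mempool management omitted -- not the exact code):

        static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
                                    struct rrpc_rq *rrqd, unsigned long flags)
        {
                unsigned long read_bitmap;  /* one bit per 4KB page in the bio */
                int left;

                /* ... map logical addresses to ppas as before ... */

                /* Copy any pages still held in the per-block write buffers;
                 * every page served from a buffer sets its bit in the bitmap.
                 */
                left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap);
                if (left == 0) {
                        /* Fully served from the write buffers */
                        bio_endio(bio);
                        rrpc_end_io(rqd);
                        return NVM_IO_OK;
                }

                if (bitmap_empty(&read_bitmap, nr_pages))
                        /* Nothing was buffered: read everything from the media */
                        return rrpc_submit_read_io(rrpc, bio, rqd, flags);

                /* Partially buffered: fetch the holes with a synchronous
                 * intermediate bio and copy them into the original bio.
                 */
                return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap,
                                                  rqd, brrqd, nr_pages);
        }
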
> 
> Signed-off-by: Javier González <jav...@cnexlabs.com>
> ---
>  drivers/lightnvm/rrpc.c  | 451 ++++++++++++++++++++++++++++++++++++-----------
>  include/linux/lightnvm.h |   1 +
>  2 files changed, 346 insertions(+), 106 deletions(-)
> 
> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
> index e9fb19d..6348d52 100644
> --- a/drivers/lightnvm/rrpc.c
> +++ b/drivers/lightnvm/rrpc.c
> @@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd)
>       struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
>       uint8_t nr_pages = rqd->nr_pages;
>  
> -     if (bio_data_dir(rqd->bio) == WRITE)
> +     if (bio_data_dir(rqd->bio) == WRITE) {
>               rrpc_end_io_write(rrpc, rqd, nr_pages);
> -     else
> +     } else {
> +             if (rqd->flags & NVM_IOTYPE_SYNC)
> +                     return;
>               rrpc_end_io_read(rrpc, rqd, nr_pages);
> +     }
>  
>       bio_put(rqd->bio);
>  
> @@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd)
>       mempool_free(rqd, rrpc->rq_pool);
>  }
>  
> -static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
> -                     struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
> -                     unsigned long flags, int nr_pages)
> -{
> -     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> -     struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
> -     struct rrpc_addr *gp;
> -     sector_t laddr = rrpc_get_laddr(bio);
> -     int is_gc = flags & NVM_IOTYPE_GC;
> -     int i;
> -
> -     if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
> -             nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
> -             mempool_free(rrqd, rrpc->rrq_pool);
> -             mempool_free(rqd, rrpc->rq_pool);
> -             return NVM_IO_REQUEUE;
> -     }
> -
> -     for (i = 0; i < nr_pages; i++) {
> -             /* We assume that mapping occurs at 4KB granularity */
> -             BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
> -             gp = &rrpc->trans_map[laddr + i];
> -
> -             if (gp->rblk) {
> -                     rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
> -                                                             gp->addr);
> -             } else {
> -                     BUG_ON(is_gc);
> -                     rrpc_unlock_laddr(rrpc, r);
> -                     nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
> -                                                     rqd->dma_ppa_list);
> -                     mempool_free(rrqd, rrpc->rrq_pool);
> -                     mempool_free(rqd, rrpc->rq_pool);
> -                     return NVM_IO_DONE;
> -             }
> -
> -             brrqd[i].addr = gp;
> -     }
> -
> -     rqd->opcode = NVM_OP_HBREAD;
> -
> -     return NVM_IO_OK;
> -}
> -
> -static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
> -                                                     unsigned long flags)
> -{
> -     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> -     int is_gc = flags & NVM_IOTYPE_GC;
> -     sector_t laddr = rrpc_get_laddr(bio);
> -     struct rrpc_addr *gp;
> -
> -     if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
> -             mempool_free(rrqd, rrpc->rrq_pool);
> -             mempool_free(rqd, rrpc->rq_pool);
> -             return NVM_IO_REQUEUE;
> -     }
> -
> -     BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
> -     gp = &rrpc->trans_map[laddr];
> -
> -     if (gp->rblk) {
> -             rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
> -     } else {
> -             BUG_ON(is_gc);
> -             rrpc_unlock_rq(rrpc, rrqd);
> -             mempool_free(rrqd, rrpc->rrq_pool);
> -             mempool_free(rqd, rrpc->rq_pool);
> -             return NVM_IO_DONE;
> -     }
> -
> -     rqd->opcode = NVM_OP_HBREAD;
> -     rrqd->addr = gp;
> -
> -     return NVM_IO_OK;
> -}
> -
>  /*
>   * Copy data from current bio to block write buffer. This is necessary
>   * to guarantee durability if a flash block becomes bad before all pages
> @@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
>       return NVM_IO_DONE;
>  }
>  
> +static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
> +                             struct rrpc_rq *rrqd, unsigned long flags)
> +{
> +     uint8_t nr_pages = rrpc_get_pages(bio);
> +
> +     rrqd->nr_pages = nr_pages;
> +
> +     if (nr_pages > 1)
> +             return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
> +     else
> +             return rrpc_write_rq(rrpc, bio, rrqd, flags);
> +}
> +
> +static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
> +                     struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
> +                     unsigned long flags, int nr_pages)
> +{
> +     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> +     struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
> +     struct rrpc_addr *gp;
> +     sector_t laddr = rrpc_get_laddr(bio);
> +     int is_gc = flags & NVM_IOTYPE_GC;
> +     int i;
> +
> +     if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
> +             nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
> +             return NVM_IO_REQUEUE;
> +     }
> +
> +     for (i = 0; i < nr_pages; i++) {
> +             /* We assume that mapping occurs at 4KB granularity */
> +             BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
> +             gp = &rrpc->trans_map[laddr + i];
> +
> +             if (gp->rblk) {
> +                     rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
> +                                                             gp->addr);
> +             } else {
> +                     BUG_ON(is_gc);
> +                     rrpc_unlock_laddr(rrpc, r);
> +                     nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
> +                                                     rqd->dma_ppa_list);
> +                     return NVM_IO_DONE;
> +             }
> +
> +             brrqd[i].addr = gp;
> +     }
> +
> +     rqd->opcode = NVM_OP_HBREAD;
> +
> +     return NVM_IO_OK;
> +}
> +
> +static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
> +                                                     unsigned long flags)
> +{
> +     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> +     int is_gc = flags & NVM_IOTYPE_GC;
> +     sector_t laddr = rrpc_get_laddr(bio);
> +     struct rrpc_addr *gp;
> +
> +     if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd))
> +             return NVM_IO_REQUEUE;
> +
> +     BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
> +     gp = &rrpc->trans_map[laddr];
> +
> +     if (gp->rblk) {
> +             rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
> +     } else {
> +             BUG_ON(is_gc);
> +             rrpc_unlock_rq(rrpc, rrqd);
> +             return NVM_IO_DONE;
> +     }
> +
> +     rqd->opcode = NVM_OP_HBREAD;
> +     rrqd->addr = gp;
> +
> +     return NVM_IO_OK;
> +}
> +
> +static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk,
> +                                     struct bvec_iter iter, int entry)
> +{
> +     struct buf_entry *read_entry;
> +     struct bio_vec bv;
> +     struct page *page;
> +     void *kaddr;
> +     void *data;
> +     int read = 0;
> +
> +     lockdep_assert_held(&rblk->w_buf.s_lock);
> +
> +     spin_lock(&rblk->w_buf.w_lock);
> +     if (entry >= rblk->w_buf.cur_mem) {
> +             spin_unlock(&rblk->w_buf.w_lock);
> +             goto out;
> +     }
> +     spin_unlock(&rblk->w_buf.w_lock);
> +
> +     read_entry = &rblk->w_buf.entries[entry];
> +     data = read_entry->data;
> +
> +     bv = bio_iter_iovec(bio, iter);
> +     page = bv.bv_page;
> +     kaddr = kmap_atomic(page);
> +     memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE);
> +     kunmap_atomic(kaddr);
> +     read++;
> +
> +out:
> +     return read;
> +}
> +
> +static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd,
> +                     struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap)
> +{
> +     struct nvm_dev *dev = rrpc->dev;
> +     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> +     struct rrpc_addr *addr;
> +     struct bio *bio = rqd->bio;
> +     struct bvec_iter iter = bio->bi_iter;
> +     struct rrpc_block *rblk;
> +     unsigned long blk_id;
> +     int nr_pages = rqd->nr_pages;
> +     int left = nr_pages;
> +     int read = 0;
> +     int entry;
> +     int i;
> +
> +     if (nr_pages != bio->bi_vcnt)
> +             goto out;
> +
> +     if (nr_pages == 1) {
> +             rblk = rrqd->addr->rblk;
> +
> +             /* If the write buffer exists, the block is open in memory */
> +             spin_lock(&rblk->w_buf.s_lock);
> +             atomic_inc(&rblk->w_buf.refs);
> +             if (rblk->w_buf.entries) {
> +                     blk_id = rblk->parent->id;
> +                     entry = rrqd->addr->addr -
> +                             (blk_id * dev->sec_per_pg * dev->pgs_per_blk);
> +
> +                     read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
> +
> +                     left -= read;
> +                     WARN_ON(test_and_set_bit(0, read_bitmap));
> +             }
> +             bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
> +
> +             atomic_dec(&rblk->w_buf.refs);
> +             spin_unlock(&rblk->w_buf.s_lock);
> +
> +             goto out;
> +     }
> +
> +     /* Iterate through all pages and copy those that are found in the write
> +      * buffer. We will complete the holes (if any) with an intermediate bio
> +      * later on.
> +      */
> +     for (i = 0; i < nr_pages; i++) {
> +             addr = brrqd[i].addr;
> +             rblk = addr->rblk;
> +
> +             /* If the write buffer exists, the block is open in memory */
> +             spin_lock(&rblk->w_buf.s_lock);
> +             atomic_inc(&rblk->w_buf.refs);
> +             if (rblk->w_buf.entries) {
> +                     blk_id = rblk->parent->id;
> +                     entry = addr->addr - (blk_id * dev->sec_per_pg *
> +                                                     dev->pgs_per_blk);
> +
> +                     read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
> +
> +                     left -= read;
> +                     WARN_ON(test_and_set_bit(i, read_bitmap));
> +             }
> +             bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
> +
> +             atomic_dec(&rblk->w_buf.refs);
> +             spin_unlock(&rblk->w_buf.s_lock);
> +     }
> +
> +out:
> +     return left;
> +}
> +
> +static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio,
> +                             struct nvm_rq *rqd, unsigned long flags)
> +{
> +     struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
> +     int err;
> +
> +     err = nvm_submit_io(rrpc->dev, rqd);
> +     if (err) {
> +             pr_err("rrpc: I/O submission failed: %d\n", err);
> +             bio_put(bio);
> +             if (!(flags & NVM_IOTYPE_GC)) {
> +                     rrpc_unlock_rq(rrpc, rrqd);
> +                     if (rqd->nr_pages > 1)
> +                             nvm_dev_dma_free(rrpc->dev,
> +                     rqd->ppa_list, rqd->dma_ppa_list);
> +             }
> +             return NVM_IO_ERR;
> +     }
> +
> +     return NVM_IO_OK;
> +}
> +
> +static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio,
> +                             unsigned long *read_bitmap, struct nvm_rq *rqd,
> +                             struct rrpc_buf_rq *brrqd, uint8_t nr_pages)
> +{
> +     struct bio *new_bio;
> +     struct page *page;
> +     struct bio_vec src_bv, dst_bv;
> +     void *src_p, *dst_p;
> +     int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages);
> +     int hole;
> +     int i = 0;
> +     int ret;
> +     DECLARE_COMPLETION_ONSTACK(wait);
> +
> +     new_bio = bio_alloc(GFP_KERNEL, nr_holes);
> +     if (!new_bio) {
> +             pr_err("nvm: rrpc: could not alloc read bio\n");
> +             return NVM_IO_ERR;
> +     }
> +
> +     hole = find_first_zero_bit(read_bitmap, nr_pages);
> +     do {
> +             page = mempool_alloc(rrpc->page_pool, GFP_KERNEL);
> +             if (!page) {
> +                     bio_put(new_bio);
> +                     pr_err("nvm: rrpc: could not alloc read page\n");
> +                     goto err;
> +             }
> +
> +             ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
> +             if (ret != RRPC_EXPOSED_PAGE_SIZE) {
> +                     pr_err("nvm: rrpc: could not add page to bio\n");
> +                     mempool_free(page, rrpc->page_pool);
> +                     goto err;
> +             }
> +
> +             rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
> +                                                     brrqd[hole].addr->addr);
> +
> +             i++;
> +             hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
> +     } while (hole != nr_pages);
> +
> +     if (nr_holes != new_bio->bi_vcnt) {
> +             pr_err("rrpc: malformed bio\n");
> +             goto err;
> +     }
> +
> +     new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector;
> +     new_bio->bi_rw = READ;
> +     new_bio->bi_private = &wait;
> +     new_bio->bi_end_io = rrpc_end_sync_bio;
> +
> +     rqd->flags |= NVM_IOTYPE_SYNC;
> +     rqd->bio = new_bio;
> +     rqd->nr_pages = nr_holes;
> +
> +     rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags);
> +     wait_for_completion_io(&wait);
> +
> +     if (new_bio->bi_error)
> +             goto err;
> +
> +     /* Fill the holes in the original bio */
> +     i = 0;
> +     hole = find_first_zero_bit(read_bitmap, nr_pages);
> +     do {
> +             src_bv = new_bio->bi_io_vec[i];
> +             dst_bv = bio->bi_io_vec[hole];
> +
> +             src_p = kmap_atomic(src_bv.bv_page);
> +             dst_p = kmap_atomic(dst_bv.bv_page);
> +
> +             memcpy(dst_p + dst_bv.bv_offset,
> +                     src_p + src_bv.bv_offset,
> +                     RRPC_EXPOSED_PAGE_SIZE);
> +
> +             kunmap_atomic(src_p);
> +             kunmap_atomic(dst_p);
> +
> +             mempool_free(src_bv.bv_page, rrpc->page_pool);
> +
> +             i++;
> +             hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
> +     } while (hole != nr_pages);
> +
> +     bio_put(new_bio);
> +
> +     /* Complete the original bio and associated request */
> +     rqd->flags &= ~NVM_IOTYPE_SYNC;
> +     rqd->bio = bio;
> +     rqd->nr_pages = nr_pages;
> +
> +     bio_endio(bio);
> +     rrpc_end_io(rqd);
> +     return NVM_IO_OK;
> +
> +err:
> +     /* Free allocated pages in new bio */
> +     for (i = 0; i < new_bio->bi_vcnt; i++) {
> +             src_bv = new_bio->bi_io_vec[i];
> +             mempool_free(src_bv.bv_page, rrpc->page_pool);
> +     }
> +     bio_endio(new_bio);
> +     return NVM_IO_ERR;
> +}
> +
>  static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
>                               struct rrpc_rq *rrqd, unsigned long flags)
>  {
>       struct nvm_rq *rqd;
>       struct rrpc_buf_rq brrqd[rrpc->max_write_pgs];
> +     unsigned long read_bitmap; /* Max 64 ppas per request */
> +     uint8_t left;
>       uint8_t nr_pages = rrpc_get_pages(bio);
>       int err;
>  
> +     bitmap_zero(&read_bitmap, nr_pages);
> +
>       rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
>       if (!rqd) {
>               pr_err_ratelimited("rrpc: not able to queue bio.");
> @@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
>                                               &rqd->dma_ppa_list);
>               if (!rqd->ppa_list) {
>                       pr_err("rrpc: not able to allocate ppa list\n");
> -                     mempool_free(rrqd, rrpc->rrq_pool);
>                       mempool_free(rqd, rrpc->rq_pool);
> +                     mempool_free(rrqd, rrpc->rrq_pool);
>                       return NVM_IO_ERR;
>               }
>  
>               err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags,
>                                                               nr_pages);
>               if (err) {
> -                     mempool_free(rrqd, rrpc->rrq_pool);
>                       mempool_free(rqd, rrpc->rq_pool);
> +                     mempool_free(rrqd, rrpc->rrq_pool);
>                       return err;
>               }
>       } else {
>               err = rrpc_read_rq(rrpc, bio, rqd, flags);
> -             if (err)
> +             if (err) {
> +                     mempool_free(rrqd, rrpc->rrq_pool);
> +                     mempool_free(rqd, rrpc->rq_pool);
>                       return err;
> +             }
>       }
>  
>       bio_get(bio);
> @@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
>       rqd->nr_pages = rrqd->nr_pages = nr_pages;
>       rqd->flags = flags;
>  
> -     err = nvm_submit_io(rrpc->dev, rqd);
> -     if (err) {
> -             pr_err("rrpc: I/O submission failed: %d\n", err);
> -             bio_put(bio);
> -             if (!(flags & NVM_IOTYPE_GC)) {
> -                     rrpc_unlock_rq(rrpc, rrqd);
> -                     if (rqd->nr_pages > 1)
> -                             nvm_dev_dma_free(rrpc->dev,
> -                     rqd->ppa_list, rqd->dma_ppa_list);
> -             }
> +     left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap);
> +     if (left == 0) {
> +             bio_endio(bio);
> +             rrpc_end_io(rqd);
> +             return NVM_IO_OK;
> +     } else if (left < 0)
>               return NVM_IO_ERR;
> -     }
>  
> -     return NVM_IO_OK;
> -}
> +     if (bitmap_empty(&read_bitmap, nr_pages))
> +             return rrpc_submit_read_io(rrpc, bio, rqd, flags);
>  
> -static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
> -                             struct rrpc_rq *rrqd, unsigned long flags)
> -{
> -     uint8_t nr_pages = rrpc_get_pages(bio);
> -
> -     rrqd->nr_pages = nr_pages;
> -
> -     if (nr_pages > 1)
> -             return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
> -     else
> -             return rrpc_write_rq(rrpc, bio, rrqd, flags);
> +     /* The read bio could not be completely read from the write buffer. This
> +      * case only occurs when several pages are sent in a single bio
> +      */
> +     return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd,
> +                                                             nr_pages);
>  }
>  
>  static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index eda9743..ae26ced 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -11,6 +11,7 @@ enum {
>  
>       NVM_IOTYPE_NONE = 0,
>       NVM_IOTYPE_GC = 1,
> +     NVM_IOTYPE_SYNC = 2,
>  };
>  
>  #define NVM_BLK_BITS (16)
> 

Seems like this can be merged into the write buffer patch as well?
