Hi Huan,

> 
> Currently, udmabuf handles folio by creating an unpin list to record
> each folio obtained from the list and unpinning them when released. To
> maintain this approach, many data structures have been established.
> 
> However, maintaining this type of data structure requires a significant
> amount of memory and traversing the list is a substantial overhead,
> which is not friendly to the CPU cache.
> 
> Considering that during creation, we arranged the folio array in the
> order of pin and set the offset according to pgcnt.
> 
> We actually don't need to use unpin_list to unpin during release.
> Instead, we can iterate through the folios array during release and
> unpin any folio that is different from the ones previously accessed.
No, that won't work because iterating the folios array doesn't tell you
anything about how many times a folio was pinned (via memfd_pin_folios()),
as a folio could be part of multiple ranges.

For example, if userspace provides ranges 64..128 and 256..512 (assuming
these are 4k sized subpage offsets and we have a 2MB hugetlb folio), then
the same folio would cover both ranges and there would be 2 entries for
this folio in unpin_list. But, with your logic, we'd be erroneously unpinning
it only once.

Not sure if there are any great solutions available to address this situation,
but one option I can think of is to convert unpin_list to unpin array 
(dynamically
resized with krealloc?) and track its length separately. Or, as suggested 
earlier,
another way is to not use unpin_list for memfds backed by shmem, but I suspect
this may not work if THP is enabled.

Thanks,
Vivek

> 
> By this, not only saves the overhead of the udmabuf_folio data structure
> but also makes array access more cache-friendly.
> 
> Signed-off-by: Huan Yang <l...@vivo.com>
> ---
>  drivers/dma-buf/udmabuf.c | 68 +++++++++++++++++----------------------
>  1 file changed, 30 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 8f9cb0e2e71a..1e7f46c33d1a 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -26,16 +26,19 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a
> dmabuf, in megabytes. Default is
> 
>  struct udmabuf {
>       pgoff_t pagecount;
> -     struct folio **folios;
>       struct sg_table *sg;
>       struct miscdevice *device;
> +     struct folio **folios;
> +     /**
> +      * offset in folios array's folio, byte unit.
> +      * udmabuf can use either shmem or hugetlb pages, an array based
> on
> +      * pages may not be suitable.
> +      * Especially when HVO is enabled, the tail page will be released,
> +      * so our reference to the page will no longer be correct.
> +      * Hence, it's necessary to record the offset in order to reference
> +      * the correct PFN within the folio.
> +      */
>       pgoff_t *offsets;
> -     struct list_head unpin_list;
> -};
> -
> -struct udmabuf_folio {
> -     struct folio *folio;
> -     struct list_head list;
>  };
> 
>  static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct
> *vma)
> @@ -160,32 +163,28 @@ static void unmap_udmabuf(struct
> dma_buf_attachment *at,
>       return put_sg_table(at->dev, sg, direction);
>  }
> 
> -static void unpin_all_folios(struct list_head *unpin_list)
> +/**
> + * unpin_all_folios:         unpin each folio we pinned in create
> + * The udmabuf set all folio in folios and pinned it, but for large folio,
> + * We may have only used a small portion of the physical in the folio.
> + * we will repeatedly, sequentially set the folio into the array to ensure
> + * that the offset can index the correct folio at the corresponding index.
> + * Hence, we only need to unpin the first iterred folio.
> + */
> +static void unpin_all_folios(struct udmabuf *ubuf)
>  {
> -     struct udmabuf_folio *ubuf_folio;
> -
> -     while (!list_empty(unpin_list)) {
> -             ubuf_folio = list_first_entry(unpin_list,
> -                                           struct udmabuf_folio, list);
> -             unpin_folio(ubuf_folio->folio);
> -
> -             list_del(&ubuf_folio->list);
> -             kfree(ubuf_folio);
> -     }
> -}
> +     pgoff_t pg;
> +     struct folio *last = NULL;
> 
> -static int add_to_unpin_list(struct list_head *unpin_list,
> -                          struct folio *folio)
> -{
> -     struct udmabuf_folio *ubuf_folio;
> +     for (pg = 0; pg < ubuf->pagecount; pg++) {
> +             struct folio *tmp = ubuf->folios[pg];
> 
> -     ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
> -     if (!ubuf_folio)
> -             return -ENOMEM;
> +             if (tmp == last)
> +                     continue;
> 
> -     ubuf_folio->folio = folio;
> -     list_add_tail(&ubuf_folio->list, unpin_list);
> -     return 0;
> +             last = tmp;
> +             unpin_folio(tmp);
> +     }
>  }
> 
>  static void release_udmabuf(struct dma_buf *buf)
> @@ -196,7 +195,7 @@ static void release_udmabuf(struct dma_buf *buf)
>       if (ubuf->sg)
>               put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
> 
> -     unpin_all_folios(&ubuf->unpin_list);
> +     unpin_all_folios(ubuf);
>       kvfree(ubuf->offsets);
>       kvfree(ubuf->folios);
>       kfree(ubuf);
> @@ -308,7 +307,6 @@ static long udmabuf_create(struct miscdevice
> *device,
>       if (!ubuf)
>               return -ENOMEM;
> 
> -     INIT_LIST_HEAD(&ubuf->unpin_list);
>       pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
>       for (i = 0; i < head->count; i++) {
>               if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> @@ -366,12 +364,6 @@ static long udmabuf_create(struct miscdevice
> *device,
>                       u32 k;
>                       long fsize = folio_size(folios[j]);
> 
> -                     ret = add_to_unpin_list(&ubuf->unpin_list, folios[j]);
> -                     if (ret < 0) {
> -                             kfree(folios);
> -                             goto err;
> -                     }
> -
>                       for (k = pgoff; k < fsize; k += PAGE_SIZE) {
>                               ubuf->folios[pgbuf] = folios[j];
>                               ubuf->offsets[pgbuf] = k;
> @@ -399,7 +391,7 @@ static long udmabuf_create(struct miscdevice
> *device,
>  err:
>       if (memfd)
>               fput(memfd);
> -     unpin_all_folios(&ubuf->unpin_list);
> +     unpin_all_folios(ubuf);
>       kvfree(ubuf->offsets);
>       kvfree(ubuf->folios);
>       kfree(ubuf);
> --
> 2.45.2

Reply via email to