Thank you for the review, Andrzej. I'll update the patch to v2 shortly; it
should cover all your comments.

FYI, I'll be on holiday until September 16, so I might not be able to
respond in the coming days.
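
For reference, the "vector streaming copy" mentioned in the patch
description below would be something along these lines (untested userspace
sketch, assuming SSE4.1, 16-byte aligned buffers and a size that is a
multiple of 16; wc_stream_copy is just an example name):

    #include <immintrin.h>
    #include <stddef.h>

    /* Copy out of a write-combined (WC) mapping using non-temporal
     * loads (movntdqa) instead of plain memcpy. */
    static void wc_stream_copy(void *dst, const void *src, size_t len)
    {
            __m128i *d = dst;
            const __m128i *s = src;
            size_t i;

            for (i = 0; i < len / 16; i++) {
                    __m128i v = _mm_stream_load_si128((__m128i *)&s[i]);
                    _mm_store_si128(&d[i], v);
            }
    }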

Regards,
Szymon

On 03.09.2019 15:19, Andrzej Hajda wrote:
> +CC: $(./scripts/get_maintainer.pl drivers/gpu/drm/vgem/vgem_drv.c)
>
>
> On 20.08.2019 08:58, Szymon Andrzejuk wrote:
>> The page fault handler in the vgem driver now preallocates pages when a
>> fault first occurs. Pages can be preallocated in the direction of
>> increasing or decreasing addresses, depending on the memory access
>> pattern. For random access, no preallocation occurs.
>>
>> A synthetic benchmark showed an over 8x bandwidth increase when copying
>> data from an mmapped vgem buffer with memcpy, and a ~160x increase when
>> accessing the mapped buffer sequentially (compiled with gcc 8.2.0 with
>> -O2). Unigine Heaven running on a custom virgl vtest virtual GPU with
>> vgem buffers sees a ~17% FPS increase.
>>
>> This performance increase only occurs when accessing a vgem buffer
>> mapped with the DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing a buffer
>> imported from a prime fd, the vgem page fault handler is not invoked;
>> in that case it's advised to use vector streaming copy instructions and
>> to avoid sequential accesses. A streaming copy brings performance on
>> par with a similar buffer allocated with the memfd_create(2) syscall.
>>
>> Signed-off-by: Szymon Andrzejuk <s.andrze...@samsung.com>
>> ---
>>  drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------
>>  1 file changed, 143 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
>> index 11a8f99ba18c..739ba841e89c 100644
>> --- a/drivers/gpu/drm/vgem/vgem_drv.c
>> +++ b/drivers/gpu/drm/vgem/vgem_drv.c
>> @@ -34,6 +34,7 @@
>>  #include <linux/ramfs.h>
>>  #include <linux/shmem_fs.h>
>>  #include <linux/dma-buf.h>
>> +#include <linux/pfn_t.h>
>>  #include "vgem_drv.h"
>>  
>>  #define DRIVER_NAME "vgem"
>> @@ -50,8 +51,21 @@ static struct vgem_device {
>>  static void vgem_gem_free_object(struct drm_gem_object *obj)
>>  {
>>      struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
>> +    int i;
>> +
>> +    mutex_lock(&vgem_obj->pages_lock);
>> +    if (vgem_obj->pages) {
>> +            int num_pages = obj->size >> PAGE_SHIFT;
>> +
>> +            for (i = 0; i < num_pages; i++) {
>> +                    if (vgem_obj->pages[i])
>> +                            put_page(vgem_obj->pages[i]);
>> +            }
>> +            kvfree(vgem_obj->pages);
>> +            vgem_obj->pages = NULL;
>> +    }
>> +    mutex_unlock(&vgem_obj->pages_lock);
>>  
>> -    kvfree(vgem_obj->pages);
>>      mutex_destroy(&vgem_obj->pages_lock);
>>  
>>      if (obj->import_attach)
>> @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
>>      kfree(vgem_obj);
>>  }
>>  
>> +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma,
>> +                                                     unsigned long vaddr, int page_num)
>
> unused page_num
>
>
>> +{
>> +    unsigned long pfn;
>> +    int insert_ret;
>> +
>> +    pfn = page_to_pfn(page);
>> +    insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV));
>> +
>> +    if (insert_ret & VM_FAULT_ERROR)
>> +            return VM_FAULT_ERROR;
>
> Is it OK to return the mask here, instead of insert_ret?
>
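> vmf_insert_mixed() already returns a vm_fault_t, so the helper could
> simply propagate it, sth like (untested):
>
>     if (insert_ret & VM_FAULT_ERROR)
>             return insert_ret;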
>
>> +
>> +    return 0;
>> +}
>> +
>> +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj,
>> +                                                                    int page_num, struct page **page)
>> +{
>> +    int ret;
>> +    struct page *mapped_page;
>> +
>> +    mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
>> +                                          page_num);
>> +    if (IS_ERR(mapped_page)) {
>> +            switch (PTR_ERR(mapped_page)) {
>> +            case -ENOSPC:
>> +            case -ENOMEM:
>> +                    ret = VM_FAULT_OOM;
>> +                    break;
>> +            case -EBUSY:
>> +                    ret = VM_FAULT_RETRY;
>> +                    break;
>> +            case -EFAULT:
>> +            case -EINVAL:
>> +                    ret = VM_FAULT_SIGBUS;
>> +                    break;
>> +            default:
>> +                    WARN_ON(PTR_ERR(mapped_page));
>> +                    ret = VM_FAULT_SIGBUS;
>> +                    break;
>> +            }
>> +
>> +            return ret;
>> +    }
>> +
>> +    *page = mapped_page;
>> +    return 0;
>> +}
>> +
>> +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj,
>> +                                      struct vm_area_struct *vma,
>> +                                      int page_num, unsigned long vaddr)
>> +{
>> +    int ret;
>> +
>> +    ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]);
>> +    if (ret)
>> +            return ret;
>> +
>> +    ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num);
>> +    if (ret)
>> +            return ret;
>
> One can use return __vgem_alloc_page(...), up to you.
>
>
>> +
>> +    return 0;
>> +}
>> +
>>  static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>  {
>>      struct vm_area_struct *vma = vmf->vma;
>> @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>      vm_fault_t ret = VM_FAULT_SIGBUS;
>>      loff_t num_pages;
>>      pgoff_t page_offset;
>> +    int page_num, page_prep_ret;
>> +    const int PREFAULT_PAGES = 8;
>>      page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT;
>>  
>>      num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
>> @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>      if (page_offset >= num_pages)
>>              return VM_FAULT_SIGBUS;
>>  
>> +    ret = VM_FAULT_NOPAGE;
>> +
>>      mutex_lock(&obj->pages_lock);
>> -    if (obj->pages) {
>> -            get_page(obj->pages[page_offset]);
>> -            vmf->page = obj->pages[page_offset];
>> -            ret = 0;
>> -    }
>> -    mutex_unlock(&obj->pages_lock);
>> -    if (ret) {
>> -            struct page *page;
>> -
>> -            page = shmem_read_mapping_page(
>> -                                    file_inode(obj->base.filp)->i_mapping,
>> -                                    page_offset);
>> -            if (!IS_ERR(page)) {
>> -                    vmf->page = page;
>> -                    ret = 0;
>> -            } else switch (PTR_ERR(page)) {
>> -                    case -ENOSPC:
>> -                    case -ENOMEM:
>> -                            ret = VM_FAULT_OOM;
>> -                            break;
>> -                    case -EBUSY:
>> -                            ret = VM_FAULT_RETRY;
>> -                            break;
>> -                    case -EFAULT:
>> -                    case -EINVAL:
>> -                            ret = VM_FAULT_SIGBUS;
>> -                            break;
>> -                    default:
>> -                            WARN_ON(PTR_ERR(page));
>> -                            ret = VM_FAULT_SIGBUS;
>> -                            break;
>> +
>> +    if (num_pages > 1) {
>> +            bool forward = true;
>> +            bool random = false;
>> +
>> +            // Determine prefaulting direction. If adjacent pages are both
>> +            // allocated/not allocated then we have random access.
>> +            // Always try to prefault on first and last page.
>> +            if (page_offset != 0 && page_offset != num_pages - 1) {
>> +                    struct page *next, *prev;
>> +
>> +                    next = obj->pages[page_offset + 1];
>> +                    prev = obj->pages[page_offset - 1];
>> +                    if (!((uintptr_t)next ^ (uintptr_t)prev))
>> +                            random = true;
>> +                    else if (!prev)
>> +                            forward = false;
>> +            } else {
>> +                    forward = (page_offset == 0);
>>              }
>
> Quite complicated, maybe sth like this:
>
>     bool next, prev;
>
>     next = obj->pages[page_offset + 1];
>     prev = obj->pages[page_offset - 1];
>
>     if (prev == next)
>             random = true;
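>
> with the first/last page special case kept as in your version, e.g.
> (untested, just to illustrate):
>
>     if (page_offset != 0 && page_offset != num_pages - 1) {
>             bool next = obj->pages[page_offset + 1];
>             bool prev = obj->pages[page_offset - 1];
>
>             if (prev == next)
>                     random = true;
>             else if (!prev)
>                     forward = false;
>     } else {
>             forward = (page_offset == 0);
>     }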
>
>>  
>> +            if (!random) {
>> +                    for (page_num = page_offset;
>> +                            forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages :
>> +                            page_offset - page_num < PREFAULT_PAGES && page_num >= 0;
>> +                            forward ? page_num++ : page_num--) {
>
> Again complicated, try to pre-calculate the boundaries:
>
>     start_page = ...;
>     end_page = ...;
>     step = forward ? 1 : -1;
>
>     for (page_num = start_page; page_num < end_page; page_num += step)
>             ...
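>
> With exclusive end boundaries that could be sth like (untested sketch;
> min_t/max_t just to clamp at the buffer edges, and != since the loop
> can run backwards):
>
>     int step = forward ? 1 : -1;
>     int end_page;
>
>     if (forward)
>             end_page = min_t(int, page_offset + PREFAULT_PAGES, num_pages);
>     else
>             end_page = max_t(int, (int)page_offset - PREFAULT_PAGES, -1);
>
>     for (page_num = page_offset; page_num != end_page; page_num += step)
>             ...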
>
>
>> +                            if (!obj->pages[page_num]) {
>> +                                    page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr);
>> +                                    if (page_prep_ret) {
>> +                                            ret = page_prep_ret;
>> +                                            break;
>> +                                    }
>> +                            } else {
>> +                                    // random access, exit loop
>> +                                    break;
>> +                            }
>> +
>> +                            vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE;
>
> vaddr += step * PAGE_SIZE;
>
>
> Regards
> Andrzej
>
>
>> +                    }
>> +            } else {
>> +                    page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +                    if (page_prep_ret)
>> +                            ret = page_prep_ret;
>> +            }
>> +    } else {
>> +            page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +            if (page_prep_ret)
>> +                    ret = page_prep_ret;
>>      }
>> +
>> +    mutex_unlock(&obj->pages_lock);
>>      return ret;
>>  }
>>  
>> @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>                                            unsigned long size)
>>  {
>>      struct drm_vgem_gem_object *obj;
>> -    int ret;
>> +    int ret, num_pages;
>>  
>>      obj = __vgem_gem_create(dev, size);
>>      if (IS_ERR(obj))
>> @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>      if (ret)
>>              return ERR_PTR(ret);
>>  
>> +    mutex_lock(&obj->pages_lock);
>> +
>> +    num_pages = obj->base.size >> PAGE_SHIFT;
>> +    obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
>> +
>> +    mutex_unlock(&obj->pages_lock);
>> +
>>      return &obj->base;
>>  }
>>  
>> @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
>>      /* Keep the WC mmaping set by drm_gem_mmap() but our pages
>>       * are ordinary and not special.
>>       */
>> -    vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP;
>> +    vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP;
>> +
>>      return 0;
>>  }
>>  
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
