Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

This looks very elegant now. :)

On 2017-08-31 05:44 AM, Christian König wrote:
> From: Roger He <hongbo...@amd.com>
>
> This can improve performance in some cases.
>
> v2 (chk): handle all sizes, simplify the patch quite a bit
> v3 (chk): adjust dw estimation as well
> v4 (chk): use single loop, make end mask 64bit
>
> Signed-off-by: Roger He <hongbo...@amd.com>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 ++++++++++++++++------------------
>  1 file changed, 26 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0379af1..4c09338 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
>                               uint64_t start, uint64_t end,
>                               uint64_t dst, uint64_t flags)
>  {
> -     int r;
> -
>       /**
>        * The MC L1 TLB supports variable sized pages, based on a fragment
>        * field in the PTE. When this field is set to a non-zero value, page
> @@ -1435,39 +1433,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params        *params,
>        * Userspace can support this by aligning virtual base address and
>        * allocation size to the fragment size.
>        */
> -     unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> -     uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> -     uint64_t frag_align = 1 << pages_per_frag;
> -
> -     uint64_t frag_start = ALIGN(start, frag_align);
> -     uint64_t frag_end = end & ~(frag_align - 1);
> +     unsigned max_frag = params->adev->vm_manager.fragment_size;
> +     int r;
>  
>       /* system pages are non continuously */
> -     if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> -         (frag_start >= frag_end))
> +     if (params->src || !(flags & AMDGPU_PTE_VALID))
>               return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>  
> -     /* handle the 4K area at the beginning */
> -     if (start != frag_start) {
> -             r = amdgpu_vm_update_ptes(params, start, frag_start,
> -                                       dst, flags);
> +     while (start != end) {
> +             uint64_t frag_flags, frag_end;
> +             unsigned frag;
> +
> +             /* This intentionally wraps around if no bit is set */
> +             frag = min((unsigned)ffs(start) - 1,
> +                        (unsigned)fls64(end - start) - 1);
> +             if (frag >= max_frag) {
> +                     frag_flags = AMDGPU_PTE_FRAG(max_frag);
> +                     frag_end = end & ~((1ULL << max_frag) - 1);
> +             } else {
> +                     frag_flags = AMDGPU_PTE_FRAG(frag);
> +                     frag_end = start + (1 << frag);
> +             }
> +
> +             r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> +                                       flags | frag_flags);
>               if (r)
>                       return r;
> -             dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> -     }
> -
> -     /* handle the area in the middle */
> -     r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> -                               flags | frag_flags);
> -     if (r)
> -             return r;
>  
> -     /* handle the 4K area at the end */
> -     if (frag_end != end) {
> -             dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> -             r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> +             dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> +             start = frag_end;
>       }
> -     return r;
> +
> +     return 0;
>  }
>  
>  /**
> @@ -1557,8 +1554,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>               /* set page commands needed */
>               ndw += ncmds * 10;
>  
> -             /* two extra commands for begin/end of fragment */
> -             ndw += 2 * 10;
> +             /* extra commands for begin/end fragments */
> +             ndw += 2 * 10 * adev->vm_manager.fragment_size;
>  
>               params.func = amdgpu_vm_do_set_ptes;
>       }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to