On 2021-10-13 at 7:18 p.m., Philip Yang wrote:
> When creating a new unregistered svm range to recover a retry fault,
> avoid the new svm range overlapping with ranges or userptr ranges
> managed by TTM; otherwise svm migration would trigger TTM or userptr
> eviction and evict user queues unexpectedly.
>
> Change the helper amdgpu_ttm_tt_affect_userptr to also return the
> userptr address that falls inside the range. Add the helper
> svm_range_check_vm_userptr to scan all userptrs of the vm and return
> the start and last address of the overlapping userptr bo.
>
> Signed-off-by: Philip Yang <philip.y...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
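
The interplay of the two checks took me a moment to follow, so here is
a minimal user-space sketch of the resulting fault-recovery decision.
range_already_mapped() is a hypothetical stand-in for
svm_range_check_vm() and svm_range_check_vm_userptr(), and all
constants are made up for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool range_already_mapped(uint64_t start, uint64_t last,
				 uint64_t *bo_s, uint64_t *bo_l)
{
	/* Pretend one userptr bo covers pages 0x1000..0x11ff. */
	*bo_s = 0x1000;
	*bo_l = 0x11ff;
	return start <= *bo_l && last >= *bo_s;
}

static bool pick_range(uint64_t addr, uint64_t *start, uint64_t *last)
{
	uint64_t bo_s, bo_l;

	if (range_already_mapped(*start, *last, &bo_s, &bo_l)) {
		if (addr >= bo_s && addr <= bo_l)
			return false;	/* faulting page already mapped */
		*start = addr;		/* shrink to the faulting page */
		*last = addr;
	}
	return true;
}

int main(void)
{
	uint64_t start = 0x1100, last = 0x12ff;	/* overlaps the bo */
	uint64_t addr = 0x1234;			/* fault past the bo end */

	if (pick_range(addr, &start, &last))
		printf("create [0x%llx 0x%llx]\n",
		       (unsigned long long)start, (unsigned long long)last);
	else
		printf("fault address already mapped, give up\n");
	return 0;
}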


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 95 +++++++++++++++++++++++--
>  3 files changed, 94 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index bd5dda8066fa..d784f8d3a834 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1220,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
>   *
>   */
>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> -                               unsigned long end)
> +                               unsigned long end, unsigned long *userptr)
>  {
>       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>       unsigned long size;
> @@ -1235,6 +1235,8 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>       if (gtt->userptr > end || gtt->userptr + size <= start)
>               return false;
>  
> +     if (userptr)
> +             *userptr = gtt->userptr;
>       return true;
>  }
>  
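
A nice touch here: the out-parameter is optional, so callers that only
need the boolean answer can keep passing NULL, as the if (userptr)
guard shows. A self-contained model of the new calling convention,
with my own names rather than the driver's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool affects_userptr(uint64_t uptr, uint64_t size,
			    uint64_t start, uint64_t end,
			    uint64_t *userptr)
{
	/* Same interval test as before; only the out-parameter is new. */
	if (uptr > end || uptr + size <= start)
		return false;
	if (userptr)
		*userptr = uptr;	/* optional, callers may pass NULL */
	return true;
}

int main(void)
{
	uint64_t where;

	if (affects_userptr(0x7f0000000000ULL, 0x1000,
			    0x7f0000000800ULL, 0x7f0000001000ULL, &where))
		printf("overlapping userptr starts at 0x%llx\n",
		       (unsigned long long)where);
	return 0;
}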
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index ba5c864b8de1..91a087f9dc7c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -182,7 +182,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
>  bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
>  struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> -                               unsigned long end);
> +                               unsigned long end, unsigned long *userptr);
>  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>                                      int *last_invalidated);
>  bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 49c92713c2ad..b691c8495d66 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -50,7 +50,9 @@ static bool
>  svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
>                                   const struct mmu_notifier_range *range,
>                                   unsigned long cur_seq);
> -
> +static int
> +svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
> +                uint64_t *bo_s, uint64_t *bo_l);
>  static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
>       .invalidate = svm_range_cpu_invalidate_pagetables,
>  };
> @@ -2308,6 +2310,7 @@ svm_range_best_restore_location(struct svm_range *prange,
>  
>       return -1;
>  }
> +
>  static int
>  svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
>                               unsigned long *start, unsigned long *last)
> @@ -2355,8 +2358,59 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
>                 vma->vm_end >> PAGE_SHIFT, *last);
>  
>       return 0;
> +}
> +
> +static int
> +svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
> +                        uint64_t *bo_s, uint64_t *bo_l)
> +{
> +     struct amdgpu_bo_va_mapping *mapping;
> +     struct interval_tree_node *node;
> +     struct amdgpu_bo *bo = NULL;
> +     unsigned long userptr;
> +     uint32_t i;
> +     int r;
>  
> +     for (i = 0; i < p->n_pdds; i++) {
> +             struct amdgpu_vm *vm;
> +
> +             if (!p->pdds[i]->drm_priv)
> +                     continue;
> +
> +             vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
> +             r = amdgpu_bo_reserve(vm->root.bo, false);
> +             if (r)
> +                     return r;
> +
> +             /* Check userptr by searching entire vm->va interval tree */
> +             node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
> +             while (node) {
> +                     mapping = container_of((struct rb_node *)node,
> +                                            struct amdgpu_bo_va_mapping, rb);
> +                     bo = mapping->bo_va->base.bo;
> +
> +                     if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
> +                                                      start << PAGE_SHIFT,
> +                                                      last << PAGE_SHIFT,
> +                                                      &userptr)) {
> +                             node = interval_tree_iter_next(node, 0, ~0ULL);
> +                             continue;
> +                     }
> +
> +                     pr_debug("[0x%llx 0x%llx] already userptr mapped\n",
> +                              start, last);
> +                     if (bo_s && bo_l) {
> +                             *bo_s = userptr >> PAGE_SHIFT;
> +                             *bo_l = *bo_s + bo->tbo.ttm->num_pages - 1;
> +                     }
> +                     amdgpu_bo_unreserve(vm->root.bo);
> +                     return -EADDRINUSE;
> +             }
> +             amdgpu_bo_unreserve(vm->root.bo);
> +     }
> +     return 0;
>  }
> +
>  static struct
>  svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
>                                               struct kfd_process *p,
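
One subtlety worth spelling out: if I read this right, vm->va is keyed
by GPU virtual address while the userptr being searched for is a CPU
address, which is why the helper cannot do a targeted interval query
and instead visits every node in [0, ~0ULL]. A toy user-space model of
that linear walk, with a list standing in for the interval tree and 4K
pages assumed (all names here are illustrative, not driver API):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4K pages */

struct mapping {
	struct mapping *next;
	uint64_t userptr;	/* CPU address backing the bo, 0 if none */
	uint64_t npages;
};

static int find_userptr_overlap(const struct mapping *head,
				uint64_t start, uint64_t last,
				uint64_t *bo_s, uint64_t *bo_l)
{
	const struct mapping *m;

	for (m = head; m; m = m->next) {
		uint64_t s, l;

		if (!m->userptr)
			continue;
		s = m->userptr >> PAGE_SHIFT;
		l = s + m->npages - 1;
		if (last < s || start > l)
			continue;	/* no intersection, keep walking */
		*bo_s = s;
		*bo_l = l;
		return -1;		/* stands in for -EADDRINUSE */
	}
	return 0;
}

int main(void)
{
	struct mapping m = { .next = NULL,
			     .userptr = 0x1000ULL << PAGE_SHIFT,
			     .npages = 0x200 };
	uint64_t bo_s, bo_l;

	if (find_userptr_overlap(&m, 0x1100, 0x12ff, &bo_s, &bo_l))
		printf("busy: [0x%llx 0x%llx]\n",
		       (unsigned long long)bo_s, (unsigned long long)bo_l);
	return 0;
}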
> @@ -2366,10 +2420,26 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
>       struct svm_range *prange = NULL;
>       unsigned long start, last;
>       uint32_t gpuid, gpuidx;
> +     uint64_t bo_s = 0;
> +     uint64_t bo_l = 0;
> +     int r;
>  
>       if (svm_range_get_range_boundaries(p, addr, &start, &last))
>               return NULL;
>  
> +     r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
> +     if (r != -EADDRINUSE)
> +             r = svm_range_check_vm_userptr(p, start, last, &bo_s, &bo_l);
> +
> +     if (r == -EADDRINUSE) {
> +             if (addr >= bo_s && addr <= bo_l)
> +                     return NULL;
> +
> +             /* Create a one-page svm range if the 2MB range overlaps */
> +             start = addr;
> +             last = addr;
> +     }
> +
>       prange = svm_range_new(&p->svms, start, last);
>       if (!prange) {
>               pr_debug("Failed to create prange in address [0x%llx]\n", addr);
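
To make the one-page fallback concrete with made-up numbers (4K
pages): suppose the candidate range from
svm_range_get_range_boundaries() is pages [0x12200, 0x123ff] and a
userptr bo occupies pages [0x12000, 0x122ff]. A retry fault at page
0x12345 lies outside the bo, so instead of giving up the code above
creates the single-page range [0x12345, 0x12345]; a fault at page
0x12280 would fall inside the bo and recovery fails with NULL.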
> @@ -2672,6 +2742,8 @@ int svm_range_list_init(struct kfd_process *p)
>   * @p: current kfd_process
>   * @start: range start address, in pages
>   * @last: range last address, in pages
> + * @bo_s: mapping start address in pages if address range already mapped
> + * @bo_l: mapping last address in pages if address range already mapped
>   *
>   * The purpose is to avoid virtual address ranges already allocated by
>   * kfd_ioctl_alloc_memory_of_gpu ioctl.
> @@ -2686,8 +2758,11 @@ int svm_range_list_init(struct kfd_process *p)
>   * a signal. Release all buffer reservations and return to user-space.
>   */
>  static int
> -svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last)
> +svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
> +                uint64_t *bo_s, uint64_t *bo_l)
>  {
> +     struct amdgpu_bo_va_mapping *mapping;
> +     struct interval_tree_node *node;
>       uint32_t i;
>       int r;
>  
> @@ -2701,8 +2776,17 @@ svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last)
>               r = amdgpu_bo_reserve(vm->root.bo, false);
>               if (r)
>                       return r;
> -             if (interval_tree_iter_first(&vm->va, start, last)) {
> -                     pr_debug("Range [0x%llx 0x%llx] already mapped\n", start, last);
> +
> +             node = interval_tree_iter_first(&vm->va, start, last);
> +             if (node) {
> +                     pr_debug("range [0x%llx 0x%llx] already TTM mapped\n",
> +                              start, last);
> +                     mapping = container_of((struct rb_node *)node,
> +                                            struct amdgpu_bo_va_mapping, rb);
> +                     if (bo_s && bo_l) {
> +                             *bo_s = mapping->start;
> +                             *bo_l = mapping->last;
> +                     }
>                       amdgpu_bo_unreserve(vm->root.bo);
>                       return -EADDRINUSE;
>               }
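
For readers less familiar with the idiom: the container_of() step
above recovers the enclosing amdgpu_bo_va_mapping from the embedded
tree node. A self-contained plain-C equivalent (the struct layout is
illustrative, not the driver's):

#include <stddef.h>
#include <stdio.h>

/* Minimal container_of(): step back from a member pointer to the
 * struct that embeds it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct node {
	int color;		/* stand-in for rb-tree bookkeeping */
};

struct mapping {
	struct node rb;		/* embedded node, as in the patch */
	unsigned long start, last;
};

int main(void)
{
	struct mapping m = { .rb = { 0 }, .start = 0x1000, .last = 0x1fff };
	struct node *n = &m.rb;	/* what a tree iterator hands back */
	struct mapping *back = container_of(n, struct mapping, rb);

	printf("[0x%lx 0x%lx]\n", back->start, back->last);
	return 0;
}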
> @@ -2743,7 +2827,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
>               start = min(end, vma->vm_end);
>       } while (start < end);
>  
> -     return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT);
> +     return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT, NULL,
> +                               NULL);
>  }
>  
>  /**
