This allows detecting deadlocks that happen because of resource
constraints.

In particular, submissions which want to use all of GDS no longer
result in sporadic -ENOMEM.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 ++++++++++++++------------
 1 file changed, 46 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec888fc6ead8..ff532c8b7a62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -782,7 +782,7 @@ static int amdgpu_cs_bo_validate(void *param, struct 
amdgpu_bo *bo)
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
-               .resv = bo->tbo.base.resv
+               .exec = &p->exec,
        };
        uint32_t domain;
        int r;
@@ -834,7 +834,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                union drm_amdgpu_cs *cs)
 {
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-       struct ttm_operation_ctx ctx = { true, false };
+       struct ttm_operation_ctx ctx = {
+               .interruptible =true,
+               .exec = &p->exec
+       };
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry *e;
        struct drm_gem_object *obj;
@@ -919,50 +922,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
*p,
                        if (unlikely(r))
                                goto out_free_user_pages;
                }
-       }
-
-       amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-               struct mm_struct *usermm;
 
-               usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
-               if (usermm && usermm != current->mm) {
-                       r = -EPERM;
-                       goto out_free_user_pages;
-               }
+               amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+                       struct mm_struct *usermm;
 
-               if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
-                   e->user_invalidated && e->user_pages) {
-                       amdgpu_bo_placement_from_domain(e->bo,
-                                                       AMDGPU_GEM_DOMAIN_CPU);
-                       r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
-                                           &ctx);
-                       if (r)
+                       usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+                       if (usermm && usermm != current->mm) {
+                               r = -EPERM;
                                goto out_free_user_pages;
+                       }
+
+                       if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+                           e->user_invalidated && e->user_pages) {
+                               amdgpu_bo_placement_from_domain(e->bo,
+                                                               
AMDGPU_GEM_DOMAIN_CPU);
+                               r = ttm_bo_validate(&e->bo->tbo, 
&e->bo->placement,
+                                                   &ctx);
+                               drm_exec_retry_on_contention(&p->exec);
+                               if (r)
+                                       goto out_free_user_pages;
+
+                               amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+                                                            e->user_pages);
+                       }
 
-                       amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
-                                                    e->user_pages);
+                       kvfree(e->user_pages);
+                       e->user_pages = NULL;
                }
 
-               kvfree(e->user_pages);
-               e->user_pages = NULL;
-       }
+               amdgpu_cs_get_threshold_for_moves(p->adev, 
&p->bytes_moved_threshold,
+                                                 
&p->bytes_moved_vis_threshold);
+               p->bytes_moved = 0;
+               p->bytes_moved_vis = 0;
 
-       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
-                                         &p->bytes_moved_vis_threshold);
-       p->bytes_moved = 0;
-       p->bytes_moved_vis = 0;
+               r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
+                                      amdgpu_cs_bo_validate, p);
+               drm_exec_retry_on_contention(&p->exec);
+               if (r) {
+                       DRM_ERROR("amdgpu_vm_validate() failed.\n");
+                       goto out_free_user_pages;
+               }
 
-       r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
-                              amdgpu_cs_bo_validate, p);
-       if (r) {
-               DRM_ERROR("amdgpu_vm_validate() failed.\n");
-               goto out_free_user_pages;
-       }
+               drm_exec_for_each_locked_object(&p->exec, index, obj) {
+                       r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
+                       drm_exec_retry_on_contention(&p->exec);
+                       if (unlikely(r))
+                               goto out_free_user_pages;
+               }
 
-       drm_exec_for_each_locked_object(&p->exec, index, obj) {
-               r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
-               if (unlikely(r))
-                       goto out_free_user_pages;
+               amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+                                            p->bytes_moved_vis);
        }
 
        if (p->uf_bo) {
@@ -973,9 +982,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
        }
 
-       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
-                                    p->bytes_moved_vis);
-
        for (i = 0; i < p->gang_size; ++i)
                amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
                                         p->bo_list->gws_obj,
-- 
2.34.1

Reply via email to