On Fri, Jun 11, 2021 at 02:03:01PM +0200, Christian König wrote:
> Drop the workaround and instead implement a better solution.
> 
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as exclusive fence to the dma_resv object.
> 
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only sync to fences from different processes.
> 
> v2: polish kerneldoc a bit

There is no kerneldoc here, and otherwise this patch looks unchanged from
the previous round. Hit send a bit too quickly?
-Daniel

> 
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 54 +++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>  6 files changed, 47 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>  struct amdgpu_bo_list_entry {
>       struct ttm_validate_buffer      tv;
>       struct amdgpu_bo_va             *bo_va;
> +     struct dma_fence_chain          *chain;
>       uint32_t                        priority;
>       struct page                     **user_pages;
>       bool                            user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 325e82621467..f6f3029f958d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -587,6 +587,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>               goto out;
>       }
>  
> +     amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +             struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> +             e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> +             if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> +                     e->chain = dma_fence_chain_alloc();
> +                     if (!e->chain) {
> +                             r = -ENOMEM;
> +                             goto error_validate;
> +                     }
> +             }
> +     }
> +
>       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>                                         &p->bytes_moved_vis_threshold);
>       p->bytes_moved = 0;
> @@ -614,15 +628,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>       gws = p->bo_list->gws_obj;
>       oa = p->bo_list->oa_obj;
>  
> -     amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -             struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> -             /* Make sure we use the exclusive slot for shared BOs */
> -             if (bo->prime_shared_count)
> -                     e->tv.num_shared = 0;
> -             e->bo_va = amdgpu_vm_bo_find(vm, bo);
> -     }
> -
>       if (gds) {
>               p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>               p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -644,8 +649,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>       }
>  
>  error_validate:
> -     if (r)
> +     if (r) {
> +             amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +                     dma_fence_chain_free(e->chain);
> +                     e->chain = NULL;
> +             }
>               ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> +     }
>  out:
>       return r;
>  }
> @@ -685,9 +695,17 @@ static void amdgpu_cs_parser_fini(struct 
> amdgpu_cs_parser *parser, int error,
>  {
>       unsigned i;
>  
> -     if (error && backoff)
> +     if (error && backoff) {
> +             struct amdgpu_bo_list_entry *e;
> +
> +             amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> +                     dma_fence_chain_free(e->chain);
> +                     e->chain = NULL;
> +             }
> +
>               ttm_eu_backoff_reservation(&parser->ticket,
>                                          &parser->validated);
> +     }
>  
>       for (i = 0; i < parser->num_post_deps; i++) {
>               drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1260,6 +1278,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  
>       amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>  
> +     amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +             struct dma_resv *resv = e->tv.bo->base.resv;
> +             struct dma_fence_chain *chain = e->chain;
> +
> +             if (!chain)
> +                     continue;
> +
> +             dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> +                                  dma_fence_get(p->fence), 1);
> +
> +             rcu_assign_pointer(resv->fence_excl, &chain->base);
> +             e->chain = NULL;
> +     }
> +
>       ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>       mutex_unlock(&p->adev->notifier_lock);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
>  #include <linux/pci-p2pdma.h>
>  #include <linux/pm_runtime.h>
>  
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> -     struct dma_fence **fences;
> -     unsigned int count;
> -     int r;
> -
> -     if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> -             return 0;
> -
> -     r = dma_resv_get_fences(obj, NULL, &count, &fences);
> -     if (r)
> -             return r;
> -
> -     if (count == 0) {
> -             /* Now that was unexpected. */
> -     } else if (count == 1) {
> -             dma_resv_add_excl_fence(obj, fences[0]);
> -             dma_fence_put(fences[0]);
> -             kfree(fences);
> -     } else {
> -             struct dma_fence_array *array;
> -
> -             array = dma_fence_array_create(count, fences,
> -                                            dma_fence_context_alloc(1), 0,
> -                                            false);
> -             if (!array)
> -                     goto err_fences_put;
> -
> -             dma_resv_add_excl_fence(obj, &array->base);
> -             dma_fence_put(&array->base);
> -     }
> -
> -     return 0;
> -
> -err_fences_put:
> -     while (count--)
> -             dma_fence_put(fences[count]);
> -     kfree(fences);
> -     return -ENOMEM;
> -}
> -
>  /**
>   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>   *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>       if (r < 0)
>               goto out;
>  
> -     r = amdgpu_bo_reserve(bo, false);
> -     if (unlikely(r != 0))
> -             goto out;
> -
> -     /*
> -      * We only create shared fences for internal use, but importers
> -      * of the dmabuf rely on exclusive fences for implicitly
> -      * tracking write hazards. As any of the current fences may
> -      * correspond to a write, we need to convert all existing
> -      * fences on the reservation object into a single exclusive
> -      * fence.
> -      */
> -     r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> -     if (r)
> -             goto out;
> -
> -     bo->prime_shared_count++;
> -     amdgpu_bo_unreserve(bo);
>       return 0;
>  
>  out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>  
> -     if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> -             bo->prime_shared_count--;
> -
>       pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>  }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
> dma_buf *dma_buf)
>       bo = gem_to_amdgpu_bo(gobj);
>       bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>       bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> -     if (dma_buf->ops != &amdgpu_dmabuf_ops)
> -             bo->prime_shared_count = 1;
>  
>       dma_resv_unlock(resv);
>       return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 1c3e3b608332..5d82120fc160 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void 
> *data,
>               break;
>       }
>       case AMDGPU_GEM_OP_SET_PLACEMENT:
> -             if (robj->prime_shared_count && (args->value & 
> AMDGPU_GEM_DOMAIN_VRAM)) {
> +             if (robj->tbo.base.import_attach &&
> +                 args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>                       r = -EINVAL;
>                       amdgpu_bo_unreserve(robj);
>                       break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 96447e1d4c9c..30951b593809 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -871,7 +871,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
> domain,
>               return -EINVAL;
>  
>       /* A shared bo cannot be migrated to VRAM */
> -     if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> +     if (bo->tbo.base.import_attach) {
>               if (domain & AMDGPU_GEM_DOMAIN_GTT)
>                       domain = AMDGPU_GEM_DOMAIN_GTT;
>               else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index b35962702278..55d7e90eaa72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -98,7 +98,6 @@ struct amdgpu_bo {
>       struct ttm_buffer_object        tbo;
>       struct ttm_bo_kmap_obj          kmap;
>       u64                             flags;
> -     unsigned                        prime_shared_count;
>       /* per VM structure for page tables and with virtual addresses */
>       struct amdgpu_vm_bo_base        *vm_bo;
>       /* Constant after initialization */
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to