It looks like aborting in such a case is the safe way; otherwise the fence_wait() outside 
will still fail 

-----Original Message-----
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: 2018年3月1日 18:50
To: Liu, Monk <monk....@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu: use separate status for buffer funcs 
availability

Am 01.03.2018 um 11:33 schrieb Liu, Monk:
>> int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) @@ 
>> -1684,6 +1680,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, 
>> uint64_t src_offset,
>       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>       WARN_ON(job->ibs[0].length_dw > num_dw);
>       if (direct_submit) {
> +             WARN_ON(!ring->ready);
>               r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
>                                      NULL, fence);
>               job->fence = dma_fence_get(*fence);
>
> [ML] In the direct_submit case, if ring->ready is false, why do we continue and only 
> give a warning about it? Shouldn't we just abort, or use the scheduler path instead?

When we use direct submission the scheduler is turned off. So we could return 
an error, but using the scheduler probably results in a deadlock.

Christian.

>
>
> /Monk
>
>
> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2018年3月1日 18:23
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <monk....@amd.com>
> Subject: [PATCH 4/4] drm/amdgpu: use separate status for buffer funcs 
> availability
>
> The ring status can change during GPU reset, but we still need to be able to 
> schedule TTM buffer moves in the meantime.
>
> Otherwise we can run into problems because of aborted move/fill operations 
> during GPU resets.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 +++++++----------  
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  1 +
>   2 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2aa6823ef503..53b34b3b8232 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -213,9 +213,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object 
> *bo,
>       abo = ttm_to_amdgpu_bo(bo);
>       switch (bo->mem.mem_type) {
>       case TTM_PL_VRAM:
> -             if (adev->mman.buffer_funcs &&
> -                 adev->mman.buffer_funcs_ring &&
> -                 adev->mman.buffer_funcs_ring->ready == false) {
> +             if (!adev->mman.buffer_funcs_enabled) {
>                       amdgpu_ttm_placement_from_domain(abo, 
> AMDGPU_GEM_DOMAIN_CPU);
>               } else if (adev->gmc.visible_vram_size < 
> adev->gmc.real_vram_size &&
>                          !(abo->flags & 
> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { @@ -331,7 +329,7 @@ int 
> amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>       const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
>                                       AMDGPU_GPU_PAGE_SIZE);
>   
> -     if (!ring->ready) {
> +     if (!adev->mman.buffer_funcs_enabled) {
>               DRM_ERROR("Trying to move memory with ring turned off.\n");
>               return -EINVAL;
>       }
> @@ -577,12 +575,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
> bool evict,
>               amdgpu_move_null(bo, new_mem);
>               return 0;
>       }
> -     if (adev->mman.buffer_funcs == NULL ||
> -         adev->mman.buffer_funcs_ring == NULL ||
> -         !adev->mman.buffer_funcs_ring->ready) {
> -             /* use memcpy */
> +
> +     if (!adev->mman.buffer_funcs_enabled)
>               goto memcpy;
> -     }
>   
>       if (old_mem->mem_type == TTM_PL_VRAM &&
>           new_mem->mem_type == TTM_PL_SYSTEM) { @@ -1549,6 +1544,7 @@ void 
> amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>       else
>               size = adev->gmc.visible_vram_size;
>       man->size = size >> PAGE_SHIFT;
> +     adev->mman.buffer_funcs_enabled = enable;
>   }
>   
>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1684,6 
> +1680,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t 
> src_offset,
>       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>       WARN_ON(job->ibs[0].length_dw > num_dw);
>       if (direct_submit) {
> +             WARN_ON(!ring->ready);
>               r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
>                                      NULL, fence);
>               job->fence = dma_fence_get(*fence); @@ -1720,7 +1717,7 @@ int 
> amdgpu_fill_buffer(struct amdgpu_bo *bo,
>       struct amdgpu_job *job;
>       int r;
>   
> -     if (!ring->ready) {
> +     if (!adev->mman.buffer_funcs_enabled) {
>               DRM_ERROR("Trying to clear memory with ring turned off.\n");
>               return -EINVAL;
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b8117c6e51f1..6ea7de863041 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -53,6 +53,7 @@ struct amdgpu_mman {
>       /* buffer handling */
>       const struct amdgpu_buffer_funcs        *buffer_funcs;
>       struct amdgpu_ring                      *buffer_funcs_ring;
> +     bool                                    buffer_funcs_enabled;
>   
>       struct mutex                            gtt_window_lock;
>       /* Scheduler entity for buffer moves */
> --
> 2.14.1
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to