[PATCH] drm/radeon: fence virtual address and free it once idle v3

Jerome Glisse Mon, 6 Aug 2012 12:30:41 -0400

On Mon, Aug 6, 2012 at 12:06 PM, Christian K?nig
<deathsimple at vodafone.de> wrote:
> On 04.08.2012 11:07, Christian K?nig wrote:
>>
>> On 03.08.2012 23:26, j.glisse at gmail.com wrote:
>>>
>>> From: Jerome Glisse <jglisse at redhat.com>
>>>
>>> Virtual address need to be fenced to know when we can safely remove it.
>>> This patch also properly clear the pagetable. Previously it was
>>> serouisly broken.
>>>
>>> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex
>>> locking.
>>>
>>> v2: For to update pagetable when unbinding bo (don't bailout if
>>>      bo_va->valid is true).
>>> v3: Add kernel 3.5/3.4 comment.
>>>
>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>
>> It should fix the problem, going to test it as soon as I find some 5min
>> spare in my vacation. Till then it is:
>>
>> Reviewed-by: Christian K?nig <christian.koenig at amd.com>
>>
>> In the future I would really like to make the updating of the PTE also
>> async, that should save us allot of problems here.
>
>
> Had time today to test that a bit more. Found two minor notes in the patch,
> see below.
>
>
>>
>>
>>> ---
>>>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>>>   drivers/gpu/drm/radeon/radeon_cs.c     |   32
>>> +++++++++++++++++++++++++++++---
>>>   drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>>>   drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>>>   drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>>>   5 files changed, 55 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>> b/drivers/gpu/drm/radeon/radeon.h
>>> index 5431af2..8d75c65 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>>>       uint64_t            soffset;
>>>       uint64_t            eoffset;
>>>       uint32_t            flags;
>>> +    struct radeon_fence        *fence;
>>>       bool                valid;
>>>   };
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_cs.c
>>> b/drivers/gpu/drm/radeon/radeon_cs.c
>>> index 8a4c49e..995f3ab 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_cs.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
>>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser
>>> *p, void *data)
>>>       return 0;
>>>   }
>>>   +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
>>> +                  struct radeon_fence *fence)
>>> +{
>>> +    struct radeon_fpriv *fpriv = parser->filp->driver_priv;
>>> +    struct radeon_vm *vm = &fpriv->vm;
>>> +    struct radeon_bo_list *lobj;
>
>
>>> +    int r;
>
> "r" is unused here, please remove.
>
>
>>> +
>>> +    if (parser->chunk_ib_idx == -1)
>>> +        return;
>>> +    if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>> +        return;
>>> +
>>> +    list_for_each_entry(lobj, &parser->validated, tv.head) {
>>> +        struct radeon_bo_va *bo_va;
>>> +        struct radeon_bo *rbo = lobj->bo;
>>> +
>>> +        bo_va = radeon_bo_va(rbo, vm);
>>> +        radeon_fence_unref(&bo_va->fence);
>>> +        bo_va->fence = radeon_fence_ref(fence);
>>> +    }
>
>
>>> +    return 0;
>
> The function doesn't return an error value, so this should just be a
> "return" without value.
>
>
>>> +}
>>> +
>>>   /**
>>>    * cs_parser_fini() - clean parser states
>>>    * @parser:    parser structure holding parsing context.
>>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct
>>> radeon_cs_parser *parser, int error)
>>>   {
>>>       unsigned i;
>>>   -    if (!error)
>>> +    if (!error) {
>>> +        /* fence all bo va before ttm_eu_fence_buffer_objects so bo are
>>> still reserved */
>>> +        radeon_bo_vm_fence_va(parser, parser->ib.fence);
>>>           ttm_eu_fence_buffer_objects(&parser->validated,
>>>                           parser->ib.fence);
>>> -    else
>>> +    } else {
>>>           ttm_eu_backoff_reservation(&parser->validated);
>>> +    }
>>>         if (parser->relocs != NULL) {
>>>           for (i = 0; i < parser->nrelocs; i++) {
>>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device
>>> *rdev,
>>>         if (parser->chunk_ib_idx == -1)
>>>           return 0;
>>> -
>>>       if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>>           return 0;
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_gart.c
>>> b/drivers/gpu/drm/radeon/radeon_gart.c
>>> index b372005..9912182 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device
>>> *rdev,
>>>           return -EINVAL;
>>>       }
>>>   -    if (bo_va->valid)
>>> +    if (bo_va->valid && mem)
>>>           return 0;
>>>         ngpu_pages = radeon_bo_ngpu_pages(bo);
>>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>>                struct radeon_bo *bo)
>>>   {
>>>       struct radeon_bo_va *bo_va;
>>> +    int r;
>>>         bo_va = radeon_bo_va(bo, vm);
>>>       if (bo_va == NULL)
>>>           return 0;
>>>   +    /* wait for va use to end */
>>> +    while (bo_va->fence) {
>>> +        r = radeon_fence_wait(bo_va->fence, false);
>>> +        if (r) {
>>> +            DRM_ERROR("error while waiting for fence: %d\n", r);
>>> +        }
>>> +        if (r == -EDEADLK) {
>>> +            r = radeon_gpu_reset(rdev);
>>> +            if (!r)
>>> +                continue;
>>> +        }
>>> +        break;
>>> +    }
>>> +    radeon_fence_unref(&bo_va->fence);
>>> +
>>>       mutex_lock(&rdev->vm_manager.lock);
>>>       mutex_lock(&vm->mutex);
>>>       radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev,
>>> struct radeon_vm *vm)
>>>       radeon_vm_unbind_locked(rdev, vm);
>>>       mutex_unlock(&rdev->vm_manager.lock);
>>>   -    /* remove all bo */
>>> +    /* remove all bo at this point non are busy any more because unbind
>>> +     * waited for the last vm fence to signal
>>> +     */
>>>       r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>>>       if (!r) {
>>>           bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>>>           list_del_init(&bo_va->bo_list);
>>>           list_del_init(&bo_va->vm_list);
>>> +        radeon_fence_unref(&bo_va->fence);
>>>           radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>>>           kfree(bo_va);
>>>       }
>>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev,
>>> struct radeon_vm *vm)
>>>           r = radeon_bo_reserve(bo_va->bo, false);
>>>           if (!r) {
>>>               list_del_init(&bo_va->bo_list);
>>> +            radeon_fence_unref(&bo_va->fence);
>>>               radeon_bo_unreserve(bo_va->bo);
>>>               kfree(bo_va);
>>>           }
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>> index 84d0452..1b57b00 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct radeon_device *rdev = rbo->rdev;
>>>       struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>>       struct radeon_vm *vm = &fpriv->vm;
>>> -    struct radeon_bo_va *bo_va, *tmp;
>>>         if (rdev->family < CHIP_CAYMAN) {
>>>           return;
>>>       }
>>>         if (radeon_bo_reserve(rbo, false)) {
>>> +        dev_err(rdev->dev, "leaking bo va because we fail to reserve
>>> bo\n");
>>>           return;
>>>       }
>>> -    list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
>>> -        if (bo_va->vm == vm) {
>>> -            /* remove from this vm address space */
>>> -            mutex_lock(&vm->mutex);
>>> -            list_del(&bo_va->vm_list);
>>> -            mutex_unlock(&vm->mutex);
>>> -            list_del(&bo_va->bo_list);
>>> -            kfree(bo_va);
>>> -        }
>>> -    }
>>> +    radeon_vm_bo_rmv(rdev, vm, rbo);
>>>       radeon_bo_unreserve(rbo);
>>>   }
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>>> b/drivers/gpu/drm/radeon/radeon_object.c
>>> index 1f1a4c8..1cb014b 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>>>         list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>>>           /* remove from all vm address space */
>>> -        mutex_lock(&bo_va->vm->mutex);
>>> -        list_del(&bo_va->vm_list);
>>> -        mutex_unlock(&bo_va->vm->mutex);
>>> -        list_del(&bo_va->bo_list);
>>> -        kfree(bo_va);
>>> +        radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>>>       }
>>>   }
>>
>>
>
> Additional to that patch we still need a minor fix to mesa (just move
> freeing the VM range after closing the handle). Going to send that in the
> next minute to the mesa-dev list.
>
> Otherwise the patch indeed fixes the problem, but also isn't the best idea
> for performance.
>
> Cheers,
> Christian.
>


This does not impact performance, it's all about destroying bo/va so
it's not a performance path. Or am i missing something here ?

Cheers,
Jerome

[PATCH] drm/radeon: fence virtual address and free it once idle v3

Reply via email to