On Mon, Aug 6, 2012 at 12:06 PM, Christian K?nig <deathsimple at vodafone.de> wrote: > On 04.08.2012 11:07, Christian K?nig wrote: >> >> On 03.08.2012 23:26, j.glisse at gmail.com wrote: >>> >>> From: Jerome Glisse <jglisse at redhat.com> >>> >>> Virtual address need to be fenced to know when we can safely remove it. >>> This patch also properly clear the pagetable. Previously it was >>> serouisly broken. >>> >>> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex >>> locking. >>> >>> v2: For to update pagetable when unbinding bo (don't bailout if >>> bo_va->valid is true). >>> v3: Add kernel 3.5/3.4 comment. >>> >>> Signed-off-by: Jerome Glisse <jglisse at redhat.com> >> >> It should fix the problem, going to test it as soon as I find some 5min >> spare in my vacation. Till then it is: >> >> Reviewed-by: Christian K?nig <christian.koenig at amd.com> >> >> In the future I would really like to make the updating of the PTE also >> async, that should save us allot of problems here. > > > Had time today to test that a bit more. Found two minor notes in the patch, > see below. > > >> >> >>> --- >>> drivers/gpu/drm/radeon/radeon.h | 1 + >>> drivers/gpu/drm/radeon/radeon_cs.c | 32 >>> +++++++++++++++++++++++++++++--- >>> drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- >>> drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- >>> drivers/gpu/drm/radeon/radeon_object.c | 6 +----- >>> 5 files changed, 55 insertions(+), 21 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>> b/drivers/gpu/drm/radeon/radeon.h >>> index 5431af2..8d75c65 100644 >>> --- a/drivers/gpu/drm/radeon/radeon.h >>> +++ b/drivers/gpu/drm/radeon/radeon.h >>> @@ -300,6 +300,7 @@ struct radeon_bo_va { >>> uint64_t soffset; >>> uint64_t eoffset; >>> uint32_t flags; >>> + struct radeon_fence *fence; >>> bool valid; >>> }; >>> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c >>> b/drivers/gpu/drm/radeon/radeon_cs.c >>> index 8a4c49e..995f3ab 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_cs.c >>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c >>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser >>> *p, void *data) >>> return 0; >>> } >>> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, >>> + struct radeon_fence *fence) >>> +{ >>> + struct radeon_fpriv *fpriv = parser->filp->driver_priv; >>> + struct radeon_vm *vm = &fpriv->vm; >>> + struct radeon_bo_list *lobj; > > >>> + int r; > > "r" is unused here, please remove. > > >>> + >>> + if (parser->chunk_ib_idx == -1) >>> + return; >>> + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >>> + return; >>> + >>> + list_for_each_entry(lobj, &parser->validated, tv.head) { >>> + struct radeon_bo_va *bo_va; >>> + struct radeon_bo *rbo = lobj->bo; >>> + >>> + bo_va = radeon_bo_va(rbo, vm); >>> + radeon_fence_unref(&bo_va->fence); >>> + bo_va->fence = radeon_fence_ref(fence); >>> + } > > >>> + return 0; > > The function doesn't return an error value, so this should just be a > "return" without value. > > >>> +} >>> + >>> /** >>> * cs_parser_fini() - clean parser states >>> * @parser: parser structure holding parsing context. >>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct >>> radeon_cs_parser *parser, int error) >>> { >>> unsigned i; >>> - if (!error) >>> + if (!error) { >>> + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are >>> still reserved */ >>> + radeon_bo_vm_fence_va(parser, parser->ib.fence); >>> ttm_eu_fence_buffer_objects(&parser->validated, >>> parser->ib.fence); >>> - else >>> + } else { >>> ttm_eu_backoff_reservation(&parser->validated); >>> + } >>> if (parser->relocs != NULL) { >>> for (i = 0; i < parser->nrelocs; i++) { >>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device >>> *rdev, >>> if (parser->chunk_ib_idx == -1) >>> return 0; >>> - >>> if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >>> return 0; >>> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c >>> b/drivers/gpu/drm/radeon/radeon_gart.c >>> index b372005..9912182 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_gart.c >>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c >>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device >>> *rdev, >>> return -EINVAL; >>> } >>> - if (bo_va->valid) >>> + if (bo_va->valid && mem) >>> return 0; >>> ngpu_pages = radeon_bo_ngpu_pages(bo); >>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, >>> struct radeon_bo *bo) >>> { >>> struct radeon_bo_va *bo_va; >>> + int r; >>> bo_va = radeon_bo_va(bo, vm); >>> if (bo_va == NULL) >>> return 0; >>> + /* wait for va use to end */ >>> + while (bo_va->fence) { >>> + r = radeon_fence_wait(bo_va->fence, false); >>> + if (r) { >>> + DRM_ERROR("error while waiting for fence: %d\n", r); >>> + } >>> + if (r == -EDEADLK) { >>> + r = radeon_gpu_reset(rdev); >>> + if (!r) >>> + continue; >>> + } >>> + break; >>> + } >>> + radeon_fence_unref(&bo_va->fence); >>> + >>> mutex_lock(&rdev->vm_manager.lock); >>> mutex_lock(&vm->mutex); >>> radeon_vm_bo_update_pte(rdev, vm, bo, NULL); >>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, >>> struct radeon_vm *vm) >>> radeon_vm_unbind_locked(rdev, vm); >>> mutex_unlock(&rdev->vm_manager.lock); >>> - /* remove all bo */ >>> + /* remove all bo at this point non are busy any more because unbind >>> + * waited for the last vm fence to signal >>> + */ >>> r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); >>> if (!r) { >>> bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); >>> list_del_init(&bo_va->bo_list); >>> list_del_init(&bo_va->vm_list); >>> + radeon_fence_unref(&bo_va->fence); >>> radeon_bo_unreserve(rdev->ring_tmp_bo.bo); >>> kfree(bo_va); >>> } >>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, >>> struct radeon_vm *vm) >>> r = radeon_bo_reserve(bo_va->bo, false); >>> if (!r) { >>> list_del_init(&bo_va->bo_list); >>> + radeon_fence_unref(&bo_va->fence); >>> radeon_bo_unreserve(bo_va->bo); >>> kfree(bo_va); >>> } >>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c >>> b/drivers/gpu/drm/radeon/radeon_gem.c >>> index 84d0452..1b57b00 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_gem.c >>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c >>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object >>> *obj, >>> struct radeon_device *rdev = rbo->rdev; >>> struct radeon_fpriv *fpriv = file_priv->driver_priv; >>> struct radeon_vm *vm = &fpriv->vm; >>> - struct radeon_bo_va *bo_va, *tmp; >>> if (rdev->family < CHIP_CAYMAN) { >>> return; >>> } >>> if (radeon_bo_reserve(rbo, false)) { >>> + dev_err(rdev->dev, "leaking bo va because we fail to reserve >>> bo\n"); >>> return; >>> } >>> - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { >>> - if (bo_va->vm == vm) { >>> - /* remove from this vm address space */ >>> - mutex_lock(&vm->mutex); >>> - list_del(&bo_va->vm_list); >>> - mutex_unlock(&vm->mutex); >>> - list_del(&bo_va->bo_list); >>> - kfree(bo_va); >>> - } >>> - } >>> + radeon_vm_bo_rmv(rdev, vm, rbo); >>> radeon_bo_unreserve(rbo); >>> } >>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c >>> b/drivers/gpu/drm/radeon/radeon_object.c >>> index 1f1a4c8..1cb014b 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_object.c >>> +++ b/drivers/gpu/drm/radeon/radeon_object.c >>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) >>> list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { >>> /* remove from all vm address space */ >>> - mutex_lock(&bo_va->vm->mutex); >>> - list_del(&bo_va->vm_list); >>> - mutex_unlock(&bo_va->vm->mutex); >>> - list_del(&bo_va->bo_list); >>> - kfree(bo_va); >>> + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); >>> } >>> } >> >> > > Additional to that patch we still need a minor fix to mesa (just move > freeing the VM range after closing the handle). Going to send that in the > next minute to the mesa-dev list. > > Otherwise the patch indeed fixes the problem, but also isn't the best idea > for performance. > > Cheers, > Christian. >
This does not impact performance, it's all about destroying bo/va so it's not a performance path. Or am i missing something here ? Cheers, Jerome