On Tue, Oct 31, 2023 at 3:08 PM Christian König <christian.koe...@amd.com> wrote:
> On 31.10.23 at 14:59, Bas Nieuwenhuizen wrote:
>> On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koe...@amd.com> wrote:
>>> On 31.10.23 at 14:40, Tatsuyuki Ishi wrote:
>>>> The current amdgpu_gem_va_update_vm only tries to perform updates for the
>>>> BO specified in the GEM ioctl; however, when a binding is split, the
>>>> adjacent bindings also need to be updated. Such updates currently end up
>>>> getting deferred until the next submission, which causes stalls.
>>>
>>> Yeah, that is a necessity. The hardware simply doesn't support what you
>>> try to do here in all cases.
>>
>> What can the hardware not do here? Is this just needing to wait for TLB
>> flushes before we can free pagetables, and can we just delay that?
>
> On some hardware generations (especially Navi1x, but also everything older
> than Polaris) you can't invalidate the TLB while it is in use.
>
> For Polaris and older it just means that you don't have a guarantee that
> the shader can't access the memory any more. So delaying the free operation
> helps here.
>
> But for Navi1x it's a workaround for a hardware bug. If you try to
> invalidate the TLB while it is in use you can potentially trigger memory
> accesses to random addresses.
>
> That's why we still delay TLB invalidations to the next CS and use a new
> VMID for each submission instead of invalidating the old one.
>
> I'm currently working on changing that for Navi2x and newer (maybe Vega as
> well), but this is something you can really only do on some hw generations
> after validating that it works.

I think as long as we make sure all significant work gets done
asynchronously, doing the TLB flushing on the next submit (/submissions,
one per queue?) is fine for our purposes.
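To make "delaying the free" concrete, something along the following lines
is what I have in mind. This is purely a sketch: the per-VM deferred_free
list, the deferred_lock spinlock and the helper names are invented here
and are not existing amdgpu code.

/* Sketch only: assumes two hypothetical fields on struct amdgpu_vm,
 *	struct list_head deferred_free;
 *	spinlock_t deferred_lock;
 */
struct deferred_pt_free {
	struct list_head	list;
	struct dma_fence	*tlb_fence; /* signals once the next CS flushed the TLB */
	struct amdgpu_bo	*pt_bo;     /* unhooked lower-level page table */
};

/* On unmap, park the page table behind the TLB flush instead of freeing it. */
static void amdgpu_vm_defer_pt_free(struct amdgpu_vm *vm,
				    struct amdgpu_bo *pt_bo,
				    struct dma_fence *tlb_fence)
{
	struct deferred_pt_free *e = kmalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return; /* real code would need a synchronous fallback here */

	e->pt_bo = pt_bo;
	e->tlb_fence = dma_fence_get(tlb_fence);

	spin_lock(&vm->deferred_lock);
	list_add_tail(&e->list, &vm->deferred_free);
	spin_unlock(&vm->deferred_lock);
}

/* After each submission: free everything the TLB can no longer reference. */
static void amdgpu_vm_reap_deferred(struct amdgpu_vm *vm)
{
	struct deferred_pt_free *e, *tmp;
	LIST_HEAD(reap);

	spin_lock(&vm->deferred_lock);
	list_for_each_entry_safe(e, tmp, &vm->deferred_free, list)
		if (dma_fence_is_signaled(e->tlb_fence))
			list_move_tail(&e->list, &reap);
	spin_unlock(&vm->deferred_lock);

	/* Dropping the last BO reference can sleep, so do it unlocked. */
	list_for_each_entry_safe(e, tmp, &reap, list) {
		list_del(&e->list);
		dma_fence_put(e->tlb_fence);
		amdgpu_bo_unref(&e->pt_bo);
		kfree(e);
	}
}

That way the unmap itself stays cheap and synchronous, and the actual free
just waits for the fence of the CS that carries the flush.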
(As an aside, after thinking some more I *think* we also need some work to
make these maps/unmaps (VALID->PRT and PRT->VALID) atomic, as I think it is
valid Vulkan to make these race. As such I'm speculating we'd need a bit
more reworking there too, not just a late free of the lower-level
pagetables.)
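To illustrate what I mean by atomic, here is a toy model at the level of a
single PTE; the bit layout is invented and this ignores the multi-level
tables, SDMA updates and the TLB entirely:

#include <stdatomic.h>
#include <stdint.h>

#define PTE_VALID	(1ull << 0)
#define PTE_PRT		(1ull << 51)	/* invented bit position */

/* Racy remap: between the two stores the entry is neither PRT nor valid,
 * so a shader racing with the rebind can fault on the range. */
static void remap_racy(_Atomic uint64_t *pte, uint64_t new_addr)
{
	atomic_store(pte, 0);                    /* unmap: entry invalid */
	atomic_store(pte, new_addr | PTE_VALID); /* map: entry valid again */
}

/* Atomic remap: a single store flips the entry straight from PRT to a
 * valid mapping, so a racing access sees one consistent state or the
 * other, never the invalid window in between. */
static void remap_prt_to_valid(_Atomic uint64_t *pte, uint64_t new_addr)
{
	atomic_store(pte, new_addr | PTE_VALID);
}

/* The VALID->PRT direction is likewise a single store of the PRT pattern. */
static void remap_valid_to_prt(_Atomic uint64_t *pte)
{
	atomic_store(pte, PTE_PRT);
}

The hard part is of course doing the equivalent across page-table levels
when a binding is split, which is why I suspect this needs more rework.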
- Bas

> Regards,
> Christian.
>
>>> So this approach won't work in general.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> Introduce a new state "dirty", shared between per-VM BOs and traditional
>>>> BOs, containing all BOs that have pending updates in `invalids`.
>>>> amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
>>>> in the dirty state.
>>>>
>>>> Signed-off-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>>>>  3 files changed, 63 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index a1b15d0d6c48..01d3a97248b0 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>>>   * vital here, so they are not reported back to userspace.
>>>>   */
>>>>  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>> -				    struct amdgpu_vm *vm,
>>>> -				    struct amdgpu_bo_va *bo_va,
>>>> -				    uint32_t operation)
>>>> +				    struct amdgpu_vm *vm)
>>>>  {
>>>> +	struct amdgpu_bo_va *bo_va;
>>>>  	int r;
>>>>
>>>>  	if (!amdgpu_vm_ready(vm))
>>>> @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>>  	if (r)
>>>>  		goto error;
>>>>
>>>> -	if (operation == AMDGPU_VA_OP_MAP ||
>>>> -	    operation == AMDGPU_VA_OP_REPLACE) {
>>>> +	spin_lock(&vm->status_lock);
>>>> +	while (!list_empty(&vm->dirty)) {
>>>> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>>> +					 base.vm_status);
>>>> +		spin_unlock(&vm->status_lock);
>>>> +
>>>>  		r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>>  		if (r)
>>>>  			goto error;
>>>> +		spin_lock(&vm->status_lock);
>>>>  	}
>>>> +	spin_unlock(&vm->status_lock);
>>>>
>>>>  	r = amdgpu_vm_update_pdes(adev, vm, false);
>>>>
>>>> @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>>>  		break;
>>>>  	}
>>>>  	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>>>> -		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>>> -					args->operation);
>>>> +		amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>>>
>>>>  error:
>>>>  	drm_exec_fini(&exec);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index dd6f72e2a1d6..01d31891cd05 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>>>>  	spin_unlock(&vm_bo->vm->status_lock);
>>>>  }
>>>>
>>>> +/**
>>>> + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>>> + *
>>>> + * @vm_bo: vm_bo which is dirty
>>>> + *
>>>> + * State for normal and per VM BOs that are not moved, but have new entries in
>>>> + * bo_va->invalids.
>>>> + */
>>>> +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>>> +{
>>>> +	spin_lock(&vm_bo->vm->status_lock);
>>>> +	list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>>> +	spin_unlock(&vm_bo->vm->status_lock);
>>>> +}
>>>> +
>>>>  /**
>>>>   * amdgpu_vm_bo_moved - vm_bo is moved
>>>>   *
>>>> @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>>>>  	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>>>>  		amdgpu_vm_bo_get_memory(bo_va, stats);
>>>>
>>>> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>>>> +		amdgpu_vm_bo_get_memory(bo_va, stats);
>>>> +
>>>>  	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>>>>  		amdgpu_vm_bo_get_memory(bo_va, stats);
>>>>
>>>> @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>>>  			dma_resv_unlock(resv);
>>>>  		spin_lock(&vm->status_lock);
>>>>  	}
>>>> +
>>>> +	while (!list_empty(&vm->dirty)) {
>>>> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>>> +					 base.vm_status);
>>>> +		spin_unlock(&vm->status_lock);
>>>> +
>>>> +		r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>> +		if (r)
>>>> +			return r;
>>>> +		spin_lock(&vm->status_lock);
>>>> +	}
>>>>  	spin_unlock(&vm->status_lock);
>>>>
>>>>  	return 0;
>>>> @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>>>  				    struct amdgpu_bo_va_mapping *mapping)
>>>>  {
>>>>  	struct amdgpu_vm *vm = bo_va->base.vm;
>>>> -	struct amdgpu_bo *bo = bo_va->base.bo;
>>>>
>>>>  	mapping->bo_va = bo_va;
>>>>  	list_add(&mapping->list, &bo_va->invalids);
>>>>  	amdgpu_vm_it_insert(mapping, &vm->va);
>>>> +	if (!bo_va->base.moved)
>>>> +		amdgpu_vm_bo_dirty(&bo_va->base);
>>>>
>>>>  	if (mapping->flags & AMDGPU_PTE_PRT)
>>>>  		amdgpu_vm_prt_get(adev);
>>>>
>>>> -	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>> -	    !bo_va->base.moved) {
>>>> -		amdgpu_vm_bo_moved(&bo_va->base);
>>>> -	}
>>>>  	trace_amdgpu_vm_bo_map(bo_va, mapping);
>>>>  }
>>>>
>>>> @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>>  		before->flags = tmp->flags;
>>>>  		before->bo_va = tmp->bo_va;
>>>>  		list_add(&before->list, &tmp->bo_va->invalids);
>>>> +		if (!tmp->bo_va->base.moved)
>>>> +			amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>>  	}
>>>>
>>>>  	/* Remember mapping split at the end */
>>>> @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>>  		after->flags = tmp->flags;
>>>>  		after->bo_va = tmp->bo_va;
>>>>  		list_add(&after->list, &tmp->bo_va->invalids);
>>>> +		if (!tmp->bo_va->base.moved)
>>>> +			amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>>  	}
>>>>
>>>>  	list_del(&tmp->list);
>>>> @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>>
>>>>  	/* Insert partial mapping before the range */
>>>>  	if (!list_empty(&before->list)) {
>>>> -		struct amdgpu_bo *bo = before->bo_va->base.bo;
>>>> -
>>>>  		amdgpu_vm_it_insert(before, &vm->va);
>>>>  		if (before->flags & AMDGPU_PTE_PRT)
>>>>  			amdgpu_vm_prt_get(adev);
>>>> -
>>>> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>> -		    !before->bo_va->base.moved)
>>>> -			amdgpu_vm_bo_moved(&before->bo_va->base);
>>>>  	} else {
>>>>  		kfree(before);
>>>>  	}
>>>>
>>>>  	/* Insert partial mapping after the range */
>>>>  	if (!list_empty(&after->list)) {
>>>> -		struct amdgpu_bo *bo = after->bo_va->base.bo;
>>>> -
>>>>  		amdgpu_vm_it_insert(after, &vm->va);
>>>>  		if (after->flags & AMDGPU_PTE_PRT)
>>>>  			amdgpu_vm_prt_get(adev);
>>>> -
>>>> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>> -		    !after->bo_va->base.moved)
>>>> -			amdgpu_vm_bo_moved(&after->bo_va->base);
>>>>  	} else {
>>>>  		kfree(after);
>>>>  	}
>>>> @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>>>  	INIT_LIST_HEAD(&vm->evicted);
>>>>  	INIT_LIST_HEAD(&vm->relocated);
>>>>  	INIT_LIST_HEAD(&vm->moved);
>>>> +	INIT_LIST_HEAD(&vm->dirty);
>>>>  	INIT_LIST_HEAD(&vm->idle);
>>>>  	INIT_LIST_HEAD(&vm->invalidated);
>>>>  	spin_lock_init(&vm->status_lock);
>>>> @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>>  {
>>>>  	struct amdgpu_bo_va *bo_va, *tmp;
>>>>  	u64 total_idle = 0;
>>>> +	u64 total_dirty = 0;
>>>>  	u64 total_relocated = 0;
>>>>  	u64 total_moved = 0;
>>>>  	u64 total_invalidated = 0;
>>>>  	u64 total_done = 0;
>>>>  	unsigned int total_idle_objs = 0;
>>>> +	unsigned int total_dirty_objs = 0;
>>>>  	unsigned int total_relocated_objs = 0;
>>>>  	unsigned int total_moved_objs = 0;
>>>>  	unsigned int total_invalidated_objs = 0;
>>>> @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>>  	total_idle_objs = id;
>>>>  	id = 0;
>>>>
>>>> +	seq_puts(m, "\tDirty BOs:\n");
>>>> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>>>> +		if (!bo_va->base.bo)
>>>> +			continue;
>>>> +		total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
>>>> +	}
>>>> +	total_dirty_objs = id;
>>>> +	id = 0;
>>>> +
>>>>  	seq_puts(m, "\tRelocated BOs:\n");
>>>>  	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>>>>  		if (!bo_va->base.bo)
>>>> @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>>
>>>>  	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>>>>  		   total_idle_objs);
>>>> +	seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
>>>> +		   total_dirty_objs);
>>>>  	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>>>>  		   total_relocated_objs);
>>>>  	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index d9ab97eabda9..f91d4fcf80b8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>>>  	/* per VM BOs moved, but not yet updated in the PT */
>>>>  	struct list_head moved;
>>>>
>>>> +	/* normal and per VM BOs that are not moved, but have new PT entries */
>>>> +	struct list_head dirty;
>>>> +
>>>>  	/* All BOs of this VM not currently in the state machine */
>>>>  	struct list_head idle;