Thanks for this. A few comments and a question inline.

On 2018-08-31 09:27 AM, Christian König wrote:
> Since we have a lot of FAQ on the VM state machine try to improve the
> documentation by adding functions for each state move.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 107 ++++++++++++++++++++++++---------
>  1 file changed, 79 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a9275a99d793..40c22635fefd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -204,6 +204,69 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
>       return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
>  }
>  
> +/**
> + * amdgpu_vm_bo_evicted - vm_bo is evicted
> + *
> + * @vm_bo: vm_bo which is evicted
> + *
> + * State for PDs/PTs and per VM BOs which are not at the location they should
> + * be.
> + */
> +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +     struct amdgpu_vm *vm = vm_bo->vm;
> +     struct amdgpu_bo *bo = vm_bo->bo;
> +
> +     vm_bo->moved = true;
> +     if (bo->tbo.type == ttm_bo_type_kernel)
> +             list_move(&vm_bo->vm_status, &vm->evicted);
> +     else
> +             list_move_tail(&vm_bo->vm_status, &vm->evicted);
> +}
> +
> +/**
> + * amdgpu_vm_bo_relocated - vm_bo is relocated
> + *
> + * @vm_bo: vm_bo which is relocated
> + *
> + * State for PDs/PTs which need to update their parent PD.
> + */
> +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +     list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
> +}
> +
> +/**
> + * amdgpu_vm_bo_moved - vm_bo is moved
> + *
> + * @vm_bo: vm_bo which is moved
> + *
> + * State for per VM and normal BOs which are moved, but that change is not yet
> + * reflected in the page tables.

I have a question here. Why does amdgpu_cs_vm_handling call
amdgpu_vm_bo_update manually for its BO list entries? Wouldn't it be
enough to just call amdgpu_vm_handle_moved?

> + */
> +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +     struct amdgpu_vm *vm = vm_bo->vm;
> +
> +     spin_lock(&vm->moved_lock);
> +     list_move(&vm_bo->vm_status, &vm->moved);
> +     spin_unlock(&vm->moved_lock);

If vm->moved_lock protects the moved list, don't we also need to take it
whenever an entry is moved off that list? That could potentially be any
list_move operation on vm_bo->vm_status, since list_move unlinks the
entry from whatever list it is currently on. I found one case below
where that may not be handled correctly.

> +}
> +
> +/**
> + * amdgpu_vm_bo_idle - vm_bo is idle
> + *
> + * @vm_bo: vm_bo which is now idle
> + *
> + * State for PDs/PTs and per VM BOs which have gone through the state machine
> + * and are now idle.
> + */
> +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +     list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
> +     vm_bo->moved = false;
> +}
> +
>  /**
>   * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
>   *
> @@ -232,9 +295,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>  
>       vm->bulk_moveable = false;
>       if (bo->tbo.type == ttm_bo_type_kernel)
> -             list_move(&base->vm_status, &vm->relocated);
> +             amdgpu_vm_bo_relocated(base);
>       else
> -             list_move(&base->vm_status, &vm->idle);
> +             amdgpu_vm_bo_idle(base);
>  
>       if (bo->preferred_domains &
>           amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
> @@ -245,8 +308,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>        * is currently evicted. add the bo to the evicted list to make sure it
>        * is validated on next vm use to avoid fault.
>        * */
> -     list_move_tail(&base->vm_status, &vm->evicted);
> -     base->moved = true;
> +     amdgpu_vm_bo_evicted(base);
>  }
>  
>  /**
> @@ -342,9 +404,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                       break;
>  
>               if (bo->tbo.type != ttm_bo_type_kernel) {
> -                     spin_lock(&vm->moved_lock);
> -                     list_move(&bo_base->vm_status, &vm->moved);
> -                     spin_unlock(&vm->moved_lock);
> +                     amdgpu_vm_bo_moved(bo_base);
>               } else {
>                       if (vm->use_cpu_for_update)
>                               r = amdgpu_bo_kmap(bo, NULL);
> @@ -352,7 +412,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                               r = amdgpu_ttm_alloc_gart(&bo->tbo);
>                       if (r)
>                               break;
> -                     list_move(&bo_base->vm_status, &vm->relocated);
> +                     amdgpu_vm_bo_relocated(bo_base);
>               }
>       }
>  
> @@ -1123,8 +1183,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>               bo_base = list_first_entry(&vm->relocated,
>                                          struct amdgpu_vm_bo_base,
>                                          vm_status);
> -             bo_base->moved = false;
> -             list_move(&bo_base->vm_status, &vm->idle);
> +             amdgpu_vm_bo_idle(bo_base);
>  
>               bo = bo_base->bo->parent;
>               if (!bo)
> @@ -1243,7 +1302,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>               if (entry->huge) {
>                       /* Add the entry to the relocated list to update it. */
>                       entry->huge = false;
> -                     list_move(&entry->base.vm_status, &p->vm->relocated);
> +                     amdgpu_vm_bo_relocated(&entry->base);
>               }
>               return;
>       }
> @@ -1746,9 +1805,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>               uint32_t mem_type = bo->tbo.mem.mem_type;
>  
>               if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
> -                     list_add_tail(&bo_va->base.vm_status, &vm->evicted);
> +                     amdgpu_vm_bo_evicted(&bo_va->base);
>               else
> -                     list_add(&bo_va->base.vm_status, &vm->idle);
> +                     amdgpu_vm_bo_idle(&bo_va->base);

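There is a small change in behaviour here regarding bo_va->base.moved:
the old list_add()/list_add_tail() calls left the flag untouched, while
amdgpu_vm_bo_evicted() now sets it and amdgpu_vm_bo_idle() clears it.
Not sure if it matters.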

>       }
>  
>       list_splice_init(&bo_va->invalids, &bo_va->valids);
> @@ -2472,28 +2531,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  
>       list_for_each_entry(bo_base, &bo->va, bo_list) {
>               struct amdgpu_vm *vm = bo_base->vm;
> -             bool was_moved = bo_base->moved;
>  
> -             bo_base->moved = true;
>               if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> -                     if (bo->tbo.type == ttm_bo_type_kernel)
> -                             list_move(&bo_base->vm_status, &vm->evicted);
> -                     else
> -                             list_move_tail(&bo_base->vm_status,
> -                                            &vm->evicted);
> +                     amdgpu_vm_bo_evicted(bo_base);

I think it's possible here that the BO was on the moved list. If so,
amdgpu_vm_bo_evicted should take the moved_lock, just in case.

Regards,
  Felix

>                       continue;
>               }
>  
> -             if (was_moved)
> +             if (bo_base->moved)
>                       continue;
>  
> -             if (bo->tbo.type == ttm_bo_type_kernel) {
> -                     list_move(&bo_base->vm_status, &vm->relocated);
> -             } else {
> -                     spin_lock(&bo_base->vm->moved_lock);
> -                     list_move(&bo_base->vm_status, &vm->moved);
> -                     spin_unlock(&bo_base->vm->moved_lock);
> -             }
> +             bo_base->moved = true;
> +             if (bo->tbo.type == ttm_bo_type_kernel)
> +                     amdgpu_vm_bo_relocated(bo_base);
> +             else
> +                     amdgpu_vm_bo_moved(bo_base);
>       }
>  }
>  

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to