[Public]

Regards,
Prike
> -----Original Message-----
> From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of Liang, Prike
> Sent: Tuesday, July 1, 2025 9:39 PM
> To: Koenig, Christian <christian.koe...@amd.com>; Alex Deucher <alexdeuc...@gmail.com>
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander <alexander.deuc...@amd.com>
> Subject: RE: [PATCH v4 05/11] drm/amdgpu: add userq object va track helpers
>
> [Public]
>
> Regards,
> Prike
>
> > -----Original Message-----
> > From: Koenig, Christian <christian.koe...@amd.com>
> > Sent: Wednesday, June 25, 2025 4:02 PM
> > To: Alex Deucher <alexdeuc...@gmail.com>; Liang, Prike <prike.li...@amd.com>
> > Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander <alexander.deuc...@amd.com>
> > Subject: Re: [PATCH v4 05/11] drm/amdgpu: add userq object va track helpers
> >
> > On 24.06.25 19:15, Alex Deucher wrote:
> > > On Tue, Jun 24, 2025 at 4:54 AM Prike Liang <prike.li...@amd.com> wrote:
> > >>
> > >> Add the userq object virtual address get(), mapped() and put()
> > >> helpers for tracking the userq object va address usage.
> > >>
> > >> Signed-off-by: Prike Liang <prike.li...@amd.com>
> > >> ---
> > >>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 132 ++++++++++++++++++++-
> > >>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  14 +++
> > >>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   4 +
> > >>  3 files changed, 149 insertions(+), 1 deletion(-)
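For clarity on the intended lifetime: the get()/put() helpers below are meant
to bracket the queue's life. The actual call sites are not part of the quoted
hunks, so the following is only an illustrative sketch of the intent, not the
exact code:

	/* userq create IOCTL, once the queue VAs have been recorded */
	amdgpu_userq_buffer_vas_get(&fpriv->vm, queue);

	/* userq destroy, after the queue has been unmapped from the HW */
	amdgpu_userq_buffer_vas_put(queue->vm, queue);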
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> index a20f7ccbe98e..cbbd1860a69a 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> @@ -76,6 +76,134 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> > >>                 return -EINVAL;
> > >>  }
> > >>
> > >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr)
> > >> +{
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       int r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +       r = amdgpu_bo_reserve(vm->root.bo, false);
> > >> +       if (r)
> > >> +               return r;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!mapping)
> > >> +               goto out_err;
> > >> +
> > >> +       /*
> > >> +        * Need to unify the following userq va references:
> > >> +        * mqd bo
> > >> +        * rptr bo
> > >> +        * wptr bo
> > >> +        * eop bo
> > >> +        * doorbell bo
> > >> +        */
> > >> +       /*amdgpu_bo_ref(mapping->bo_va->base.bo);*/
> > >> +       mapping->bo_va->queue_refcount++;
> > >> +
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return 0;
> > >> +
> > >> +out_err:
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return -EINVAL;
> > >> +}
> > >> +
> > >> +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
> > >> +{
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       bool r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +
> > >> +       if (amdgpu_bo_reserve(vm->root.bo, false))
> > >> +               return false;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->queue_refcount > 0)
> > >> +               r = true;
> > >> +       else
> > >> +               r = false;
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +
> > >> +       return r;
> > >> +}
> > >> +
> > >> +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > >> +                                   struct amdgpu_usermode_queue *queue)
> > >> +{
> > >> +
> > >> +       if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->eop_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
> > >
> > > Only some of these are relevant for each queue type.  Rather than
> > > checking all of the critical buffers in all of your helper
> > > functions, it might be cleaner to add new userq callbacks that only
> > > check/update the relevant buffers for the queue type.
>
> Yes, validating on a per-queue basis would be clearer for each userq type,
> but it is also more complicated and needs more checkpoints. In the current
> proposal, an irrelevant userq virtual address is simply treated as invalid
> and returns early from amdgpu_vm_bo_lookup_mapping(), so the current
> implementation seems more uniform and simpler. Thoughts? If you still
> prefer per-queue validation, I will do that.
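If we do end up going per-queue, I would expect to wire it through the userq
funcs table, along these lines (rough sketch only; the callback name and the
exact per-type VA lists are illustrative, not a final API):

	/* per-IP callback: check only the VAs this queue type actually uses */
	bool (*buffer_vas_mapped)(struct amdgpu_vm *vm,
				  struct amdgpu_usermode_queue *queue);

	/* e.g. the GFX variant would also cover the shadow and CSA buffers */
	static bool mes_userq_gfx_buffer_vas_mapped(struct amdgpu_vm *vm,
						    struct amdgpu_usermode_queue *queue)
	{
		return amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
		       amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
		       amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
		       amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
		       amdgpu_userq_buffer_va_mapped(vm, queue->csa_va);
	}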
> > >> +               return true;
> > >> +       else
> > >> +               return false;
> > >> +}
> > >> +
> > >> +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr)
> > >> +{
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       int r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +       r = amdgpu_bo_reserve(vm->root.bo, false);
> > >> +       if (r)
> > >> +               return r;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!mapping)
> > >> +               goto out_err;
> > >> +       /*
> > >> +        * If we took a reference on the userq VM BOs with
> > >> +        * amdgpu_bo_ref(mapping->bo_va->base.bo) in the userq create IOCTL,
> > >> +        * that reference would be held until amdgpu_userq_destroy(). But the
> > >> +        * userq VA is unmapped in amdgpu_vm_bo_unmap(), which runs ahead of
> > >> +        * amdgpu_userq_destroy(), so when amdgpu_vm_bo_unmap() tries to
> > >> +        * unmap the userq VA mapping, it fails because of the outstanding
> > >> +        * BO reference.
> > >> +        */
> > >> +       /*amdgpu_bo_unref(&mapping->bo_va->base.bo);*/
> > >
> > > I still don't follow this.  Why can't we get a reference in userq
> > > create and put the reference in userq destroy?  If the app is
> > > freeing the buffers before the queue, that's an app bug.
> >
> > We should probably just signal the eviction fence when the application
> > tries to unmap a BO which is vital for a user queue.
> >
> > On restoring the evicted queue we then check again if all the VAs
> > needed for this queue are valid, and if they aren't we deny resuming
> > the queue.
> >
> > Christian.
> >
> > >
> > > Alex
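The eviction-fence direction sounds reasonable to me. Roughly, and only as a
sketch on top of the helpers in this patch (amdgpu_userq_evict() stands in
for whatever actually kicks the eviction fence, and amdgpu_userq_all_vas_valid()
is a hypothetical helper for the restore-side check):

	/* unmap path: a BO still referenced by a user queue evicts the queue */
	if (mapping->bo_va->queue_refcount > 0)
		amdgpu_userq_evict(...);

	/* restore path: deny resuming unless every required VA is still valid */
	if (!amdgpu_userq_all_vas_valid(queue->vm, queue))
		return -EBUSY;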
> > >> +
> > >> +       mapping->bo_va->queue_refcount--;
> > >> +
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return 0;
> > >> +
> > >> +out_err:
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return -EINVAL;
> > >> +}
> > >> +
> > >> +static void amdgpu_userq_buffer_vas_get(struct amdgpu_vm *vm,
> > >> +                                       struct amdgpu_usermode_queue *queue)
> > >> +{
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->queue_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->rptr_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->wptr_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->eop_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->shadow_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->csa_va);
> > >> +}
> > >> +
> > >> +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > >> +                               struct amdgpu_usermode_queue *queue)
> > >> +{
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->queue_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->rptr_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->wptr_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->eop_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->shadow_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->csa_va);
> > >> +
> > >> +       return 0;
> > >> +}
> > >> +
> > >>  static int
> > >>  amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> > >>                           struct amdgpu_usermode_queue *queue)
> > >> @@ -442,6 +570,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> > >>         queue->queue_type = args->in.ip_type;
> > >>         queue->vm = &fpriv->vm;
> > >>         queue->priority = priority;
> > >> +       queue->queue_va = args->in.queue_va;
> > >> +       queue->rptr_va = args->in.rptr_va;
> > >> +       queue->wptr_va = args->in.wptr_va;
> > >>
> > >>         db_info.queue_type = queue->queue_type;
> > >>         db_info.doorbell_handle = queue->doorbell_handle;
> > >> @@ -472,7 +603,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> > >>                 goto unlock;
> > >>         }
> > >>
> > >> -
> > >>         qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
> > >>         if (qid < 0) {
> > >>                 drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> index 704935ca0c36..194ec7a6b3b2 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> @@ -52,6 +52,13 @@ struct amdgpu_usermode_queue {
> > >>         enum amdgpu_userq_state state;
> > >>         uint64_t doorbell_handle;
> > >>         uint64_t doorbell_index;
> > >> +       uint64_t queue_va;
> > >> +       uint64_t rptr_va;
> > >> +       uint64_t wptr_va;
> > >> +       uint64_t eop_va;
> > >> +       uint64_t shadow_va;
> > >> +       uint64_t csa_va;
> > >> +
> > >>         uint64_t flags;
> > >>         struct amdgpu_mqd_prop *userq_prop;
> > >>         struct amdgpu_userq_mgr *userq_mgr;
> > >> @@ -134,4 +141,11 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> > >>
> > >>  int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> > >>                                    u64 expected_size);
> > >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr);
> > >> +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr);
> > >> +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > >> +                                   struct amdgpu_usermode_queue *queue);
> > >> +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
> > >> +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > >> +                               struct amdgpu_usermode_queue *queue);
> > >>  #endif
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> index 4615d3fba530..c9cde14064d1 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> @@ -263,6 +263,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > >>                 userq_props->hqd_active = false;
> > >>                 userq_props->tmz_queue =
> > >>                         mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > >> +               queue->eop_va = compute_mqd->eop_va;
> > >>                 kfree(compute_mqd);
> > >>         } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
> > >>                 struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
> > >> @@ -284,6 +285,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > >>                 userq_props->csa_addr = mqd_gfx_v11->csa_va;
> > >>                 userq_props->tmz_queue =
> > >>                         mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > >> +               queue->shadow_va = mqd_gfx_v11->shadow_va;
> > >> +               queue->csa_va = mqd_gfx_v11->csa_va;
> > >>
> > >>                 if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
> > >>                                                    shadow_info.shadow_size)) {
> > >> @@ -317,6 +320,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > >>                 }
> > >>
> > >>                 userq_props->csa_addr = mqd_sdma_v11->csa_va;
> > >> +               queue->csa_va = mqd_sdma_v11->csa_va;
> > >>                 kfree(mqd_sdma_v11);
> > >>         }
> > >>
> > >> --
> > >> 2.34.1
> > >>