On 4/30/25 17:48, Alex Deucher wrote: > Query the status of the user queue, currently whether > the queue is hung and whether or not VRAM is lost. > > Reviewed-by: Sunil Khatri <sunil.kha...@amd.com> > Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 47 ++++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 + > 2 files changed, 47 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > index 4be46fa76ceba..e56fae10400db 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > @@ -460,6 +460,7 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > queue->queue_type = args->in.ip_type; > queue->vm = &fpriv->vm; > queue->priority = priority; > + queue->generation = amdgpu_vm_generation(adev, &fpriv->vm); > > db_info.queue_type = queue->queue_type; > db_info.doorbell_handle = queue->doorbell_handle; > @@ -532,6 +533,34 @@ amdgpu_userq_create(struct drm_file *filp, union > drm_amdgpu_userq *args) > return r; > } > > +static int > +amdgpu_userq_query_status(struct drm_file *filp, union drm_amdgpu_userq > *args) > +{ > + struct amdgpu_fpriv *fpriv = filp->driver_priv; > + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; > + struct amdgpu_device *adev = uq_mgr->adev; > + struct amdgpu_usermode_queue *queue; > + int queue_id = args->in.queue_id; > + > + mutex_lock(&uq_mgr->userq_mutex); > + > + queue = amdgpu_userq_find(uq_mgr, queue_id); > + if (!queue) { > + dev_err(adev->dev, "Invalid queue id to query\n");
Using dev_err() is probably not justified here; this is more of a debug-level message. > + mutex_unlock(&uq_mgr->userq_mutex); > + return -EINVAL; > + } > + args->out_qs.flags = 0; > + if (queue->state == AMDGPU_USERQ_STATE_HUNG) > + args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_HUNG; > + if (queue->generation != amdgpu_vm_generation(adev, &fpriv->vm)) > + args->out_qs.flags |= AMDGPU_USERQ_QUERY_STATUS_FLAGS_VRAMLOST; > + > + mutex_unlock(&uq_mgr->userq_mutex); > + > + return 0; > +} > + > int amdgpu_userq_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > { > @@ -565,7 +594,23 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void > *data, > if (r) > DRM_ERROR("Failed to destroy usermode queue\n"); > break; > - > + case AMDGPU_USERQ_OP_QUERY_STATUS: > + if (args->in.ip_type || > + args->in.doorbell_handle || > + args->in.doorbell_offset || > + args->in.flags || > + args->in.queue_va || > + args->in.queue_size || > + args->in.rptr_va || > + args->in.wptr_va || > + args->in.wptr_va || > + args->in.mqd || > + args->in.mqd_size) > + return -EINVAL; We can't expect those fields to be initialized for this operation, so checking them is probably not a good idea. > + r = amdgpu_userq_query_status(filp, args); > + if (r) > + DRM_ERROR("Failed to query usermode queue status\n"); That message is overkill; we already have a message inside amdgpu_userq_query_status(). Regards, Christian. > + break; > default: > DRM_DEBUG_DRIVER("Invalid user queue op specified: %d\n", > args->in.op); > return -EINVAL; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > index 6ede08dd821d2..35a08ee59d893 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h > @@ -65,6 +65,7 @@ struct amdgpu_usermode_queue { > struct dma_fence *last_fence; > u32 xcp_id; > int priority; > + uint64_t generation; > }; > > struct amdgpu_userq_funcs {