On 12/19/25 14:42, Tvrtko Ursulin wrote:
> Currently there are two flavours of the context reference count
> destructor:
>
> - amdgpu_ctx_do_release(), used from kref_put from places where the code
> thinks context may have been used, or is in active use, and;
> - amdgpu_ctx_fini(), used when code is sure context entities have already
> been idled.
>
> Since amdgpu_ctx_do_release() calls amdgpu_ctx_fini() after having idled
> and destroyed the scheduler entities, we can consolidate the two into a
> single function.
>
> Functional difference is that now drm_sched_entity_destroy() is called on
> context manager shutdown (file close), where previously it was
> drm_sched_entity_fini(). But the former is a superset of the latter, and
> during file close the flush method is also called, which calls
> drm_sched_entity_flush(), which is also called by
> drm_sched_entity_destroy(). And as it is safe to attempt to flush a never
> used entity, or flush it twice, there is actually no functional change.
>
> Signed-off-by: Tvrtko Ursulin <[email protected]>
> Suggested-by: Christian König <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 103 +++++++-----------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 9 ++-
> 2 files changed, 38 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index dd1e562dbaed..f3d5a7180bda 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -269,26 +269,6 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx
> *ctx, u32 hw_ip,
> return r;
> }
>
> -static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
> - struct amdgpu_ctx_entity *entity)
> -{
> - ktime_t res = ns_to_ktime(0);
> - int i;
> -
> - if (!entity)
> - return res;
> -
> - for (i = 0; i < amdgpu_sched_jobs; ++i) {
> - res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
> - dma_fence_put(entity->fences[i]);
> - }
> -
> - amdgpu_xcp_release_sched(adev, entity);
> -
> - kfree(entity);
> - return res;
> -}
> -
> static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
> u32 *stable_pstate)
> {
> @@ -403,28 +383,47 @@ static int amdgpu_ctx_set_stable_pstate(struct
> amdgpu_ctx *ctx,
> return r;
> }
>
> -static void amdgpu_ctx_fini(struct kref *ref)
> +void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> struct amdgpu_ctx_mgr *mgr = ctx->mgr;
> struct amdgpu_device *adev = mgr->adev;
> - unsigned i, j, idx;
> + int i, j, k;
>
> if (!adev)
> return;
>
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
> - ktime_t spend;
> + struct amdgpu_ctx_entity *entity = ctx->entities[i][j];
> + ktime_t t = ns_to_ktime(0);
>
> - spend = amdgpu_ctx_fini_entity(adev,
> ctx->entities[i][j]);
> - atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
> + if (!entity)
> + continue;
> +
> + drm_sched_entity_destroy(&entity->entity);
> +
> + for (k = 0; k < amdgpu_sched_jobs; k++) {
> + struct dma_fence *fence = entity->fences[k];
> +
> + if (!fence)
> + continue;
> +
> + t = ktime_add(t, amdgpu_ctx_fence_time(fence));
> + dma_fence_put(fence);
> + }
> +
> + amdgpu_xcp_release_sched(adev, entity);
> +
> + kfree(entity);
> +
> + atomic64_add(ktime_to_ns(t), &mgr->time_spend[i]);
> }
> }
>
> - if (drm_dev_enter(adev_to_drm(adev), &idx)) {
> + if (drm_dev_enter(adev_to_drm(adev), &i)) {
> amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
> - drm_dev_exit(idx);
> + drm_dev_exit(i);
Please keep i and idx separate here — re-using the loop counter as the drm_dev_enter() cookie makes the code harder to read, and idx documents its purpose.
Apart from that, this looks good to me.
Regards,
Christian.
> }
>
> kfree(ctx);
> @@ -502,24 +501,6 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> return r;
> }
>
> -static void amdgpu_ctx_do_release(struct kref *ref)
> -{
> - struct amdgpu_ctx *ctx;
> - u32 i, j;
> -
> - ctx = container_of(ref, struct amdgpu_ctx, refcount);
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> - if (!ctx->entities[i][j])
> - continue;
> -
> - drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
> - }
> - }
> -
> - amdgpu_ctx_fini(ref);
> -}
> -
> static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
> {
> struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
> @@ -527,8 +508,7 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv,
> uint32_t id)
>
> mutex_lock(&mgr->lock);
> ctx = idr_remove(&mgr->ctx_handles, id);
> - if (ctx)
> - kref_put(&ctx->refcount, amdgpu_ctx_do_release);
> + amdgpu_ctx_put(ctx);
> mutex_unlock(&mgr->lock);
> return ctx ? 0 : -EINVAL;
> }
> @@ -744,15 +724,6 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv
> *fpriv, uint32_t id)
> return ctx;
> }
>
> -int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
> -{
> - if (ctx == NULL)
> - return -EINVAL;
> -
> - kref_put(&ctx->refcount, amdgpu_ctx_do_release);
> - return 0;
> -}
> -
> uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
> struct drm_sched_entity *entity,
> struct dma_fence *fence)
> @@ -921,29 +892,15 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr
> *mgr, long timeout)
> static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> struct amdgpu_ctx *ctx;
> - struct idr *idp;
> - uint32_t id, i, j;
> + uint32_t id;
>
> - idp = &mgr->ctx_handles;
> -
> - idr_for_each_entry(idp, ctx, id) {
> + idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
> if (kref_read(&ctx->refcount) != 1) {
> DRM_ERROR("ctx %p is still alive\n", ctx);
> continue;
> }
>
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> - struct drm_sched_entity *entity;
> -
> - if (!ctx->entities[i][j])
> - continue;
> -
> - entity = &ctx->entities[i][j]->entity;
> - drm_sched_entity_fini(entity);
> - }
> - }
> - kref_put(&ctx->refcount, amdgpu_ctx_fini);
> + amdgpu_ctx_put(ctx);
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index cf8d700a22fe..b1fa7fe74569 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -70,7 +70,14 @@ struct amdgpu_ctx_mgr {
> extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
>
> struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
> -int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
> +
> +void amdgpu_ctx_fini(struct kref *kref);
> +
> +static inline void amdgpu_ctx_put(struct amdgpu_ctx *ctx)
> +{
> + if (ctx)
> + kref_put(&ctx->refcount, amdgpu_ctx_fini);
> +}
>
> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> u32 ring, struct drm_sched_entity **entity);