On 12/19/25 09:49, Tvrtko Ursulin wrote:
> On 19/12/2025 08:28, Christian König wrote:
>> On 12/18/25 16:04, Tvrtko Ursulin wrote:
>>> IDR is deprecated so let's convert the context manager to xarray.
>>>
>>> In doing so we remove the context manager mutex and switch call sites
>>> which required the guarantee that a context cannot go away while they
>>> walk the list of contexts, or otherwise operate on them, to use
>>> reference counting.
>>>
>>> This allows us to use the built-in xarray spinlock for all operations
>>> and just temporarily drop it when we need to call sleeping functions.
>>>
>>> Signed-off-by: Tvrtko Ursulin <[email protected]>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 117 ++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 5 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 +-
>>> 3 files changed, 49 insertions(+), 81 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index afedea02188d..ee1464f8751a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -484,22 +484,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>> if (!ctx)
>>> return -ENOMEM;
>>> - mutex_lock(&mgr->lock);
>>> - r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
>>> - if (r < 0) {
>>> - mutex_unlock(&mgr->lock);
>>> - kfree(ctx);
>>> - return r;
>>> - }
>>> -
>>> - *id = (uint32_t)r;
>>> r = amdgpu_ctx_init(mgr, priority, filp, ctx);
>>> if (r) {
>>> - idr_remove(&mgr->ctx_handles, *id);
>>> - *id = 0;
>>> kfree(ctx);
>>> + return r;
>>> }
>>> - mutex_unlock(&mgr->lock);
>>> +
>>> + r = xa_alloc(&mgr->ctx_handles, id, ctx,
>>> + XA_LIMIT(1, AMDGPU_VM_MAX_NUM_CTX - 1), GFP_KERNEL);
>>
>> Please drop the AMDGPU_VM_MAX_NUM_CTX define as well. That is just a totally
>> pointless limitation.
>
> So xa_limit_32b?
I think so, yes. At least I don't see a good reason to have an artificial
limitation.
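
Untested sketch of what I mean; if I read the xarray flags right,
XA_FLAGS_ALLOC1 already marks index 0 as busy, so xa_limit_32b here would
hand out handles 1..UINT_MAX:

	r = xa_alloc(&mgr->ctx_handles, id, ctx, xa_limit_32b, GFP_KERNEL);
	if (r)
		kref_put(&ctx->refcount, amdgpu_ctx_fini);

	return r;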
>
>>
>>> + if (r)
>>> + kref_put(&ctx->refcount, amdgpu_ctx_fini);
>>> +
>>
>> While at it you should probably clean up the unnecessary differentiation
>> between amdgpu_ctx_fini and amdgpu_ctx_do_release as well.
>
> Right, those are very confusing indeed, so I happily will give that a go.
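
Something like this is what I mean (untested sketch; the entity teardown
helper is hypothetical, the point is to end up with a single kref release
callback so that every kref_put() caller passes the same function):

	static void amdgpu_ctx_release(struct kref *ref)
	{
		struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
						      refcount);

		/* entity teardown which amdgpu_ctx_do_release does today */
		amdgpu_ctx_fini_entities(ctx);	/* hypothetical helper */
		/* final cleanup and free which amdgpu_ctx_fini does today */
		kfree(ctx);
	}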
>
>>
>>> return r;
>>> }
>>> @@ -523,14 +518,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>>> static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
>>> {
>>> - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
>>> struct amdgpu_ctx *ctx;
>>> - mutex_lock(&mgr->lock);
>>> - ctx = idr_remove(&mgr->ctx_handles, id);
>>> - if (ctx)
>>> - kref_put(&ctx->refcount, amdgpu_ctx_do_release);
>>> - mutex_unlock(&mgr->lock);
>>> + ctx = xa_erase(&fpriv->ctx_mgr.ctx_handles, id);
>>> + amdgpu_ctx_put(ctx);
>>> +
>>> return ctx ? 0 : -EINVAL;
>>> }
>>> @@ -539,20 +531,12 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
>>> union drm_amdgpu_ctx_out *out)
>>> {
>>> struct amdgpu_ctx *ctx;
>>> - struct amdgpu_ctx_mgr *mgr;
>>> unsigned reset_counter;
>>> - if (!fpriv)
>>> + ctx = amdgpu_ctx_get(fpriv, id);
>>> + if (!ctx)
>>> return -EINVAL;
>>> - mgr = &fpriv->ctx_mgr;
>>> - mutex_lock(&mgr->lock);
>>> - ctx = idr_find(&mgr->ctx_handles, id);
>>> - if (!ctx) {
>>> - mutex_unlock(&mgr->lock);
>>> - return -EINVAL;
>>> - }
>>> -
>>> /* TODO: these two are always zero */
>>> out->state.flags = 0x0;
>>> out->state.hangs = 0x0;
>>> @@ -566,7 +550,8 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
>>> out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
>>> ctx->reset_counter_query = reset_counter;
>>> - mutex_unlock(&mgr->lock);
>>> + amdgpu_ctx_put(ctx);
>>> +
>>> return 0;
>>> }
>>> @@ -578,19 +563,11 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>>> {
>>> struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>>> struct amdgpu_ctx *ctx;
>>> - struct amdgpu_ctx_mgr *mgr;
>>> - if (!fpriv)
>>> + ctx = amdgpu_ctx_get(fpriv, id);
>>> + if (!ctx)
>>> return -EINVAL;
>>> - mgr = &fpriv->ctx_mgr;
>>> - mutex_lock(&mgr->lock);
>>> - ctx = idr_find(&mgr->ctx_handles, id);
>>> - if (!ctx) {
>>> - mutex_unlock(&mgr->lock);
>>> - return -EINVAL;
>>> - }
>>> -
>>> out->state.flags = 0x0;
>>> out->state.hangs = 0x0;
>>> @@ -630,7 +607,8 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>>> msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
>>> }
>>> - mutex_unlock(&mgr->lock);
>>> + amdgpu_ctx_put(ctx);
>>> +
>>> return 0;
>>> }
>>> @@ -639,26 +617,18 @@ static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
>>> bool set, u32 *stable_pstate)
>>> {
>>> struct amdgpu_ctx *ctx;
>>> - struct amdgpu_ctx_mgr *mgr;
>>> int r;
>>> - if (!fpriv)
>>> + ctx = amdgpu_ctx_get(fpriv, id);
>>> + if (!ctx)
>>> return -EINVAL;
>>> - mgr = &fpriv->ctx_mgr;
>>> - mutex_lock(&mgr->lock);
>>> - ctx = idr_find(&mgr->ctx_handles, id);
>>> - if (!ctx) {
>>> - mutex_unlock(&mgr->lock);
>>> - return -EINVAL;
>>> - }
>>> -
>>> if (set)
>>> r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
>>> else
>>> r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
>>> - mutex_unlock(&mgr->lock);
>>> + amdgpu_ctx_put(ctx);
>>> return r;
>>> }
>>> @@ -737,11 +707,11 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
>>> mgr = &fpriv->ctx_mgr;
>>> - mutex_lock(&mgr->lock);
>>> - ctx = idr_find(&mgr->ctx_handles, id);
>>> + xa_lock(&mgr->ctx_handles);
>>> + ctx = xa_load(&mgr->ctx_handles, id);
>>> if (ctx)
>>> kref_get(&ctx->refcount);
>>> - mutex_unlock(&mgr->lock);
>>> + xa_unlock(&mgr->ctx_handles);
>>> return ctx;
>>> }
>>> @@ -886,8 +856,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
>>> unsigned int i;
>>> mgr->adev = adev;
>>> - mutex_init(&mgr->lock);
>>> - idr_init_base(&mgr->ctx_handles, 1);
>>> + xa_init_flags(&mgr->ctx_handles, XA_FLAGS_ALLOC1);
>>> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>>> atomic64_set(&mgr->time_spend[i], 0);
>>> @@ -896,13 +865,14 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
>>> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>>> {
>>> struct amdgpu_ctx *ctx;
>>> - struct idr *idp;
>>> - uint32_t id, i, j;
>>> + unsigned long id;
>>> + uint32_t i, j;
>>> - idp = &mgr->ctx_handles;
>>> -
>>> - mutex_lock(&mgr->lock);
>>> - idr_for_each_entry(idp, ctx, id) {
>>> + xa_lock(&mgr->ctx_handles);
>>> + xa_for_each(&mgr->ctx_handles, id, ctx) {
>>> + if (!kref_get_unless_zero(&ctx->refcount))
>>
>> This should be pointless, why not use kref_get instead?
>
> True, don't know what I was thinking.
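
I.e. the flush loop from the hunk above would just become (sketch, flush
body elided):

	xa_lock(&mgr->ctx_handles);
	xa_for_each(&mgr->ctx_handles, id, ctx) {
		kref_get(&ctx->refcount);
		xa_unlock(&mgr->ctx_handles);
		/* ... drm_sched_entity_flush() on all entities ... */
		amdgpu_ctx_put(ctx);
		xa_lock(&mgr->ctx_handles);
	}
	xa_unlock(&mgr->ctx_handles);

Any context still in the array holds at least one reference, and xa_erase()
in amdgpu_ctx_free() runs under the same lock, so a plain kref_get() under
xa_lock() cannot race with the count dropping to zero.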
>
>>
>>> + continue;
>>> + xa_unlock(&mgr->ctx_handles);
>>> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>>> struct drm_sched_entity *entity;
>>> @@ -914,20 +884,20 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>>> timeout = drm_sched_entity_flush(entity, timeout);
>>> }
>>> }
>>> + amdgpu_ctx_put(ctx);
>>> + xa_lock(&mgr->ctx_handles);
>>> }
>>> - mutex_unlock(&mgr->lock);
>>> + xa_unlock(&mgr->ctx_handles);
>>> return timeout;
>>> }
>>> static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>>> {
>>> struct amdgpu_ctx *ctx;
>>> - struct idr *idp;
>>> - uint32_t id, i, j;
>>> + unsigned long id;
>>> + uint32_t i, j;
>>> - idp = &mgr->ctx_handles;
>>> -
>>> - idr_for_each_entry(idp, ctx, id) {
>>> + xa_for_each(&mgr->ctx_handles, id, ctx) {
>>
>>> if (kref_read(&ctx->refcount) != 1) {
>>> DRM_ERROR("ctx %p is still alive\n", ctx);
>>> continue;
>>
>> Please drop that check as well. It just leads to memory leaks and errors
>> should anything go wrong, and so only makes things worse.
>>
>> We can have something like WARN_ON_ONCE(kref_read(&ctx->refcount) != 1)
>> here, but I think that is superfluous as well.
>
> Hm, you mean leaving the entity around while the rest of the file gets
> closed causes other problems, i.e. this is not enough to turn crashes into
> memory leaks?
Yes, exactly that. This is basically just another pointless attempt to work
around problems elsewhere.
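
Something like this then (sketch; per the above even the WARN is arguably
superfluous):

	xa_for_each(&mgr->ctx_handles, id, ctx) {
		WARN_ON_ONCE(kref_read(&ctx->refcount) != 1);
		/* ... entity fini exactly as in the hunk above, just done
		 * unconditionally ... */
	}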
Regards,
Christian.
>
> Regards,
>
> Tvrtko
>
>>
>> Regards,
>> Christian.
>>
>>> @@ -951,8 +921,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>>> void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>>> {
>>> amdgpu_ctx_mgr_entity_fini(mgr);
>>> - idr_destroy(&mgr->ctx_handles);
>>> - mutex_destroy(&mgr->lock);
>>> + xa_destroy(&mgr->ctx_handles);
>>> }
>>> void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
>>> @@ -960,21 +929,21 @@ void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
>>> {
>>> struct amdgpu_ctx *ctx;
>>> unsigned int hw_ip, i;
>>> - uint32_t id;
>>> + unsigned long id;
>>> /*
>>> * This is a little bit racy because it can be that a ctx or a fence are
>>> * destroyed just in the moment we try to account them. But that is ok
>>> * since exactly that case is explicitely allowed by the interface.
>>> */
>>> - mutex_lock(&mgr->lock);
>>> for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
>>> uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
>>> usage[hw_ip] = ns_to_ktime(ns);
>>> }
>>> - idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
>>> + xa_lock(&mgr->ctx_handles);
>>> + xa_for_each(&mgr->ctx_handles, id, ctx) {
>>> for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
>>> for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
>>> struct amdgpu_ctx_entity *centity;
>>> @@ -988,5 +957,5 @@ void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
>>> }
>>> }
>>> }
>>> - mutex_unlock(&mgr->lock);
>>> + xa_unlock(&mgr->ctx_handles);
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> index 090dfe86f75b..d073cffa455d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> @@ -25,6 +25,7 @@
>>> #include <linux/ktime.h>
>>> #include <linux/types.h>
>>> +#include <linux/xarray.h>
>>> #include "amdgpu_ring.h"
>>> @@ -62,9 +63,7 @@ struct amdgpu_ctx {
>>> struct amdgpu_ctx_mgr {
>>> struct amdgpu_device *adev;
>>> - struct mutex lock;
>>> - /* protected by lock */
>>> - struct idr ctx_handles;
>>> + struct xarray ctx_handles;
>>> atomic64_t time_spend[AMDGPU_HW_IP_NUM];
>>> };
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> index 341beec59537..471d27b2db01 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
>>> @@ -39,7 +39,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
>>> struct amdgpu_fpriv *fpriv;
>>> struct amdgpu_ctx_mgr *mgr;
>>> struct amdgpu_ctx *ctx;
>>> - uint32_t id;
>>> + unsigned long id;
>>> int r;
>>> if (fd_empty(f))
>>> @@ -50,10 +50,10 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
>>> return r;
>>> mgr = &fpriv->ctx_mgr;
>>> - mutex_lock(&mgr->lock);
>>> - idr_for_each_entry(&mgr->ctx_handles, ctx, id)
>>> + xa_lock(&mgr->ctx_handles);
>>> + xa_for_each(&mgr->ctx_handles, id, ctx)
>>> amdgpu_ctx_priority_override(ctx, priority);
>>> - mutex_unlock(&mgr->lock);
>>> + xa_unlock(&mgr->ctx_handles);
>>> return 0;
>>> }
>>
>