On 12/18/25 16:04, Tvrtko Ursulin wrote:
> IDR is deprecated, so let's convert the context manager to xarray.
>
> In doing so we remove the context manager mutex and switch the call sites
> which required a guarantee that contexts cannot go away while they walk
> the list of contexts, or otherwise operate on them, to use reference counting.
>
> This allows us to use the built-in xarray spinlock for all operations and
> just temporarily drop it when we need to call sleeping functions.
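Just to spell out the pattern described here, the call sites below all end up
looking roughly like this (simplified sketch; the patch itself uses
kref_get_unless_zero(), more on that below):

	xa_lock(&mgr->ctx_handles);
	xa_for_each(&mgr->ctx_handles, id, ctx) {
		kref_get(&ctx->refcount);	/* keep ctx alive over the unlock */
		xa_unlock(&mgr->ctx_handles);

		/* ... sleeping work, e.g. drm_sched_entity_flush() ... */

		amdgpu_ctx_put(ctx);
		xa_lock(&mgr->ctx_handles);
	}
	xa_unlock(&mgr->ctx_handles);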
>
> Signed-off-by: Tvrtko Ursulin <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 117 ++++++++--------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 +-
> 3 files changed, 49 insertions(+), 81 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index afedea02188d..ee1464f8751a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -484,22 +484,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> if (!ctx)
> return -ENOMEM;
>
> - mutex_lock(&mgr->lock);
> - r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
> - if (r < 0) {
> - mutex_unlock(&mgr->lock);
> - kfree(ctx);
> - return r;
> - }
> -
> - *id = (uint32_t)r;
> r = amdgpu_ctx_init(mgr, priority, filp, ctx);
> if (r) {
> - idr_remove(&mgr->ctx_handles, *id);
> - *id = 0;
> kfree(ctx);
> + return r;
> }
> - mutex_unlock(&mgr->lock);
> +
> + r = xa_alloc(&mgr->ctx_handles, id, ctx,
> + XA_LIMIT(1, AMDGPU_VM_MAX_NUM_CTX - 1), GFP_KERNEL);
Please drop the AMDGPU_VM_MAX_NUM_CTX define as well. That is just a totally
pointless limitation.
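With the define gone this can simply become (untested, just to illustrate):

	r = xa_alloc(&mgr->ctx_handles, id, ctx, xa_limit_32b, GFP_KERNEL);

xa_limit_32b allows the full u32 handle range, and the XA_FLAGS_ALLOC1 init
below already guarantees that handle 0 is never handed out.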
> + if (r)
> + kref_put(&ctx->refcount, amdgpu_ctx_fini);
> +
While at it, you should probably clean up the unnecessary differentiation
between amdgpu_ctx_fini and amdgpu_ctx_do_release as well.
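I.e. something along these lines (rough sketch, untested):

	static void amdgpu_ctx_do_release(struct kref *ref)
	{
		struct amdgpu_ctx *ctx =
			container_of(ref, struct amdgpu_ctx, refcount);
		u32 i, j;

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				if (!ctx->entities[i][j])
					continue;
				drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
			}
		}

		/* ... body of today's amdgpu_ctx_fini() inlined here ... */
		kfree(ctx);
	}

Then every kref_put() in the file can use the same release callback.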
> return r;
> }
>
> @@ -523,14 +518,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>
> static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
> {
> - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
> struct amdgpu_ctx *ctx;
>
> - mutex_lock(&mgr->lock);
> - ctx = idr_remove(&mgr->ctx_handles, id);
> - if (ctx)
> - kref_put(&ctx->refcount, amdgpu_ctx_do_release);
> - mutex_unlock(&mgr->lock);
> + ctx = xa_erase(&fpriv->ctx_mgr.ctx_handles, id);
> + amdgpu_ctx_put(ctx);
> +
> return ctx ? 0 : -EINVAL;
> }
>
> @@ -539,20 +531,12 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
> union drm_amdgpu_ctx_out *out)
> {
> struct amdgpu_ctx *ctx;
> - struct amdgpu_ctx_mgr *mgr;
> unsigned reset_counter;
>
> - if (!fpriv)
> + ctx = amdgpu_ctx_get(fpriv, id);
> + if (!ctx)
> return -EINVAL;
>
> - mgr = &fpriv->ctx_mgr;
> - mutex_lock(&mgr->lock);
> - ctx = idr_find(&mgr->ctx_handles, id);
> - if (!ctx) {
> - mutex_unlock(&mgr->lock);
> - return -EINVAL;
> - }
> -
> /* TODO: these two are always zero */
> out->state.flags = 0x0;
> out->state.hangs = 0x0;
> @@ -566,7 +550,8 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
> out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
> ctx->reset_counter_query = reset_counter;
>
> - mutex_unlock(&mgr->lock);
> + amdgpu_ctx_put(ctx);
> +
> return 0;
> }
>
> @@ -578,19 +563,11 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
> {
> struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> struct amdgpu_ctx *ctx;
> - struct amdgpu_ctx_mgr *mgr;
>
> - if (!fpriv)
> + ctx = amdgpu_ctx_get(fpriv, id);
> + if (!ctx)
> return -EINVAL;
>
> - mgr = &fpriv->ctx_mgr;
> - mutex_lock(&mgr->lock);
> - ctx = idr_find(&mgr->ctx_handles, id);
> - if (!ctx) {
> - mutex_unlock(&mgr->lock);
> - return -EINVAL;
> - }
> -
> out->state.flags = 0x0;
> out->state.hangs = 0x0;
>
> @@ -630,7 +607,8 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>
> msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
> }
>
> - mutex_unlock(&mgr->lock);
> + amdgpu_ctx_put(ctx);
> +
> return 0;
> }
>
> @@ -639,26 +617,18 @@ static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
> bool set, u32 *stable_pstate)
> {
> struct amdgpu_ctx *ctx;
> - struct amdgpu_ctx_mgr *mgr;
> int r;
>
> - if (!fpriv)
> + ctx = amdgpu_ctx_get(fpriv, id);
> + if (!ctx)
> return -EINVAL;
>
> - mgr = &fpriv->ctx_mgr;
> - mutex_lock(&mgr->lock);
> - ctx = idr_find(&mgr->ctx_handles, id);
> - if (!ctx) {
> - mutex_unlock(&mgr->lock);
> - return -EINVAL;
> - }
> -
> if (set)
> r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
> else
> r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
>
> - mutex_unlock(&mgr->lock);
> + amdgpu_ctx_put(ctx);
> return r;
> }
>
> @@ -737,11 +707,11 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
>
> mgr = &fpriv->ctx_mgr;
>
> - mutex_lock(&mgr->lock);
> - ctx = idr_find(&mgr->ctx_handles, id);
> + xa_lock(&mgr->ctx_handles);
> + ctx = xa_load(&mgr->ctx_handles, id);
> if (ctx)
> kref_get(&ctx->refcount);
> - mutex_unlock(&mgr->lock);
> + xa_unlock(&mgr->ctx_handles);
> return ctx;
> }
>
> @@ -886,8 +856,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
> unsigned int i;
>
> mgr->adev = adev;
> - mutex_init(&mgr->lock);
> - idr_init_base(&mgr->ctx_handles, 1);
> + xa_init_flags(&mgr->ctx_handles, XA_FLAGS_ALLOC1);
>
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> atomic64_set(&mgr->time_spend[i], 0);
> @@ -896,13 +865,14 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> {
> struct amdgpu_ctx *ctx;
> - struct idr *idp;
> - uint32_t id, i, j;
> + unsigned long id;
> + uint32_t i, j;
>
> - idp = &mgr->ctx_handles;
> -
> - mutex_lock(&mgr->lock);
> - idr_for_each_entry(idp, ctx, id) {
> + xa_lock(&mgr->ctx_handles);
> + xa_for_each(&mgr->ctx_handles, id, ctx) {
> + if (!kref_get_unless_zero(&ctx->refcount))
This should be pointless; why not use kref_get() instead? The ctx is still
in the xarray at this point, so its refcount cannot be zero here.
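I.e. simply:

	xa_for_each(&mgr->ctx_handles, id, ctx) {
		kref_get(&ctx->refcount);
		xa_unlock(&mgr->ctx_handles);

with the rest of the loop unchanged.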
> + continue;
> + xa_unlock(&mgr->ctx_handles);
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> struct drm_sched_entity *entity;
> @@ -914,20 +884,20 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> timeout = drm_sched_entity_flush(entity, timeout);
> }
> }
> + amdgpu_ctx_put(ctx);
> + xa_lock(&mgr->ctx_handles);
> }
> - mutex_unlock(&mgr->lock);
> + xa_unlock(&mgr->ctx_handles);
> return timeout;
> }
>
> static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> struct amdgpu_ctx *ctx;
> - struct idr *idp;
> - uint32_t id, i, j;
> + unsigned long id;
> + uint32_t i, j;
>
> - idp = &mgr->ctx_handles;
> -
> - idr_for_each_entry(idp, ctx, id) {
> + xa_for_each(&mgr->ctx_handles, id, ctx) {
> if (kref_read(&ctx->refcount) != 1) {
> DRM_ERROR("ctx %p is still alive\n", ctx);
> continue;
Please drop that check as well. It just leads to memory leaks and errors
should anything go wrong, and so only makes things worse.
We can have something like WARN_ON_ONCE(kref_read(&ctx->refcount) != 1)
here, but I think that is superfluous as well.
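Without the check the loop body shrinks to something like (untested):

	xa_for_each(&mgr->ctx_handles, id, ctx) {
		/* ... drm_sched_entity_fini() on all entities as before ... */
		kref_put(&ctx->refcount, amdgpu_ctx_fini);
	}

so the cleanup always runs instead of being skipped when something has
already gone wrong.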
Regards,
Christian.
> @@ -951,8 +921,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
> {
> amdgpu_ctx_mgr_entity_fini(mgr);
> - idr_destroy(&mgr->ctx_handles);
> - mutex_destroy(&mgr->lock);
> + xa_destroy(&mgr->ctx_handles);
> }
>
> void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
> @@ -960,21 +929,21 @@ void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
> {
> struct amdgpu_ctx *ctx;
> unsigned int hw_ip, i;
> - uint32_t id;
> + unsigned long id;
>
> /*
> * This is a little bit racy because it can be that a ctx or a fence are
> * destroyed just in the moment we try to account them. But that is ok
> * since exactly that case is explicitely allowed by the interface.
> */
> - mutex_lock(&mgr->lock);
> for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
> uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
>
> usage[hw_ip] = ns_to_ktime(ns);
> }
>
> - idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
> + xa_lock(&mgr->ctx_handles);
> + xa_for_each(&mgr->ctx_handles, id, ctx) {
> for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
> for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> struct amdgpu_ctx_entity *centity;
> @@ -988,5 +957,5 @@ void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
> }
> }
> }
> - mutex_unlock(&mgr->lock);
> + xa_unlock(&mgr->ctx_handles);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 090dfe86f75b..d073cffa455d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -25,6 +25,7 @@
>
> #include <linux/ktime.h>
> #include <linux/types.h>
> +#include <linux/xarray.h>
>
> #include "amdgpu_ring.h"
>
> @@ -62,9 +63,7 @@ struct amdgpu_ctx {
>
> struct amdgpu_ctx_mgr {
> struct amdgpu_device *adev;
> - struct mutex lock;
> - /* protected by lock */
> - struct idr ctx_handles;
> + struct xarray ctx_handles;
> atomic64_t time_spend[AMDGPU_HW_IP_NUM];
> };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
> index 341beec59537..471d27b2db01 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
> @@ -39,7 +39,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
> struct amdgpu_fpriv *fpriv;
> struct amdgpu_ctx_mgr *mgr;
> struct amdgpu_ctx *ctx;
> - uint32_t id;
> + unsigned long id;
> int r;
>
> if (fd_empty(f))
> @@ -50,10 +50,10 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
> return r;
>
> mgr = &fpriv->ctx_mgr;
> - mutex_lock(&mgr->lock);
> - idr_for_each_entry(&mgr->ctx_handles, ctx, id)
> + xa_lock(&mgr->ctx_handles);
> + xa_for_each(&mgr->ctx_handles, id, ctx)
> amdgpu_ctx_priority_override(ctx, priority);
> - mutex_unlock(&mgr->lock);
> + xa_unlock(&mgr->ctx_handles);
>
> return 0;
> }