Implement get_reset ioctl for amdgpu

Signed-off-by: André Almeida <andrealm...@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  4 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 35 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  5 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 +++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  2 ++
 6 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2eb2c66843a8..0ba26b4b039c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1262,8 +1262,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        uint64_t seq;
        int r;
 
-       for (i = 0; i < p->gang_size; ++i)
+       for (i = 0; i < p->gang_size; ++i) {
+               p->jobs[i]->ctx = p->ctx;
                drm_sched_job_arm(&p->jobs[i]->base);
+       }
 
        for (i = 0; i < p->gang_size; ++i) {
                struct dma_fence *fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2139ac12159..d3e292382d4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -322,6 +322,9 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, 
int32_t priority,
        ctx->init_priority = priority;
        ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
 
+       ctx->global_reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
+       ctx->local_reset_counter = 0;
+
        r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
        if (r)
                return r;
@@ -963,3 +966,35 @@ void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
        }
        mutex_unlock(&mgr->lock);
 }
+
+int amdgpu_get_reset(struct drm_file *filp, struct drm_device *dev,
+                    struct drm_get_reset *reset)
+{
+       struct amdgpu_device *adev = drm_to_adev(dev);
+       struct amdgpu_ctx *ctx;
+       struct amdgpu_ctx_mgr *mgr;
+       unsigned int id = reset->ctx_id;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+       mgr = &fpriv->ctx_mgr;
+       mutex_lock(&mgr->lock);
+       ctx = idr_find(&mgr->ctx_handles, id);
+       if (!ctx) {
+               mutex_unlock(&mgr->lock);
+               return -EINVAL;
+       }
+
+       reset->dev_reset_count =
+               atomic_read(&adev->gpu_reset_counter) - 
ctx->global_reset_counter;
+
+       reset->ctx_reset_count = ctx->local_reset_counter;
+
+       if (amdgpu_in_reset(adev))
+               reset->flags |= DRM_RESET_IN_PROGRESS;
+
+       if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+               reset->flags |= DRM_RESET_VRAM_LOST;
+
+       mutex_unlock(&mgr->lock);
+       return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..0c9815695884 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,9 @@ struct amdgpu_ctx {
        unsigned long                   ras_counter_ce;
        unsigned long                   ras_counter_ue;
        uint32_t                        stable_pstate;
+
+       uint64_t                        global_reset_counter;
+       uint64_t                        local_reset_counter;
 };
 
 struct amdgpu_ctx_mgr {
@@ -97,4 +100,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
                          ktime_t usage[AMDGPU_HW_IP_NUM]);
 
+int amdgpu_get_reset(struct drm_file *file_priv, struct drm_device *dev,
+                    struct drm_get_reset *reset);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c9a41c997c6c..431791b2c3cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2805,6 +2805,7 @@ static const struct drm_driver amdgpu_kms_driver = {
 #ifdef CONFIG_PROC_FS
        .show_fdinfo = amdgpu_show_fdinfo,
 #endif
+       .get_reset = amdgpu_get_reset,
 
        .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
        .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c3d9d75143f4..1553a2633d46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -35,11 +35,20 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
 {
        struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
        struct amdgpu_job *job = to_amdgpu_job(s_job);
+       struct drm_sched_entity *entity = job->base.entity;
        struct amdgpu_task_info ti;
        struct amdgpu_device *adev = ring->adev;
        int idx;
        int r;
 
+       memset(&ti, 0, sizeof(struct amdgpu_task_info));
+       amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
+
+       if (job->ctx) {
+               DRM_INFO("Increasing ctx reset count for %s (%d)\n", 
ti.process_name, ti.pid);
+               job->ctx->local_reset_counter++;
+       }
+
        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
                DRM_INFO("%s - device unplugged skipping recovery on 
scheduler:%s",
                         __func__, s_job->sched->name);
@@ -48,7 +57,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
                return DRM_GPU_SCHED_STAT_ENODEV;
        }
 
-       memset(&ti, 0, sizeof(struct amdgpu_task_info));
        adev->job_hang = true;
 
        if (amdgpu_gpu_recovery &&
@@ -58,7 +66,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
                goto exit;
        }
 
-       amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
        DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
                  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
                  ring->fence_drv.sync_seq);
@@ -105,6 +112,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
         */
        (*job)->base.sched = &adev->rings[0]->sched;
        (*job)->vm = vm;
+       (*job)->ctx = NULL;
 
        amdgpu_sync_create(&(*job)->explicit_sync);
        (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 52f2e313ea17..0d463babaa60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -63,6 +63,8 @@ struct amdgpu_job {
        uint32_t                oa_base, oa_size;
        uint32_t                vram_lost_counter;
 
+       struct amdgpu_ctx       *ctx;
+
        /* user fence handling */
        uint64_t                uf_addr;
        uint64_t                uf_sequence;
-- 
2.41.0

Reply via email to