Groups can be killed during a reset even though they did nothing wrong.
That usually happens when the FW is put in a bad state by other groups,
resulting in group suspension failures when the reset happens.

If we end up in that situation, flag the group as innocent and report this
to userspace through the new DRM_PANTHOR_GROUP_STATE_INNOCENT flag.

Bump the minor driver version to reflect the uAPI change.

Changes in v3:
- Actually report innocence to userspace

Changes in v2:
- New patch

Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_drv.c   |  2 +-
 drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++++++
 include/uapi/drm/panthor_drm.h          |  9 +++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index ac7e53f6e3f0..f1dff7e0173d 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1507,7 +1507,7 @@ static const struct drm_driver panthor_drm_driver = {
        .desc = "Panthor DRM driver",
        .date = "20230801",
        .major = 1,
-       .minor = 2,
+       .minor = 3,
 
        .gem_create_object = panthor_gem_create_object,
        .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index ef4bec7ff9c7..97ed5fe5a191 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -610,6 +610,16 @@ struct panthor_group {
         */
        bool timedout;
 
+       /**
+        * @innocent: True when the group becomes unusable because the group suspension
+        * failed during a reset.
+        *
+        * Sometimes the FW was put in a bad state by other groups, causing the group
+        * suspension happening in the reset path to fail. In that case, we consider the
+        * group innocent.
+        */
+       bool innocent;
+
        /**
         * @syncobjs: Pool of per-queue synchronization objects.
         *
@@ -2690,6 +2700,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
                        u32 csg_id = ffs(slot_mask) - 1;
                        struct panthor_csg_slot *csg_slot = 
&sched->csg_slots[csg_id];
 
+                       /* If the group was still usable before that point, we consider
+                        * it innocent.
+                        */
+                       if (group_can_run(csg_slot->group))
+                               csg_slot->group->innocent = true;
+
                        /* We consider group suspension failures as fatal and flag the
                         * group as unusable by setting timedout=true.
                         */
@@ -3570,6 +3586,8 @@ int panthor_group_get_state(struct panthor_file *pfile,
                get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
                get_state->fatal_queues = group->fatal_queues;
        }
+       if (group->innocent)
+               get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT;
        mutex_unlock(&sched->lock);
 
        group_put(group);
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 87c9cb555dd1..b99763cbae48 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -923,6 +923,15 @@ enum drm_panthor_group_state_flags {
         * When a group ends up with this flag set, no jobs can be submitted to its queues.
         */
        DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1,
+
+       /**
+        * @DRM_PANTHOR_GROUP_STATE_INNOCENT: Group was killed during a reset caused by other
+        * groups.
+        *
+        * This flag can only be set if DRM_PANTHOR_GROUP_STATE_TIMEDOUT is set and
+        * DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not.
+        */
+       DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2,
 };
 
 /**
-- 
2.46.2

Reply via email to