Allowing ourselves to sleep before a GPU reset after disabling
submission, even for a few milliseconds, gives an innocent context the
opportunity to clear the GPU before the reset occurs. However, how long
to sleep depends on the typical non-preemptible duration (a similar
problem to determining the ideal preempt-reset timeout or even the
heartbeat interval). As this seems to be a hard policy decision, punt it
to userspace.

The timeout can be adjusted using

        /sys/class/drm/card?/engine/*/stop_timeout_ms

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfi...@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile         |  3 ++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 40 ++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index b87c8f485a24..76145d25ce65 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -60,6 +60,9 @@ config DRM_I915_STOP_TIMEOUT
          that the reset itself may take longer and so be more disruptive to
          interactive or low latency workloads.
 
+         This is adjustable via
+         /sys/class/drm/card?/engine/*/stop_timeout_ms
+
 config DRM_I915_TIMESLICE_DURATION
        int "Scheduling quantum for userspace batches (ms, jiffy granularity)"
        default 1 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index b1bd768b13d7..86377a4ffe70 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -184,6 +184,45 @@ timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 static struct kobj_attribute timeslice_duration_attr =
 __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
 
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+          const char *buf, size_t count)
+{
+       struct intel_engine_cs *engine = kobj_to_engine(kobj);
+       unsigned long long duration;
+       int err;
+
+       /*
+        * Allowing ourselves to sleep before a GPU reset after disabling
+        * submission, even for a few milliseconds, gives an innocent context
+        * the opportunity to clear the GPU before the reset occurs. However,
+        * how long to sleep depends on the typical non-preemptible duration
+        * (a similar problem to determining the ideal preempt-reset timeout
+        * or even the heartbeat interval).
+        */
+
+       err = kstrtoull(buf, 0, &duration);
+       if (err)
+               return err;
+
+       if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+               return -EINVAL;
+
+       WRITE_ONCE(engine->props.stop_timeout_ms, duration);
+       return count;
+}
+
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+       struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+       return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
 static void kobj_engine_release(struct kobject *kobj)
 {
        kfree(kobj);
@@ -224,6 +263,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
                &mmio_attr.attr,
                &caps_attr.attr,
                &all_caps_attr.attr,
+               &stop_timeout_attr.attr,
                NULL
        };
 
-- 
2.24.0.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to