Rendering buffers that need full GT power can request it through the new
I915_EXEC_GT_FULL execbuffer flag. If the default is the power-saving
configuration with half of the slices off, the driver emits LRIs
(MI_LOAD_REGISTER_IMM commands) to enable all slices immediately.

CC: Eric Anholt <e...@anholt.net>
CC: Kenneth Graunke <kenn...@whitecape.org>
Signed-off-by: Rodrigo Vivi <rodrigo.v...@gmail.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  8 ++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
 drivers/gpu/drm/i915/i915_sysfs.c          | 11 ++++-
 drivers/gpu/drm/i915/intel_display.c       |  6 +++
 drivers/gpu/drm/i915/intel_drv.h           |  1 +
 drivers/gpu/drm/i915/intel_pm.c            | 41 ++++++++++++++++--
 include/uapi/drm/i915_drm.h                |  5 ++-
 8 files changed, 144 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 685fb1d..bd4774e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1219,6 +1219,12 @@ struct i915_package_c8 {
        } regsave;
 };
 
+struct i915_gt_slices {
+       int state_default;
+       int forcing_full;
+       struct mutex m_lock;
+};
+
 typedef struct drm_i915_private {
        struct drm_device *dev;
        struct kmem_cache *slab;
@@ -1418,6 +1424,8 @@ typedef struct drm_i915_private {
 
        struct i915_package_c8 pc8;
 
+       struct i915_gt_slices gt_slices;
+
        /* Old dri1 support infrastructure, beware the dragons ya fools entering
         * here! */
        struct i915_dri1_state dri1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..16dc86e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 }
 
 static int
+i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       int ret;
+
+       if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
+               return 0;
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, HSW_GT_SLICE_INFO);
+       intel_ring_emit(ring, SLICE_SEL_BOTH);
+       intel_ring_advance(ring);
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
+       intel_ring_emit(ring, LOWER_SLICE_ENABLED);
+       intel_ring_advance(ring);
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, HSW_SLICESHUTDOWN);
+       intel_ring_emit(ring, ~SLICE_SHUTDOWN);
+       intel_ring_advance(ring);
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+       intel_ring_emit(ring, CS_IDLE_COUNT_1US);
+       intel_ring_advance(ring);
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
+       intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
+       intel_ring_advance(ring);
+
+       ret = intel_ring_begin(ring, 3);
+       if (ret)
+               return ret;
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+       intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+       intel_ring_emit(ring, CS_IDLE_COUNT_5US);
+       intel_ring_advance(ring);
+
+       return 0;
+}
+
+static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                       struct drm_file *file,
                       struct drm_i915_gem_execbuffer2 *args,
@@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
+       if (args->flags & I915_EXEC_GT_FULL &&
+           dev_priv->gt_slices.state_default == 0 &&
+           !dev_priv->gt_slices.forcing_full) {
+               dev_priv->gt_slices.forcing_full = true;
+               i915_gt_full_immediate(dev, ring);
+       }
+
        mode = args->flags & I915_EXEC_CONSTANTS_MASK;
        mask = I915_EXEC_CONSTANTS_MASK;
        switch (mode) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 497c441..0146bef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -277,6 +277,17 @@
 #define   SLICE_STATUS_MAIN_ON (2<<0)
 #define   SLICE_STATUS_BOTH_ON (3<<0)
 
+#define HSW_SLICESHUTDOWN      0xA190
+#define   SLICE_SHUTDOWN       (1<<0)
+
+#define RC_IDLE_MAX_COUNT      0x2054
+#define   CS_IDLE_COUNT_1US    (1<<1)
+#define   CS_IDLE_COUNT_5US    (1<<3)
+
+#define WAIT_FOR_RC6_EXIT      0x20CC
+#define   WAIT_RC6_EXIT                (1<<0)
+#define   MASK_WAIT_RC6_EXIT   (1<<16)
+
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ccd52..25b0c5b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev,
        struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
        struct drm_device *dev = minor->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       bool full;
 
-       return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
-                      LOWER_SLICE_ENABLED ? "full" : "half");
+       mutex_lock(&dev_priv->gt_slices.m_lock);
+       full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
+       mutex_unlock(&dev_priv->gt_slices.m_lock);
+
+       return sprintf(buf, "%s\n", full ? "full" : "half");
 }
 
 static ssize_t gt_slice_config_store(struct device *kdev,
@@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev,
 {
        struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
        struct drm_device *dev = minor->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;
 
        if (!strncmp(buf, "full", sizeof("full") - 1)) {
                ret = intel_set_gt_full(dev);
                if (ret)
                        return ret;
+               dev_priv->gt_slices.state_default = 1;
        } else if (!strncmp(buf, "half", sizeof("half") - 1)) {
                ret = intel_set_gt_half(dev);
                if (ret)
                        return ret;
+               dev_priv->gt_slices.state_default = 0;
        } else
                return -EINVAL;
        return count;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4f1b636..1a2f8bc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev)
 
        if (dev_priv->info->gen >= 6)
                gen6_rps_idle(dev->dev_private);
+
+       if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
+           dev_priv->gt_slices.state_default == 0) {
+               intel_set_gt_half_async(dev);
+               dev_priv->gt_slices.forcing_full = false;
+       }
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a9abbb5..98cd63e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -836,6 +836,7 @@ void intel_disable_gt_powersave(struct drm_device *dev);
 void ironlake_teardown_rc6(struct drm_device *dev);
 int intel_set_gt_full(struct drm_device *dev);
 int intel_set_gt_half(struct drm_device *dev);
+void intel_set_gt_half_async(struct drm_device *dev);
 void intel_init_gt_slices(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 63af075..24a0dc7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3873,6 +3873,7 @@ int intel_set_gt_full(struct drm_device *dev)
        if (!HAS_SLICE_SHUTDOWN(dev))
                return -ENODEV;
 
+       mutex_lock(&dev_priv->gt_slices.m_lock);
        I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 
        /* Slices are enabled on RC6 exit */
@@ -3881,14 +3882,19 @@ int intel_set_gt_full(struct drm_device *dev)
        if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
                      SLICE_STATUS_BOTH_ON), 2000)) {
                DRM_ERROR("Timeout enabling full gt slices\n");
+
                I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
                I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+               mutex_unlock(&dev_priv->gt_slices.m_lock);
+
                gen6_gt_force_wake_put(dev_priv);
                return -ETIMEDOUT;
        }
+
        I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
-       gen6_gt_force_wake_put(dev_priv);
+       mutex_unlock(&dev_priv->gt_slices.m_lock);
 
+       gen6_gt_force_wake_put(dev_priv);
        return 0;
 }
 
@@ -3899,6 +3905,7 @@ int intel_set_gt_half(struct drm_device *dev)
        if (!HAS_SLICE_SHUTDOWN(dev))
                return -ENODEV;
 
+       mutex_lock(&dev_priv->gt_slices.m_lock);
        I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 
        /* Slices are disabled on RC6 exit */
@@ -3907,16 +3914,40 @@ int intel_set_gt_half(struct drm_device *dev)
        if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
                      SLICE_STATUS_MAIN_ON), 2000)) {
                DRM_ERROR("Timed out disabling half gt slices\n");
+
                I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
                I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+               mutex_unlock(&dev_priv->gt_slices.m_lock);
+
                gen6_gt_force_wake_put(dev_priv);
                return -ETIMEDOUT;
        }
+
        I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+       mutex_unlock(&dev_priv->gt_slices.m_lock);
+
        gen6_gt_force_wake_put(dev_priv);
        return 0;
 }
 
+/**
+ * On Haswell, slice on/off transitions are done via the RC6 sequence.
+ * This async function requests a slice shutdown without waiting for it.
+ * The slices will be disabled on the next RC6 exit.
+ */
+void intel_set_gt_half_async(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!HAS_SLICE_SHUTDOWN(dev))
+               return;
+
+       mutex_lock(&dev_priv->gt_slices.m_lock);
+       I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
+       I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+       mutex_unlock(&dev_priv->gt_slices.m_lock);
+}
+
 void intel_init_gt_slices(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3927,9 +3958,13 @@ void intel_init_gt_slices(struct drm_device *dev)
        if (!HAS_SLICE_SHUTDOWN(dev))
                return;
 
+       dev_priv->gt_slices.state_default = 1;
+       dev_priv->gt_slices.forcing_full = false;
+       mutex_init(&dev_priv->gt_slices.m_lock);
+
        if (!i915_gt_slice_config) {
-               I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
-               I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+               intel_set_gt_half_async(dev);
+               dev_priv->gt_slices.state_default = 0;
        }
 }
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..144054f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,10 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT           (1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/* Use all available GT slices */
+#define I915_EXEC_GT_FULL              (1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_GT_FULL<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.7.11.7

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to