BDW reports GT C0 residency in a constant time unit. The driver calculates
GT utilization from the C0 residency and adjusts the RP frequency up or down
accordingly.

Signed-off-by: Daisy Sun <daisy....@intel.com>
[torourke: rebased on latest and resolved conflict]
Signed-off-by: Tom O'Rourke <Tom.O'rou...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  17 ++
 drivers/gpu/drm/i915/i915_irq.c      |  10 ++
 drivers/gpu/drm/i915/i915_reg.h      |   4 +
 drivers/gpu/drm/i915/intel_display.c |   2 +
 drivers/gpu/drm/i915/intel_drv.h     |   1 +
 drivers/gpu/drm/i915/intel_pm.c      | 148 ++++++++++++++---
 6 files changed, 162 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 272aa7a..eef8366 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -820,6 +820,19 @@ struct i915_suspend_saved_registers {
        u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_bdw_cal { /* state of one software-turbo evaluator */
+       u32 it_threshold_pct; /* busyness threshold that triggers a change, in percent */
+       u32 eval_interval; /* minimum time between evaluations, in us */
+       u32 last_ts; /* TIMESTAMP_CTR value stored at the last update */
+       u32 last_c0; /* MCHBAR_PCU_C0 value stored at the last update; 0 = none yet */
+       bool is_up; /* true: may raise the frequency; false: may lower it */
+};
+
+struct intel_rps_bdw_turbo { /* BDW software turbo: one evaluator per direction */
+       struct intel_rps_bdw_cal up; /* acts when busyness >= its threshold */
+       struct intel_rps_bdw_cal down; /* acts when busyness <= its threshold */
+};
+
 struct intel_gen6_power_mgmt {
        /* work and pm_iir are protected by dev_priv->irq_lock */
        struct work_struct work;
@@ -850,6 +863,9 @@ struct intel_gen6_power_mgmt {
        bool enabled;
        struct delayed_work delayed_resume_work;
 
+       bool is_bdw_sw_turbo;   /* Switch of BDW software turbo */
+       struct intel_rps_bdw_turbo sw_turbo; /* Calculate RP interrupt timing */
+
        /*
         * Protects RPS/RC6 register access and PCU communication.
         * Must be taken after struct_mutex if nested.
@@ -2509,6 +2525,7 @@ extern void intel_disable_fbc(struct drm_device *dev);
 extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
 extern void intel_init_pch_refclk(struct drm_device *dev);
 extern void gen6_set_rps(struct drm_device *dev, u8 val);
+extern void bdw_software_turbo(struct drm_device *dev);
 extern void valleyview_set_rps(struct drm_device *dev, u8 val);
 extern int valleyview_rps_max_freq(struct drm_i915_private *dev_priv);
 extern int valleyview_rps_min_freq(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2b3d852..e077269 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1558,6 +1558,16 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device 
*dev, enum pipe pipe)
                                     res1, res2);
 }
 
+void gen8_flip_interrupt(struct drm_device *dev) /* called from intel_crtc_page_flip() */
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!dev_priv->rps.is_bdw_sw_turbo) /* software turbo not in use: nothing to do */
+               return;
+
+       bdw_software_turbo(dev); /* sample the counters and re-evaluate the frequency */
+}
+
 /* The RPS events need forcewake, so we add them to a work queue and mask their
  * IMR bits until the work is done. Other interrupts can be processed without
  * the work queue. */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0b88508..ec08cd9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5041,6 +5041,10 @@ enum punit_power_well {
 #define GEN8_UCGCTL6                           0x9430
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE      (1<<14)
 
+#define TIMESTAMP_CTR          0x44070 /* read by bdw_software_turbo() as a usec timestamp */
+#define FREQ_1_28_US(us)       (((us) * 100) >> 7) /* usec -> 1.28 usec units (us * 100 / 128) */
+#define MCHBAR_PCU_C0          (MCHBAR_MIRROR_BASE_SNB + 0x5960) /* C0 residency, 32*1.28 usec units */
+
 #define GEN6_RPNSWREQ                          0xA008
 #define   GEN6_TURBO_DISABLE                   (1<<31)
 #define   GEN6_FREQUENCY(x)                    ((x)<<25)
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index b57210c..8e39ea7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8893,6 +8893,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
        unsigned long flags;
        int ret;
 
+       gen8_flip_interrupt(dev);
+
        /* Can't change pixel format via MI display flips. */
        if (fb->pixel_format != crtc->primary->fb->pixel_format)
                return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index b885df1..15010d3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -929,6 +929,7 @@ void ironlake_teardown_rc6(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void gen8_flip_interrupt(struct drm_device *dev);
 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
 void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 75c1c76..9327cd7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2963,6 +2963,9 @@ static void gen6_set_rps_thresholds(struct 
drm_i915_private *dev_priv, u8 val)
 {
        int new_power;
 
+       if (dev_priv->rps.is_bdw_sw_turbo)
+               return;
+
        new_power = dev_priv->rps.power;
        switch (dev_priv->rps.power) {
        case LOW_POWER:
@@ -3308,12 +3311,87 @@ static void parse_rp_state_cap(struct drm_i915_private 
*dev_priv, u32 rp_state_c
                dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
 }
 
+static void bdw_sw_calculate_freq(struct drm_device *dev,
+               struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0) /* run one evaluator */
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u64 busy = 0;
+       u32 busyness_pct = 0;
+       u32 elapsed_time = 0; /* usec since c->last_ts */
+       u16 new_freq = 0; /* 0 means "no frequency change requested" */
+
+       if (!c || !cur_time || !c0)
+               return;
+
+       if (0 == c->last_c0) /* no sample stored yet: only record a baseline */
+               goto out;
+
+       /* Interval not over yet: return without touching the stored sample */
+       elapsed_time = *cur_time - c->last_ts;
+       if (elapsed_time < c->eval_interval)
+               return;
+
+       mutex_lock(&dev_priv->rps.hw_lock); /* held across cur_freq read and gen6_set_rps() */
+
+       /*
+        * c0 counts in units of 32 * 1.28 usec; elapsed_time is in usec.
+        * The unsimplified busyness_pct calculation would be
+        *     busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100;
+        *     busyness_pct = (u32)(busy * 100 / elapsed_time);
+        * The two factors of 100 cancel, leaving a single shift by 12.
+        */
+       busy = (u64)(*c0 - c->last_c0) << 12;
+       do_div(busy, elapsed_time);
+       busyness_pct = (u32)busy;
+
+       if (c->is_up && busyness_pct >= c->it_threshold_pct)
+               new_freq = (u16)dev_priv->rps.cur_freq + 3; /* busy enough: step up by 3 */
+       if (!c->is_up && busyness_pct <= c->it_threshold_pct)
+               new_freq = (u16)dev_priv->rps.cur_freq - 1; /* idle enough: step down by 1 */
+
+       /* Clamp to the soft limits and apply; a computed 0 is treated as no change */
+       if (0 != new_freq) {
+               if (new_freq > dev_priv->rps.max_freq_softlimit)
+                       new_freq = dev_priv->rps.max_freq_softlimit;
+               else if (new_freq < dev_priv->rps.min_freq_softlimit)
+                       new_freq = dev_priv->rps.min_freq_softlimit;
+
+               gen6_set_rps(dev, new_freq);
+       }
+
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+out: /* store this sample as the baseline for the next evaluation */
+       c->last_c0 = *c0;
+       c->last_ts = *cur_time;
+}
+
+void bdw_software_turbo(struct drm_device *dev) /* sample once, run both evaluators */
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       u32 current_time = I915_READ(TIMESTAMP_CTR); /* timestamp, unit in usec */
+       u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* C0 counter, unit in 32*1.28 usec */
+
+       bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up,
+                       &current_time, &current_c0); /* may raise the frequency */
+       bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down,
+                       &current_time, &current_c0); /* may lower the frequency */
+}
+
+
 static void gen8_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
        uint32_t rc6_mask = 0, rp_state_cap;
+       uint32_t threshold_up_pct, threshold_down_pct;
+       uint32_t ei_up, ei_down; /* up and down evaluation interval */
+       u32 rp_ctl_flag;
        int unused;
+       
+       /* Use software Turbo for BDW */
+       dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev);
 
        /* 1a: Software RC state - RC0 */
        I915_WRITE(GEN6_RC_STATE, 0);
@@ -3350,35 +3428,62 @@ static void gen8_enable_rps(struct drm_device *dev)
                   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
        I915_WRITE(GEN6_RC_VIDEO_FREQ,
                   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
-       /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout 
*/
+       ei_up = 84480; /* 84.48ms */
+       ei_down = 448000;
+       threshold_up_pct = 90; /* x percent busy */
+       threshold_down_pct = 70;
+
+       if (dev_priv->rps.is_bdw_sw_turbo) {
+               dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct;
+               dev_priv->rps.sw_turbo.up.eval_interval = ei_up;
+               dev_priv->rps.sw_turbo.up.is_up = true;
+               dev_priv->rps.sw_turbo.up.last_ts = 0;
+               dev_priv->rps.sw_turbo.up.last_c0 = 0;
+
+               dev_priv->rps.sw_turbo.down.it_threshold_pct = 
threshold_down_pct;
+               dev_priv->rps.sw_turbo.down.eval_interval = ei_down;
+               dev_priv->rps.sw_turbo.down.is_up = false;
+               dev_priv->rps.sw_turbo.down.last_ts = 0;
+               dev_priv->rps.sw_turbo.down.last_c0 = 0;
+       } else {
+               /* NB: Docs say 1s, and 1000000 - which aren't equivalent
+                * 1 second timeout*/
+               I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000));
 
-       /* Docs recommend 900MHz, and 300 MHz respectively */
-       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-                  dev_priv->rps.max_freq_softlimit << 24 |
-                  dev_priv->rps.min_freq_softlimit << 16);
+               /* Docs recommend 900MHz, and 300 MHz respectively */
+               I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+                          dev_priv->rps.max_freq_softlimit << 24 |
+                          dev_priv->rps.min_freq_softlimit << 16);
 
-       I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per 
EI, 90% */
-       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness 
per EI, 70%*/
-       I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-       I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+               I915_WRITE(GEN6_RP_UP_THRESHOLD,
+                       FREQ_1_28_US(ei_up * threshold_up_pct / 100));
+               I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
+                       FREQ_1_28_US(ei_down * threshold_down_pct / 100));
+               I915_WRITE(GEN6_RP_UP_EI,
+                       FREQ_1_28_US(ei_up));
+               I915_WRITE(GEN6_RP_DOWN_EI,
+                       FREQ_1_28_US(ei_down));
 
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+               I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       }
 
        /* 5: Enable RPS */
-       I915_WRITE(GEN6_RP_CONTROL,
-                  GEN6_RP_MEDIA_TURBO |
-                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                  GEN6_RP_MEDIA_IS_GFX |
-                  GEN6_RP_ENABLE |
-                  GEN6_RP_UP_BUSY_AVG |
-                  GEN6_RP_DOWN_IDLE_AVG);
+       rp_ctl_flag = GEN6_RP_MEDIA_TURBO |
+                                       GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                                       GEN6_RP_MEDIA_IS_GFX |
+                                       GEN6_RP_UP_BUSY_AVG |
+                                       GEN6_RP_DOWN_IDLE_AVG;
+       if (!dev_priv->rps.is_bdw_sw_turbo)
+               rp_ctl_flag |= GEN6_RP_ENABLE;
+
+       I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag);
 
        /* 6: Ring frequency + overclocking (our driver does this later */
 
        gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
-
-       gen6_enable_rps_interrupts(dev);
+       
+       if (!dev_priv->rps.is_bdw_sw_turbo)
+               gen6_enable_rps_interrupts(dev);
 
        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -4529,8 +4634,11 @@ static void intel_gen6_powersave_work(struct work_struct 
*work)
                container_of(work, struct drm_i915_private,
                             rps.delayed_resume_work.work);
        struct drm_device *dev = dev_priv->dev;
+       
+       dev_priv->rps.is_bdw_sw_turbo = false;
 
        mutex_lock(&dev_priv->rps.hw_lock);
+       
 
        if (IS_VALLEYVIEW(dev)) {
                valleyview_enable_rps(dev);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to