Signed-off-by: Daisy Sun <daisy....@intel.com>
[torourke: rebased on latest and resolved conflict]
Signed-off-by: Tom O'Rourke <Tom.O'rou...@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 22 ++++
drivers/gpu/drm/i915/i915_irq.c | 21 ++++
drivers/gpu/drm/i915/i915_reg.h | 4 +
drivers/gpu/drm/i915/intel_display.c | 3 +
drivers/gpu/drm/i915/intel_pm.c | 230 +++++++++++++++++++++++++++++------
5 files changed, 241 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ef38c3b..f1c4c5b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -915,6 +915,23 @@ struct intel_rps_ei {
u32 media_c0;
};
+struct intel_rps_bdw_cal { /* software-turbo busyness evaluator state for one direction */
+ u32 it_threshold_pct; /* interrupt, in percentage */
+ u32 eval_interval; /* evaluation interval, in us */
+ u32 last_ts; /* TIMESTAMP_CTR reading saved by the previous evaluation */
+ u32 last_c0; /* MCHBAR_PCU_C0 reading saved by the previous evaluation; 0 means no sample yet */
+ bool is_up; /* true: evaluator raises the frequency; false: it lowers it */
+};
+
+struct intel_rps_bdw_turbo { /* BDW software turbo state, embedded in struct intel_gen6_power_mgmt */
+ struct intel_rps_bdw_cal up; /* evaluator with is_up == true */
+ struct intel_rps_bdw_cal down; /* evaluator with is_up == false */
+ struct timer_list flip_timer; /* pushed out on every page flip; its handler runs when no flip arrives in time */
+ u32 timeout; /* flip_timer period, in us */
+ atomic_t flip_received; /* set when flip_timer is armed, cleared by its timeout handler */
+ struct work_struct work_max_freq; /* queued by the timeout handler; requests rps.rp0_freq */
+};
+
struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv->irq_lock */
struct work_struct work;
@@ -948,6 +965,9 @@ struct intel_gen6_power_mgmt {
bool enabled;
struct delayed_work delayed_resume_work;
+ bool is_bdw_sw_turbo; /* Switch of BDW software turbo */
+ struct intel_rps_bdw_turbo sw_turbo; /* Calculate RP interrupt timing */
+
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
@@ -2703,6 +2723,8 @@ extern void intel_disable_fbc(struct drm_device *dev);
extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
extern void intel_init_pch_refclk(struct drm_device *dev);
extern void gen6_set_rps(struct drm_device *dev, u8 val);
+extern void bdw_software_turbo(struct drm_device *dev);
+extern void gen8_flip_interrupt(struct drm_device *dev);
extern void valleyview_set_rps(struct drm_device *dev, u8 val);
extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
bool enable);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6ef9d6f..367f8e1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1961,6 +1961,27 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device
*dev, enum pipe pipe)
res1, res2);
}
+/**
+ * gen8_flip_interrupt - note a page flip for BDW software turbo
+ * @dev: drm device
+ *
+ * Does nothing unless rps.is_bdw_sw_turbo is set. Otherwise pushes the
+ * flip timer out by sw_turbo.timeout microseconds, marks flips as being
+ * received and runs one busyness evaluation via bdw_software_turbo().
+ */
+void gen8_flip_interrupt(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_rps_bdw_turbo *turbo = &dev_priv->rps.sw_turbo;
+
+	if (!dev_priv->rps.is_bdw_sw_turbo)
+		return;
+
+	/*
+	 * mod_timer() (re)arms the timer whether or not it is currently
+	 * pending, so there is no need to look at flip_received and choose
+	 * between mod_timer() and add_timer(). That check raced with the
+	 * timeout handler clearing the flag and could end with add_timer()
+	 * being called on a timer that was already pending.
+	 */
+	mod_timer(&turbo->flip_timer,
+		  jiffies + usecs_to_jiffies(turbo->timeout));
+	atomic_set(&turbo->flip_received, true);
+
+	bdw_software_turbo(dev);
+}
+
/* The RPS events need forcewake, so we add them to a work queue and mask
their
* IMR bits until the work is done. Other interrupts can be processed without
* the work queue. */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fe5c276..088e0e1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5453,6 +5453,10 @@ enum punit_power_well {
#define GEN8_UCGCTL6 0x9430
#define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1<<14)
+#define TIMESTAMP_CTR 0x44070
+#define FREQ_1_28_US(us) (((us) * 100) >> 7)
+#define MCHBAR_PCU_C0 (MCHBAR_MIRROR_BASE_SNB + 0x5960)
+
#define GEN6_GFXPAUSE 0xA000
#define GEN6_RPNSWREQ 0xA008
#define GEN6_TURBO_DISABLE (1<<31)
diff --git a/drivers/gpu/drm/i915/intel_display.c
b/drivers/gpu/drm/i915/intel_display.c
index 99eb7ca..1dd8a7c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9661,6 +9661,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
unsigned long flags;
int ret;
+ //trigger software GT busyness calculation
+ gen8_flip_interrupt(dev);
+
/*
* drm_mode_page_flip_ioctl() should already catch this, but double
* check to be safe. In the future we may enable pageflipping from
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3f88f29..e13d0ff 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2122,7 +2122,6 @@ int ilk_wm_max_level(const struct drm_device *dev)
else
return 2;
}
-
static void intel_print_wm_latency(struct drm_device *dev,
const char *name,
const uint16_t wm[5])
@@ -3091,6 +3090,9 @@ static void gen6_set_rps_thresholds(struct
drm_i915_private *dev_priv, u8 val)
{
int new_power;
+ if (dev_priv->rps.is_bdw_sw_turbo)
+ return;
+
new_power = dev_priv->rps.power;
switch (dev_priv->rps.power) {
case LOW_POWER:
@@ -3298,8 +3300,11 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
valleyview_set_rps(dev_priv->dev,
dev_priv->rps.min_freq_softlimit);
else if (IS_VALLEYVIEW(dev))
vlv_set_rps_idle(dev_priv);
- else
+ else if (!dev_priv->rps.is_bdw_sw_turbo
+ ||
atomic_read(&dev_priv->rps.sw_turbo.flip_received)){
gen6_set_rps(dev_priv->dev,
dev_priv->rps.min_freq_softlimit);
+ }
+
dev_priv->rps.last_adj = 0;
}
mutex_unlock(&dev_priv->rps.hw_lock);
@@ -3313,8 +3318,11 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
if (dev_priv->rps.enabled) {
if (IS_VALLEYVIEW(dev))
valleyview_set_rps(dev_priv->dev,
dev_priv->rps.max_freq_softlimit);
- else
+ else if (!dev_priv->rps.is_bdw_sw_turbo
+ ||
atomic_read(&dev_priv->rps.sw_turbo.flip_received)){
gen6_set_rps(dev_priv->dev,
dev_priv->rps.max_freq_softlimit);
+ }
+
dev_priv->rps.last_adj = 0;
}
mutex_unlock(&dev_priv->rps.hw_lock);
@@ -3345,21 +3353,26 @@ void valleyview_set_rps(struct drm_device *dev, u8 val)
static void gen8_disable_rps_interrupts(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ if (IS_BROADWELL(dev) && dev_priv->rps.is_bdw_sw_turbo){
+ if (atomic_read(&dev_priv->rps.sw_turbo.flip_received))
+ del_timer(&dev_priv->rps.sw_turbo.flip_timer);
+ dev_priv-> rps.is_bdw_sw_turbo = false;
+ } else {
+ I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
+ I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
+ ~dev_priv->pm_rps_events);
+ /* Complete PM interrupt masking here doesn't race with the rps
work
+ * item again unmasking PM interrupts because that is using a
different
+ * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only
risk is in
+ * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
+ * gen8_enable_rps will clean up. */
- I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
- I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
- ~dev_priv->pm_rps_events);
- /* Complete PM interrupt masking here doesn't race with the rps work
- * item again unmasking PM interrupts because that is using a different
- * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
- * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
- * gen8_enable_rps will clean up. */
-
- spin_lock_irq(&dev_priv->irq_lock);
- dev_priv->rps.pm_iir = 0;
- spin_unlock_irq(&dev_priv->irq_lock);
+ spin_lock_irq(&dev_priv->irq_lock);
+ dev_priv->rps.pm_iir = 0;
+ spin_unlock_irq(&dev_priv->irq_lock);
- I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+ I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+ }
}
static void gen6_disable_rps_interrupts(struct drm_device *dev)
@@ -3511,13 +3524,111 @@ static void parse_rp_state_cap(struct drm_i915_private
*dev_priv, u32 rp_state_c
dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
}
+/*
+ * Run one busyness evaluation for a single direction and, when the
+ * threshold is crossed, step the GPU frequency (+3 for the up evaluator,
+ * -1 for the down evaluator), clamped to the soft limits.
+ *
+ * @dev:      drm device
+ * @c:        evaluator state; last_c0/last_ts are refreshed whenever a
+ *            sample is consumed
+ * @cur_time: current timestamp reading, in us
+ * @c0:       current C0 residency reading, in units of 32 * 1.28 us
+ *
+ * Returns without touching @c while the evaluation interval has not yet
+ * elapsed. Takes rps.hw_lock around the frequency update.
+ */
+static void bdw_sw_calculate_freq(struct drm_device *dev,
+	struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u64 busy;
+	u32 busyness_pct;
+	u32 elapsed_time;
+	u32 new_freq = 0;
+	bool adjust = false;
+
+	if (!c || !cur_time || !c0)
+		return;
+
+	/* No previous sample to compare against: just record this one. */
+	if (c->last_c0 == 0)
+		goto out;
+
+	/*
+	 * Check Evaluation interval. A zero elapsed time is also rejected
+	 * because it is used as a divisor below.
+	 */
+	elapsed_time = *cur_time - c->last_ts;
+	if (elapsed_time == 0 || elapsed_time < c->eval_interval)
+		return;
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+	/*
+	 * c0 unit in 32*1.28 usec, elapsed_time unit in 1 usec.
+	 * Whole busyness_pct calculation should be
+	 *     busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100;
+	 *     busyness_pct = (u32)(busy * 100 / elapsed_time);
+	 * The final formula is to simplify CPU calculation
+	 */
+	busy = (u64)(*c0 - c->last_c0) << 12;
+	do_div(busy, elapsed_time);
+	busyness_pct = (u32)busy;
+
+	/*
+	 * Track "an adjustment is wanted" separately from the value so that
+	 * a computed frequency of 0 is not mistaken for "no change", and
+	 * saturate the decrement so it cannot wrap around and then be
+	 * clamped to the maximum.
+	 */
+	if (c->is_up) {
+		if (busyness_pct >= c->it_threshold_pct) {
+			new_freq = (u32)dev_priv->rps.cur_freq + 3;
+			adjust = true;
+		}
+	} else if (busyness_pct <= c->it_threshold_pct) {
+		u32 cur = dev_priv->rps.cur_freq;
+
+		new_freq = cur ? cur - 1 : 0;
+		adjust = true;
+	}
+
+	/* Adjust to new frequency busyness and compare with threshold */
+	if (adjust) {
+		if (new_freq > dev_priv->rps.max_freq_softlimit)
+			new_freq = dev_priv->rps.max_freq_softlimit;
+		else if (new_freq < dev_priv->rps.min_freq_softlimit)
+			new_freq = dev_priv->rps.min_freq_softlimit;
+
+		gen6_set_rps(dev, new_freq);
+	}
+
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+out:
+	c->last_c0 = *c0;
+	c->last_ts = *cur_time;
+}
+
+/*
+ * Work handler for sw_turbo.work_max_freq: requests rps.rp0_freq through
+ * gen6_set_rps() while holding rps.hw_lock.
+ */
+static void gen8_set_frequency_RP0(struct work_struct *work)
+{
+	/* The work item is embedded at rps.sw_turbo.work_max_freq. */
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private,
+			     rps.sw_turbo.work_max_freq);
+	struct drm_device *dev = dev_priv->dev;
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	gen6_set_rps(dev, dev_priv->rps.rp0_freq);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+/*
+ * flip_timer callback; @var carries the drm_i915_private pointer.
+ * Removes the timer, clears flip_received and queues work_max_freq on
+ * dev_priv->wq.
+ */
+static void flip_active_timeout_handler(unsigned long var)
+{
+	struct drm_i915_private *dev_priv = (struct drm_i915_private *)var;
+	struct intel_rps_bdw_turbo *turbo = &dev_priv->rps.sw_turbo;
+
+	del_timer(&turbo->flip_timer);
+	atomic_set(&turbo->flip_received, false);
+
+	queue_work(dev_priv->wq, &turbo->work_max_freq);
+}
+
+/**
+ * bdw_software_turbo - sample GPU busyness and run both evaluators
+ * @dev: drm device
+ *
+ * Reads the timestamp and C0 residency counters once and feeds the same
+ * sample to the up evaluator and then to the down evaluator.
+ */
+void bdw_software_turbo(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	u32 current_time = I915_READ(TIMESTAMP_CTR); /* unit in usec */
+	u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* unit in 32*1.28 usec */
+
+	bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up,
+			&current_time, &current_c0);
+	bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down,
+			&current_time, &current_c0);
+}
+
static void gen8_enable_rps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
uint32_t rc6_mask = 0, rp_state_cap;
+ uint32_t threshold_up_pct, threshold_down_pct;
+ uint32_t ei_up, ei_down; /* up and down evaluation interval */
+ u32 rp_ctl_flag;
int unused;
+ /* Use software Turbo for BDW */
+ dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev);
+
/* 1a: Software RC state - RC0 */
I915_WRITE(GEN6_RC_STATE, 0);
@@ -3561,35 +3672,74 @@ static void gen8_enable_rps(struct drm_device *dev)
HSW_FREQUENCY(dev_priv->rps.rp1_freq));
I915_WRITE(GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(dev_priv->rps.rp1_freq));
- /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
- I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout
*/
+ ei_up = 84480; /* 84.48ms */
+ ei_down = 448000;
+ threshold_up_pct = 90; /* x percent busy */
+ threshold_down_pct = 70;
+
+ if (dev_priv->rps.is_bdw_sw_turbo) {
+ dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct;
+ dev_priv->rps.sw_turbo.up.eval_interval = ei_up;
+ dev_priv->rps.sw_turbo.up.is_up = true;
+ dev_priv->rps.sw_turbo.up.last_ts = 0;
+ dev_priv->rps.sw_turbo.up.last_c0 = 0;
+
+ dev_priv->rps.sw_turbo.down.it_threshold_pct =
threshold_down_pct;
+ dev_priv->rps.sw_turbo.down.eval_interval = ei_down;
+ dev_priv->rps.sw_turbo.down.is_up = false;
+ dev_priv->rps.sw_turbo.down.last_ts = 0;
+ dev_priv->rps.sw_turbo.down.last_c0 = 0;
+
+ /* Start the timer to track if flip comes*/
+ dev_priv->rps.sw_turbo.timeout = 200*1000; /* in us */
+
+ init_timer(&dev_priv->rps.sw_turbo.flip_timer);
+ dev_priv->rps.sw_turbo.flip_timer.function =
flip_active_timeout_handler;
+ dev_priv->rps.sw_turbo.flip_timer.data = (unsigned long)
dev_priv;
+ dev_priv->rps.sw_turbo.flip_timer.expires =
+ usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) +
jiffies;
+ add_timer(&dev_priv->rps.sw_turbo.flip_timer);
+ INIT_WORK(&dev_priv->rps.sw_turbo.work_max_freq,
gen8_set_frequency_RP0);
+
+ atomic_set(&dev_priv->rps.sw_turbo.flip_received, true);
+ } else {
+ /* NB: Docs say 1s, and 1000000 - which aren't equivalent
+ * 1 second timeout*/
+ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000));
- /* Docs recommend 900MHz, and 300 MHz respectively */
- I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
- dev_priv->rps.max_freq_softlimit << 24 |
- dev_priv->rps.min_freq_softlimit << 16);
+ /* Docs recommend 900MHz, and 300 MHz respectively */
+ I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+ dev_priv->rps.max_freq_softlimit << 24 |
+ dev_priv->rps.min_freq_softlimit << 16);
- I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
- I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness
per EI, 70%*/
- I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
- I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+ I915_WRITE(GEN6_RP_UP_THRESHOLD,
+ FREQ_1_28_US(ei_up * threshold_up_pct / 100));
+ I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
+ FREQ_1_28_US(ei_down * threshold_down_pct / 100));
+ I915_WRITE(GEN6_RP_UP_EI,
+ FREQ_1_28_US(ei_up));
+ I915_WRITE(GEN6_RP_DOWN_EI,
+ FREQ_1_28_US(ei_down));
- I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+ I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+ }
/* 5: Enable RPS */
- I915_WRITE(GEN6_RP_CONTROL,
- GEN6_RP_MEDIA_TURBO |
- GEN6_RP_MEDIA_HW_NORMAL_MODE |
- GEN6_RP_MEDIA_IS_GFX |
- GEN6_RP_ENABLE |
- GEN6_RP_UP_BUSY_AVG |
- GEN6_RP_DOWN_IDLE_AVG);
-
- /* 6: Ring frequency + overclocking (our driver does this later */
-
+ rp_ctl_flag = GEN6_RP_MEDIA_TURBO |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG;
+ if (!dev_priv->rps.is_bdw_sw_turbo)
+ rp_ctl_flag |= GEN6_RP_ENABLE;
+
+ I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag);
+
+ /* 6: Ring frequency + overclocking
+ * (our driver does this later */
gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
-
- gen8_enable_rps_interrupts(dev);
+ if (!dev_priv->rps.is_bdw_sw_turbo)
+ gen8_enable_rps_interrupts(dev);
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
}
@@ -5018,6 +5168,8 @@ static void intel_gen6_powersave_work(struct work_struct
*work)
rps.delayed_resume_work.work);
struct drm_device *dev = dev_priv->dev;
+ dev_priv->rps.is_bdw_sw_turbo = false;
+
mutex_lock(&dev_priv->rps.hw_lock);
if (IS_CHERRYVIEW(dev)) {
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx