On Wed, May 29, 2013 at 07:06:15PM +0300, Ville Syrjälä wrote:
> On Fri, May 24, 2013 at 07:05:12PM -0300, Paulo Zanoni wrote:
> > From: Paulo Zanoni <paulo.r.zan...@intel.com>
> > 
> > We were previously only setting the WM_PIPE registers, now we are
> > setting the LP watermark registers. This should allow deeper PC
> > states, resulting in power savings.
> > 
> > We're only using 1/2 data buffer partitioning for now.
> > 
> > v2: Merge both hsw_compute_pri_wm_* functions (Ville)
> > 
> > Signed-off-by: Paulo Zanoni <paulo.r.zan...@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h |   4 +
> >  drivers/gpu/drm/i915/intel_pm.c | 201 
> > ++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 187 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index e86606c..58230ea 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -3057,6 +3057,10 @@
> >  #define WM3S_LP_IVB                0x45128
> >  #define  WM1S_LP_EN                (1<<31)
> >  
> > +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> > +   (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> > +    ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> > +
> >  /* Memory latency timer register */
> >  #define MLTR_ILK           0x11222
> >  #define  MLTR_WM1_SHIFT            0
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c 
> > b/drivers/gpu/drm/i915/intel_pm.c
> > index ef58a1a..872e2a8 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -2129,6 +2129,12 @@ static uint32_t hsw_wm_method2(uint32_t pixel_rate, 
> > uint32_t pipe_htotal,
> >     return ret;
> >  }
> >  
> > +static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
> > +                      uint8_t bytes_per_pixel)
> > +{
> > +   return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> > +}
> > +
> >  struct hsw_pipe_wm_parameters {
> >     bool active;
> >     bool sprite_enabled;
> > @@ -2142,11 +2148,28 @@ struct hsw_pipe_wm_parameters {
> >     uint32_t pixel_rate;
> >  };
> >  
> > +struct hsw_wm_maximums {
> > +   uint16_t pri;
> > +   uint16_t spr;
> > +   uint16_t cur;
> > +   uint16_t fbc;
> > +};
> > +
> > +struct hsw_lp_wm_result {
> > +   bool enable;
> > +   bool fbc_enable;
> > +   uint32_t pri_val;
> > +   uint32_t spr_val;
> > +   uint32_t cur_val;
> > +   uint32_t fbc_val;
> > +};
> > +
> >  struct hsw_wm_values {
> >     uint32_t wm_pipe[3];
> >     uint32_t wm_lp[3];
> >     uint32_t wm_lp_spr[3];
> >     uint32_t wm_linetime[3];
> > +   bool enable_fbc_wm;
> >  };
> >  
> >  enum hsw_data_buf_partitioning {
> > @@ -2154,17 +2177,31 @@ enum hsw_data_buf_partitioning {
> >     HSW_DATA_BUF_PART_5_6,
> >  };
> >  
> > -/* Only for WM_PIPE. */
> > -static uint32_t hsw_compute_pri_wm_pipe(struct hsw_pipe_wm_parameters 
> > *params,
> > -                                   uint32_t mem_value)
> > +/* For both WM_PIPE and WM_LP. */
> > +static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
> > +                              uint32_t mem_value,
> > +                              bool is_lp)
> >  {
> > +   uint32_t method1, method2;
> > +
> >     /* TODO: for now, assume the primary plane is always enabled. */
> >     if (!params->active)
> >             return 0;
> >  
> > -   return hsw_wm_method1(params->pixel_rate,
> > -                         params->pri_bytes_per_pixel,
> > -                         mem_value);
> > +   method1 = hsw_wm_method1(params->pixel_rate,
> > +                            params->pri_bytes_per_pixel,
> > +                            mem_value);
> > +
> > +   if (!is_lp)
> > +           return method1;
> > +
> > +   method2 = hsw_wm_method2(params->pixel_rate,
> > +                            params->pipe_htotal,
> > +                            params->pri_horiz_pixels,
> > +                            params->pri_bytes_per_pixel,
> > +                            mem_value);
> > +
> > +   return min(method1, method2);
> >  }
> >  
> >  /* For both WM_PIPE and WM_LP. */
> > @@ -2201,13 +2238,59 @@ static uint32_t hsw_compute_cur_wm(struct 
> > hsw_pipe_wm_parameters *params,
> >                           mem_value);
> >  }
> >  
> > +/* Only for WM_LP. */
> > +static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
> > +                              uint32_t pri_val,
> > +                              uint32_t mem_value)
> > +{
> > +   if (!params->active)
> > +           return 0;
> > +
> > +   return hsw_wm_fbc(pri_val,
> > +                     params->pri_horiz_pixels,
> > +                     params->pri_bytes_per_pixel);
> > +}
> > +
> > +static void hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums 
> > *max,
> > +                         struct hsw_pipe_wm_parameters *params,
> > +                         struct hsw_lp_wm_result *result)
> > +{
> > +   enum pipe pipe;
> > +   uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> > +
> > +   for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> > +           struct hsw_pipe_wm_parameters *p = &params[pipe];
> > +
> > +           pri_val[pipe] = hsw_compute_pri_wm(p, mem_value, true);
> > +           spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
> > +           cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
> > +           fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
> > +   }
> > +
> > +   result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> > +   result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> > +   result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> > +   result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> > +
> > +   if (result->fbc_val > max->fbc) {
> > +           result->fbc_enable = false;
> > +           result->fbc_val = 0;
> > +   } else {
> > +           result->fbc_enable = true;
> > +   }
> > +
> > +   result->enable = result->pri_val <= max->pri &&
> > +                    result->spr_val <= max->spr &&
> > +                    result->cur_val <= max->cur;
> > +}
> > +
> >  static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> >                                 uint32_t mem_value, enum pipe pipe,
> >                                 struct hsw_pipe_wm_parameters *params)
> >  {
> >     uint32_t pri_val, cur_val, spr_val;
> >  
> > -   pri_val = hsw_compute_pri_wm_pipe(params, mem_value);
> > +   pri_val = hsw_compute_pri_wm(params, mem_value, false);
> >     spr_val = hsw_compute_spr_wm(params, mem_value);
> >     cur_val = hsw_compute_cur_wm(params, mem_value);
> >  
> > @@ -2250,13 +2333,15 @@ hsw_compute_linetime_wm(struct drm_device *dev, 
> > struct drm_crtc *crtc)
> >  
> >  static void hsw_compute_wm_parameters(struct drm_device *dev,
> >                                   struct hsw_pipe_wm_parameters *params,
> > -                                 uint32_t *wm)
> > +                                 uint32_t *wm,
> > +                                 struct hsw_wm_maximums *lp_max_1_2)
> >  {
> >     struct drm_i915_private *dev_priv = dev->dev_private;
> >     struct drm_crtc *crtc;
> >     struct drm_plane *plane;
> >     uint64_t sskpd = I915_READ64(MCH_SSKPD);
> >     enum pipe pipe;
> > +   int pipes_active = 0, sprites_enabled = 0;
> >  
> >     if ((sskpd >> 56) & 0xFF)
> >             wm[0] = (sskpd >> 56) & 0xFF;
> > @@ -2278,6 +2363,8 @@ static void hsw_compute_wm_parameters(struct 
> > drm_device *dev,
> >             if (!p->active)
> >                     continue;
> >  
> > +           pipes_active++;
> > +
> >             p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
> >             p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> >             p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> > @@ -2297,25 +2384,89 @@ static void hsw_compute_wm_parameters(struct 
> > drm_device *dev,
> >             p->sprite_enabled = intel_plane->wm.enable;
> >             p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
> >             p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> > +
> > +           if (p->sprite_enabled)
> > +                   sprites_enabled++;
> > +   }
> > +
> > +   if (pipes_active > 1) {
> > +           lp_max_1_2->pri = sprites_enabled ? 128 : 256;
> > +           lp_max_1_2->spr = 128;
> > +           lp_max_1_2->cur = 64;
> > +   } else {
> > +           lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> > +           lp_max_1_2->spr = 384;
> > +           lp_max_1_2->cur = 255;
> >     }
> > +   lp_max_1_2->fbc = 15;
> >  }
> >  
> >  static void hsw_compute_wm_results(struct drm_device *dev,
> >                                struct hsw_pipe_wm_parameters *params,
> >                                uint32_t *wm,
> > +                              struct hsw_wm_maximums *lp_maximums,
> >                                struct hsw_wm_values *results)
> >  {
> >     struct drm_i915_private *dev_priv = dev->dev_private;
> >     struct drm_crtc *crtc;
> > +   struct hsw_lp_wm_result lp_results[4];
> >     enum pipe pipe;
> > +   int i;
> >  
> > -   /* No support for LP WMs yet. */
> > -   results->wm_lp[2] = 0;
> > -   results->wm_lp[1] = 0;
> > -   results->wm_lp[0] = 0;
> > -   results->wm_lp_spr[2] = 0;
> > -   results->wm_lp_spr[1] = 0;
> > -   results->wm_lp_spr[0] = 0;
> > +   hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> > +   hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> > +   hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> > +   hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> > +
> > +   /* The spec says it is preferred to disable FBC WMs instead of disabling
> > +    * a WM level. */
> > +   results->enable_fbc_wm = true;
> > +   for (i = 0; i < 4; i++) {
> > +           if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> > +                   results->enable_fbc_wm = false;
> > +                   break;
> > +           }
> > +   }
> > +
> > +   if (lp_results[3].enable) {
> 
> Here you seem to assume that if lp[3] is valid, that also lp[2] and lp[0]
> are valid...
> 
> > +           results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> > +                                             lp_results[3].pri_val,
> > +                                             lp_results[3].cur_val);
> > +           results->wm_lp_spr[2] = lp_results[3].spr_val;
> > +   } else if (lp_results[2].enable) {
> > +           results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> > +                                             lp_results[2].pri_val,
> > +                                             lp_results[2].cur_val);
> > +           results->wm_lp_spr[2] = lp_results[2].spr_val;
> > +   } else {
> > +           results->wm_lp[2] = 0;
> > +           results->wm_lp_spr[2] = 0;
> > +   }
> > +
> > +   if (lp_results[3].enable && lp_results[2].enable) {
> 
> ... but here you check if lp[2] is actually valid.
> 
> So it seems that in theory (if the latency values are really weird) the code
> could enable LP3 and leave LP2 or LP1 disabled, which is illegal.
> 
> I think this could be cleared up a bit like so:
>    struct hsw_lp_wm_result lp_results[4] = {};
> 
>    for (level = 1; level <= 4; level++)
>      if (!hsw_compute_lp_wm(wm[level], lp_maximums, params, 
> &lp_results[level-1]))
>        break;
> 
> and obviously have hsw_compute_lp_wm() return result->enable. It could
> also save a few cycles by not computing watermark levels that will never
> be used.
> 
> 
> To simplify the results handling you could take another page out of my
> WM patch. Something like this:
> 
>  memset(results, 0, sizeof *results);
> 
>  for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
>    int level = wm_lp + (wm_lp >= 2 && lp_results[3].enable)
>    const struct hsw_lp_wm_result *r = &lp_results[level-1];
> 
>    if (!r->enable)
>      break;
>   
>    results->wm_lp[wm_lp] = HSW_WM_LP_VAL(level << 1, r->fbc_val, r->pri_val, 
> r->cur_val);
>    results->wm_lp_spr[wm_lp] = r->spr_val;
>  }

And I forgot the -1 from the results->foo[wm_lp] indexing. That's one 
reason I don't quite like this split between LP0/level=0 and and
LP1+/level>=1; The indexes no longer match the LP/level. But I think
we can ignore that for now and clear it up later.

> 
> Quite a bit less code, and avoids all those awkward checks for which
> levels are valid.
> 
> > +           results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> > +                                             lp_results[2].pri_val,
> > +                                             lp_results[2].cur_val);
> > +           results->wm_lp_spr[1] = lp_results[2].spr_val;
> > +   } else if (!lp_results[3].enable && lp_results[1].enable) {
> > +           results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> > +                                             lp_results[1].pri_val,
> > +                                             lp_results[1].cur_val);
> > +           results->wm_lp_spr[1] = lp_results[1].spr_val;
> > +   } else {
> > +           results->wm_lp[1] = 0;
> > +           results->wm_lp_spr[1] = 0;
> > +   }
> > +
> > +   if (lp_results[0].enable) {
> > +           results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> > +                                             lp_results[0].pri_val,
> > +                                             lp_results[0].cur_val);
> > +           results->wm_lp_spr[0] = lp_results[0].spr_val;
> > +   } else {
> > +           results->wm_lp[0] = 0;
> > +           results->wm_lp_spr[0] = 0;
> > +   }
> >  
> >     for_each_pipe(pipe)
> >             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> > @@ -2339,6 +2490,7 @@ static void hsw_write_wm_values(struct 
> > drm_i915_private *dev_priv,
> >     struct hsw_wm_values previous;
> >     uint32_t val;
> >     enum hsw_data_buf_partitioning prev_partitioning;
> > +   bool prev_enable_fbc_wm;
> >  
> >     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> >     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> > @@ -2356,11 +2508,14 @@ static void hsw_write_wm_values(struct 
> > drm_i915_private *dev_priv,
> >     prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
> >                         HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
> >  
> > +   prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
> > +
> >     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> >         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> >         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> >         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0 &&
> > -       partitioning == prev_partitioning)
> > +       partitioning == prev_partitioning &&
> > +       results->enable_fbc_wm == prev_enable_fbc_wm)
> >             return;
> >  
> >     if (previous.wm_lp[2] != 0)
> > @@ -2393,6 +2548,15 @@ static void hsw_write_wm_values(struct 
> > drm_i915_private *dev_priv,
> >             I915_WRITE(WM_MISC, val);
> >     }
> >  
> > +   if (prev_enable_fbc_wm != results->enable_fbc_wm) {
> > +           val = I915_READ(DISP_ARB_CTL);
> > +           if (results->enable_fbc_wm)
> > +                   val &= ~DISP_FBC_WM_DIS;
> > +           else
> > +                   val |= DISP_FBC_WM_DIS;
> > +           I915_WRITE(DISP_ARB_CTL, val);
> > +   }
> > +
> >     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> >             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> >     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> > @@ -2411,12 +2575,13 @@ static void hsw_write_wm_values(struct 
> > drm_i915_private *dev_priv,
> >  static void haswell_update_wm(struct drm_device *dev)
> >  {
> >     struct drm_i915_private *dev_priv = dev->dev_private;
> > +   struct hsw_wm_maximums lp_max_1_2;
> >     struct hsw_pipe_wm_parameters params[3];
> >     struct hsw_wm_values results;
> >     uint32_t wm[5];
> >  
> > -   hsw_compute_wm_parameters(dev, params, wm);
> > -   hsw_compute_wm_results(dev, params, wm, &results);
> > +   hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2);
> > +   hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results);
> >     hsw_write_wm_values(dev_priv, &results, HSW_DATA_BUF_PART_1_2);
> >  }
> >  
> > -- 
> > 1.8.1.2
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to