On Tue, Sep 23, 2014 at 12:13:50PM +0100, Damien Lespiau wrote:
> From: Pradeep Bhat <pradeep.b...@intel.com>
> 
> This patch implements the watermark algorithm and its necessary
> functions. Two function pointers skl_update_wm and
> skl_update_sprite_wm are provided. The skl_update_wm will update
> the watermarks for the crtc provided as an argument and then
> checks for change in DDB allocation for other active pipes and
> recomputes the watermarks for those Pipes and planes as well.
> Finally it does the register programming for all dirty pipes.
> The trigger of the Watermark double buffer registers will have
> to be once the plane configurations are done by the caller.
> 
> v2: fixed the divide-by-0 error in the results computation func.
>     Also reworked the PLANE_WM register values computation func to
>     make it more compact. Incorporated all other review comments
>     from Damien.
> 
> v3: Changed the skl_compute_plane_wm function to now return success
>     or failure. Also the result blocks and lines are computed here
>     instead of in skl_compute_wm_results function.
> 
> v4: Adjust skl_ddb_alloc_changed() to the new planes/cursor split
>     (Damien)
> 
> v5: Reworked the affected functions to implement new plane/cursor
>     split.
> 
> v6: Rework the logic that triggers the DDB allocation and WM computation
>     of skl_update_other_pipe_wm() to not depend on non-computed DDB
>     values.
>     Always give a valid cursor_width (at boot it's 0) to keep the
>     invariant that we consider the cursor plane always enabled.
>     Otherwise we end up dividing by 0 in skl_compute_plane_wm()
>     (Damien Lespiau)
> 
> v7: Spell out allocation
>     skl_ddb_ functions should have the ddb as first argument
>     Make the skl_ddb_alloc_changed() parameters const
>     (Damien)
> 
> v8: Rebase on top of the crtc->primary changes
> 
> v9: Split the staging results structure to not exceed the 1Kb stack
>     allocation in skl_update_wm()
> 
> v10: Make skl_pipe_pixel_rate() take a pointer to the pipe config
>      Add a comment about overflow considerations for skl_wm_method1()
>      Various additions of const
>      Various use of sizeof(variable) instead of sizeof(type)
>      Various move of variable definitons to a narrower scope
>      Zero initialize some stack allocated structures to make sure we
>      don't have garbage in case we don't write all the values
>      (Ville)
> 
> v11: Remove non-necessary default number of blocks/lines when the plane
>      is disabled (Ville)
> 
> Signed-off-by: Pradeep Bhat <pradeep.b...@intel.com>
> Signed-off-by: Damien Lespiau <damien.lesp...@intel.com>

Looks OK. The fixup to move the validity check still seems to need a
bit of tweaking but otherwise I think it's sane enough.

I seem to recall that I went through all of the little details the
first time around, so I decided to not do that this time. Hopefully
my memory is correct ;)

Reviewed-by: Ville Syrjälä <ville.syrj...@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  12 +-
>  drivers/gpu/drm/i915/intel_pm.c | 422 
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 433 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0301a91..cde5136 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1722,8 +1722,18 @@ struct drm_i915_private {
>                */
>               uint16_t skl_latency[8];
>  
> +             /*
> +              * The skl_wm_values structure is a bit too big for stack
> +              * allocation, so we keep the staging struct where we store
> +              * intermediate results here instead.
> +              */
> +             struct skl_wm_values skl_results;
> +
>               /* current hardware state */
> -             struct ilk_wm_values hw;
> +             union {
> +                     struct ilk_wm_values hw;
> +                     struct skl_wm_values skl_hw;
> +             };
>       } wm;
>  
>       struct i915_runtime_pm pm;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 768890e..e69a833 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2957,6 +2957,426 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
>       return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
>  }
>  
> +static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config)
> +{
> +     /* TODO: Take into account the scalers once we support them */
> +     return config->adjusted_mode.crtc_clock;
> +}
> +
> +/*
> + * The max latency should be 257 (max the punit can code is 255 and we add 
> 2us
> + * for the read latency) and bytes_per_pixel should always be <= 8, so that
> + * should allow pixel_rate up to ~2 GHz which seems sufficient since max
> + * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
> +*/
> +static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
> +                            uint32_t latency)
> +{
> +     uint32_t wm_intermediate_val, ret;
> +
> +     if (latency == 0)
> +             return UINT_MAX;
> +
> +     wm_intermediate_val = latency * pixel_rate * bytes_per_pixel;
> +     ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
> +
> +     return ret;
> +}
> +
> +static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
> +                            uint32_t horiz_pixels, uint8_t bytes_per_pixel,
> +                            uint32_t latency)
> +{
> +     uint32_t ret, plane_bytes_per_line, wm_intermediate_val;
> +
> +     if (latency == 0)
> +             return UINT_MAX;
> +
> +     plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
> +     wm_intermediate_val = latency * pixel_rate;
> +     ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
> +                             plane_bytes_per_line;
> +
> +     return ret;
> +}
> +
> +static void skl_compute_transition_wm(struct drm_crtc *crtc,
> +                               struct skl_pipe_wm_parameters *params,
> +                               struct skl_pipe_wm *pipe_wm)
> +{
> +     /*
> +      * For now it is suggested to use the LP0 wm val of corresponding
> +      * plane as transition wm val. This is done while computing results.
> +      */
> +     if (!params->active)
> +             return;
> +}
> +
> +static uint32_t
> +skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters 
> *p)
> +{
> +     if (!intel_crtc_active(crtc))
> +             return 0;
> +
> +     return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
> +
> +}
> +
> +static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation 
> *new_ddb,
> +                                    const struct intel_crtc *intel_crtc)
> +{
> +     struct drm_device *dev = intel_crtc->base.dev;
> +     struct drm_i915_private *dev_priv = dev->dev_private;
> +     const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
> +     enum pipe pipe = intel_crtc->pipe;
> +
> +     if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
> +                sizeof(new_ddb->plane[pipe])))
> +             return true;
> +
> +     if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
> +                 sizeof(new_ddb->cursor[pipe])))
> +             return true;
> +
> +     return false;
> +}
> +
> +static void skl_compute_wm_global_parameters(struct drm_device *dev,
> +                                          struct intel_wm_config *config)
> +{
> +     struct drm_crtc *crtc;
> +     struct drm_plane *plane;
> +
> +     list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
> +             config->num_pipes_active += intel_crtc_active(crtc);
> +
> +     /* FIXME: I don't think we need those two global parameters on SKL */
> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +             config->sprites_enabled |= intel_plane->wm.enabled;
> +             config->sprites_scaled |= intel_plane->wm.scaled;
> +     }
> +}
> +
> +static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
> +                                        struct skl_pipe_wm_parameters *p)
> +{
> +     struct drm_device *dev = crtc->dev;
> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +     enum pipe pipe = intel_crtc->pipe;
> +     struct drm_plane *plane;
> +     int i = 1; /* Index for sprite planes start */
> +
> +     p->active = intel_crtc_active(crtc);
> +     if (p->active) {
> +             p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
> +             p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config);
> +
> +             /*
> +              * For now, assume primary and cursor planes are always enabled.
> +              */
> +             p->plane[0].enabled = true;
> +             p->plane[0].bytes_per_pixel =
> +                     crtc->primary->fb->bits_per_pixel / 8;
> +             p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w;
> +             p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h;
> +
> +             p->cursor.enabled = true;
> +             p->cursor.bytes_per_pixel = 4;
> +             p->cursor.horiz_pixels = intel_crtc->cursor_width ?
> +                                      intel_crtc->cursor_width : 64;
> +     }
> +
> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +             if (intel_plane->pipe == pipe)
> +                     p->plane[i++] = intel_plane->wm;
> +     }
> +}
> +
> +static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p,
> +                                struct intel_plane_wm_parameters *p_params,
> +                                uint16_t max_page_buff_alloc,
> +                                uint32_t mem_value,
> +                                uint16_t *res_blocks, /* out */
> +                                uint8_t *res_lines /* out */)
> +{
> +     uint32_t method1, method2, plane_bytes_per_line;
> +     uint32_t result_bytes;
> +
> +     if (!p->active || !p_params->enabled)
> +             return false;
> +
> +     method1 = skl_wm_method1(p->pixel_rate,
> +                              p_params->bytes_per_pixel,
> +                              mem_value);
> +     method2 = skl_wm_method2(p->pixel_rate,
> +                              p->pipe_htotal,
> +                              p_params->horiz_pixels,
> +                              p_params->bytes_per_pixel,
> +                              mem_value);
> +
> +     plane_bytes_per_line = p_params->horiz_pixels *
> +                                     p_params->bytes_per_pixel;
> +
> +     /* For now xtile and linear */
> +     if (((max_page_buff_alloc * 512) / plane_bytes_per_line) >= 1)
> +             result_bytes = min(method1, method2);
> +     else
> +             result_bytes = method1;
> +
> +     *res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1;
> +     *res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line);
> +
> +     return true;
> +}
> +
> +static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
> +                              struct skl_ddb_allocation *ddb,
> +                              struct skl_pipe_wm_parameters *p,
> +                              enum pipe pipe,
> +                              int level,
> +                              int num_planes,
> +                              struct skl_wm_level *result)
> +{
> +     uint16_t latency = dev_priv->wm.skl_latency[level];
> +     uint16_t ddb_blocks;
> +     int i;
> +
> +     for (i = 0; i < num_planes; i++) {
> +             ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
> +
> +             result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i],
> +                                             ddb_blocks,
> +                                             latency,
> +                                             &result->plane_res_b[i],
> +                                             &result->plane_res_l[i]);
> +     }
> +
> +     ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
> +     result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks,
> +                                              latency, &result->cursor_res_b,
> +                                              &result->cursor_res_l);
> +}
> +
> +static void skl_compute_pipe_wm(struct drm_crtc *crtc,
> +                             struct skl_ddb_allocation *ddb,
> +                             struct skl_pipe_wm_parameters *params,
> +                             struct skl_pipe_wm *pipe_wm)
> +{
> +     struct drm_device *dev = crtc->dev;
> +     const struct drm_i915_private *dev_priv = dev->dev_private;
> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +     int level, max_level = ilk_wm_max_level(dev);
> +
> +     for (level = 0; level <= max_level; level++) {
> +             skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
> +                                  level, intel_num_planes(intel_crtc),
> +                                  &pipe_wm->wm[level]);
> +     }
> +     pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
> +
> +     skl_compute_transition_wm(crtc, params, pipe_wm);
> +}
> +
> +static void skl_compute_wm_results(struct drm_device *dev,
> +                                struct skl_pipe_wm_parameters *p,
> +                                struct skl_pipe_wm *p_wm,
> +                                struct skl_wm_values *r,
> +                                struct intel_crtc *intel_crtc)
> +{
> +     int level, max_level = ilk_wm_max_level(dev);
> +     enum pipe pipe = intel_crtc->pipe;
> +
> +     for (level = 0; level <= max_level; level++) {
> +             uint16_t ddb_blocks;
> +             uint32_t temp;
> +             int i;
> +
> +             for (i = 0; i < intel_num_planes(intel_crtc); i++) {
> +                     temp = 0;
> +                     ddb_blocks = skl_ddb_entry_size(&r->ddb.plane[pipe][i]);
> +
> +                     if ((p_wm->wm[level].plane_res_b[i] > ddb_blocks) ||
> +                             (p_wm->wm[level].plane_res_l[i] > 31))
> +                             p_wm->wm[level].plane_en[i] = false;
> +
> +                     temp |= p_wm->wm[level].plane_res_l[i] <<
> +                                     PLANE_WM_LINES_SHIFT;
> +                     temp |= p_wm->wm[level].plane_res_b[i];
> +                     if (p_wm->wm[level].plane_en[i])
> +                             temp |= PLANE_WM_EN;
> +
> +                     r->plane[pipe][i][level] = temp;
> +                     /* Use the LP0 WM value for transition WM for now. */
> +                     if (level == 0)
> +                             r->plane_trans[pipe][i] = temp;
> +             }
> +
> +             temp = 0;
> +             ddb_blocks = skl_ddb_entry_size(&r->ddb.cursor[pipe]);
> +
> +             if ((p_wm->wm[level].cursor_res_b > ddb_blocks) ||
> +                     (p_wm->wm[level].cursor_res_l > 31))
> +                     p_wm->wm[level].cursor_en = false;
> +
> +             temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
> +             temp |= p_wm->wm[level].cursor_res_b;
> +
> +             if (p_wm->wm[level].cursor_en)
> +                     temp |= PLANE_WM_EN;
> +
> +             r->cursor[pipe][level] = temp;
> +             /* Use the LP0 WM value for transition WM for now. */
> +             if (level == 0)
> +                     r->cursor_trans[pipe] = temp;
> +
> +     }
> +
> +     r->wm_linetime[pipe] = p_wm->linetime;
> +}
> +
> +static void skl_write_wm_values(struct drm_i915_private *dev_priv,
> +                             const struct skl_wm_values *new)
> +{
> +     struct drm_device *dev = dev_priv->dev;
> +     struct intel_crtc *crtc;
> +
> +     list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
> +             int i, level, max_level = ilk_wm_max_level(dev);
> +             enum pipe pipe = crtc->pipe;
> +
> +             if (new->dirty[pipe]) {
> +                     I915_WRITE(PIPE_WM_LINETIME(pipe),
> +                                     new->wm_linetime[pipe]);
> +
> +                     for (level = 0; level <= max_level; level++) {
> +                             for (i = 0; i < intel_num_planes(crtc); i++)
> +                                     I915_WRITE(PLANE_WM(pipe, i, level),
> +                                             new->plane[pipe][i][level]);
> +                             I915_WRITE(CUR_WM(pipe, level),
> +                                     new->cursor[pipe][level]);
> +                     }
> +                     for (i = 0; i < intel_num_planes(crtc); i++)
> +                             I915_WRITE(PLANE_WM_TRANS(pipe, i),
> +                                             new->plane_trans[pipe][i]);
> +                     I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
> +             }
> +     }
> +
> +     dev_priv->wm.skl_hw = *new;
> +}
> +
> +static bool skl_update_pipe_wm(struct drm_crtc *crtc,
> +                            struct skl_pipe_wm_parameters *params,
> +                            struct intel_wm_config *config,
> +                            struct skl_ddb_allocation *ddb, /* out */
> +                            struct skl_pipe_wm *pipe_wm /* out */)
> +{
> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +
> +     skl_compute_wm_pipe_parameters(crtc, params);
> +     skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
> +
> +     if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
> +             return false;
> +
> +     intel_crtc->wm.skl_active = *pipe_wm;
> +     return true;
> +}
> +
> +static void skl_update_other_pipe_wm(struct drm_device *dev,
> +                                  struct drm_crtc *crtc,
> +                                  struct intel_wm_config *config,
> +                                  struct skl_wm_values *r)
> +{
> +     struct intel_crtc *intel_crtc;
> +     struct intel_crtc *this_crtc = to_intel_crtc(crtc);
> +
> +     /*
> +      * If the WM update hasn't changed the allocation for this_crtc (the
> +      * crtc we are currently computing the new WM values for), other
> +      * enabled crtcs will keep the same allocation and we don't need to
> +      * recompute anything for them.
> +      */
> +     if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
> +             return;
> +
> +     /*
> +      * Otherwise, because of this_crtc being freshly enabled/disabled, the
> +      * other active pipes need new DDB allocation and WM values.
> +      */
> +     list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
> +                             base.head) {
> +             struct skl_pipe_wm_parameters params = {};
> +             struct skl_pipe_wm pipe_wm = {};
> +             bool wm_changed;
> +
> +             if (this_crtc->pipe == intel_crtc->pipe)
> +                     continue;
> +
> +             if (!intel_crtc->active)
> +                     continue;
> +
> +             wm_changed = skl_update_pipe_wm(&intel_crtc->base,
> +                                             &params, config,
> +                                             &r->ddb, &pipe_wm);
> +
> +             /*
> +              * If we end up re-computing the other pipe WM values, it's
> +              * because it was really needed, so we expect the WM values to
> +              * be different.
> +              */
> +             WARN_ON(!wm_changed);
> +
> +             skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
> +             r->dirty[intel_crtc->pipe] = true;
> +     }
> +}
> +
> +static void skl_update_wm(struct drm_crtc *crtc)
> +{
> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +     struct drm_device *dev = crtc->dev;
> +     struct drm_i915_private *dev_priv = dev->dev_private;
> +     struct skl_pipe_wm_parameters params = {};
> +     struct skl_wm_values *results = &dev_priv->wm.skl_results;
> +     struct skl_pipe_wm pipe_wm = {};
> +     struct intel_wm_config config = {};
> +
> +     memset(results, 0, sizeof(*results));
> +
> +     skl_compute_wm_global_parameters(dev, &config);
> +
> +     if (!skl_update_pipe_wm(crtc, &params, &config,
> +                             &results->ddb, &pipe_wm))
> +             return;
> +
> +     skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
> +     results->dirty[intel_crtc->pipe] = true;
> +
> +     skl_update_other_pipe_wm(dev, crtc, &config, results);
> +     skl_write_wm_values(dev_priv, results);
> +}
> +
> +static void
> +skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
> +                  uint32_t sprite_width, uint32_t sprite_height,
> +                  int pixel_size, bool enabled, bool scaled)
> +{
> +     struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +     intel_plane->wm.enabled = enabled;
> +     intel_plane->wm.scaled = scaled;
> +     intel_plane->wm.horiz_pixels = sprite_width;
> +     intel_plane->wm.vert_pixels = sprite_height;
> +     intel_plane->wm.bytes_per_pixel = pixel_size;
> +
> +     skl_update_wm(crtc);
> +}
> +
>  static void ilk_update_wm(struct drm_crtc *crtc)
>  {
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> @@ -7486,6 +7906,8 @@ void intel_init_pm(struct drm_device *dev)
>               skl_setup_wm_latency(dev);
>  
>               dev_priv->display.init_clock_gating = gen9_init_clock_gating;
> +             dev_priv->display.update_wm = skl_update_wm;
> +             dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
>       } else if (HAS_PCH_SPLIT(dev)) {
>               ilk_setup_wm_latency(dev);
>  
> -- 
> 1.8.3.1

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to