On 11/12/2018 10:14, Ankit Navik wrote:
From: Praveen Diwakar <praveen.diwa...@intel.com>

This patch will select optimum eu/slice/sub-slice configuration based on
type of load (low, medium, high) as input.
Based on our readings and experiments we have predefined set of optimum
configuration for each platform(CHT, KBL).
i915_gem_context_set_load_type will select optimum configuration from
pre-defined optimum configuration table(opt_config).

It also introduce flag update_render_config which can set by any governor.

v2:
  * Move static optimum_config to device init time.
  * Rename function to appropriate name, fix data types and patch ordering.
  * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)

v3:
  * Add safe guard check in i915_gem_context_set_load_type.
  * Rename struct from optimum_config to i915_sseu_optimum_config to
    avoid namespace clashes.
  * Reduces memcpy for space efficient.
  * Rebase.
  * Improved commit message. (Tvrtko Ursulin)

Cc: Kedar J Karanje <kedar.j.kara...@intel.com>
Cc: Yogesh Marathe <yogesh.mara...@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com>

Again for the record no, I did not r-b this.

Signed-off-by: Praveen Diwakar <praveen.diwa...@intel.com>
Signed-off-by: Aravindan Muthukumar <aravindan.muthuku...@intel.com>
Signed-off-by: Ankit Navik <ankit.p.na...@intel.com>
---
  drivers/gpu/drm/i915/i915_drv.h          |  3 ++
  drivers/gpu/drm/i915/i915_gem_context.c  | 18 ++++++++++++
  drivers/gpu/drm/i915/i915_gem_context.h  | 25 +++++++++++++++++
  drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
  drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
  5 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4aca534..4b9a8c5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1681,6 +1681,9 @@ struct drm_i915_private {
        struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 
965 */
        int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+ /* optimal slice/subslice/EU configration state */
+       struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];

Make it a pointer to struct i915_sseu_optimum_config.

+
        unsigned int fsb_freq, mem_freq, is_ddr3;
        unsigned int skl_preferred_vco_freq;
        unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index d040858..c0ced72 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -392,10 +392,28 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
        ctx->subslice_cnt = hweight8(
                        INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
        ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+       ctx->load_type = 0;
+       ctx->pending_load_type = 0;

Not needed because we zero the allocation and probably depend on untouched fields being zero elsewhere.

return ctx;
  }
+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+               enum gem_load_type type)
+{
+       struct drm_i915_private *dev_priv = ctx->i915;
+
+       if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
+               return;
+
+       /* Call opt_config to get correct configuration for eu,slice,subslice */
+       ctx->slice_cnt = dev_priv->opt_config[type].slice;
+       ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+       ctx->eu_cnt = dev_priv->opt_config[type].eu;
+       ctx->pending_load_type = type;
+}
+
  /**
   * i915_gem_context_create_gvt - create a GVT GEM context
   * @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index e000530..a0db13c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -53,6 +53,19 @@ struct intel_context_ops {
        void (*destroy)(struct intel_context *ce);
  };
+enum gem_load_type {
+       LOAD_TYPE_LOW,
+       LOAD_TYPE_MEDIUM,
+       LOAD_TYPE_HIGH,
+       LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+       u8 slice;
+       u8 subslice;
+       u8 eu;
+};
+
  /**
   * struct i915_gem_context - client state
   *
@@ -208,6 +221,16 @@ struct i915_gem_context {
/** eu_cnt: used to set the # of eu to be enabled. */
        u8 eu_cnt;
+
+       /** load_type: The designated load_type (high/medium/low) for a given
+        * number of pending commands in the command queue.
+        */
+       enum gem_load_type load_type;
+
+       /** pending_load_type: The earlier load type that the GPU was configured
+        * for (high/medium/low).
+        */
+       enum gem_load_type pending_load_type;
  };
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -336,6 +359,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, 
void *data,
                                    struct drm_file *file_priv);
  int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
                                       struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+               enum gem_load_type type);
struct i915_gem_context *
  i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index 0ef0c64..33c6310 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -741,6 +741,28 @@ void intel_device_info_runtime_init(struct 
intel_device_info *info)
                container_of(info, struct drm_i915_private, info);
        enum pipe pipe;
+ struct i915_sseu_optimum_config *opt_config = NULL;
+       /* static table of slice/subslice/EU for Cherryview */
+       struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+               {1, 1, 4},      /* Low */
+               {1, 1, 6},      /* Medium */
+               {1, 2, 6}       /* High */
+       };
+
+       /* static table of slice/subslice/EU for KBL GT2 */
+       struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+               {1, 3, 2},      /* Low */
+               {1, 3, 4},      /* Medium */
+               {1, 3, 8}       /* High */
+       };
+
+       /* static table of slice/subslice/EU for KBL GT3 */
+       struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+               {2, 3, 4},      /* Low */
+               {2, 3, 6},      /* Medium */
+               {2, 3, 8}       /* High */
+       };

Move these to file scope as static const.

+
        if (INTEL_GEN(dev_priv) >= 10) {
                for_each_pipe(dev_priv, pipe)
                        info->num_scalers[pipe] = 2;
@@ -840,17 +862,38 @@ void intel_device_info_runtime_init(struct 
intel_device_info *info)
        /* Initialize slice/subslice/EU info */
        if (IS_HASWELL(dev_priv))
                haswell_sseu_info_init(dev_priv);
-       else if (IS_CHERRYVIEW(dev_priv))
+       else if (IS_CHERRYVIEW(dev_priv)) {
                cherryview_sseu_info_init(dev_priv);
+               opt_config = chv_config;

For my idea to work here and below add:

BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);

+       }
        else if (IS_BROADWELL(dev_priv))
                broadwell_sseu_info_init(dev_priv);
-       else if (INTEL_GEN(dev_priv) == 9)
+       else if (INTEL_GEN(dev_priv) == 9) {
                gen9_sseu_info_init(dev_priv);
+
+               if (IS_KABYLAKE(dev_priv)) {
+                       switch (info->gt) {
+                       default:
+                               MISSING_CASE(info->gt);

This should probably be silent since absence of sseu tuning is not an error on other platforms.

+                               /* fall through */
+                       case 2:
+                               opt_config = kbl_gt2_config;
+                       break;
+                       case 3:
+                               opt_config = kbl_gt3_config;

Would Kabylake table work for other Gen9 platforms?

+                       break;
+                       }
+               }
+       }
        else if (INTEL_GEN(dev_priv) == 10)
                gen10_sseu_info_init(dev_priv);
        else if (INTEL_GEN(dev_priv) >= 11)
                gen11_sseu_info_init(dev_priv);
+ if (opt_config)
+               memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
+                       sizeof(struct i915_sseu_optimum_config));

And if you agree with my other suggestions then you can replace a copy with:

        dev_priv->opt_config = opt_config;

Or in fact, even do it directly on assignment sites above.

+
        /* Initialize command stream timestamp frequency */
        info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
  }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a17f676..7fb9cd2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request 
*rq)
reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
        /* FIXME: To avoid stale rpcs config, move it to context_pin */
-       if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
+       if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
+                       (ctx->load_type != ctx->pending_load_type)) {
                rpcs_config = make_rpcs(ctx->i915);
                reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
                CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
                                rpcs_config);
+               ctx->load_type = ctx->pending_load_type;
        }
/* True 32b PPGTT with dynamic page allocation: update PDP


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to