On Sat, Feb 14, 2015 at 06:30:29PM +0000, Damien Lespiau wrote:
> When one EU is disabled in a particular subslice, we can tune how the
> work is spread between subslices to improve EU utilization.
> 
> v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That
>       will also make the machinery work if several subslices have 7 EUs.
>       (Jeff Mcgee)
>     - Only apply the different hashing algorithm if the slice is
>       effectively unbalanced by checking there's a single subslice with
>       7 EUs. (Jeff Mcgee)
> 
> v3: Fix typo in comment (Jeff Mcgee)
> 
> Issue: VIZ-3845
> Cc: Jeff Mcgee <jeff.mc...@intel.com>
> Reviewed-by: Jeff Mcgee <jeff.mc...@intel.com>
> Signed-off-by: Damien Lespiau <damien.lesp...@intel.com>

Queued for -next, thanks for the patch.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_dma.c         | 17 ++++++++++---
>  drivers/gpu/drm/i915/i915_drv.h         |  2 ++
>  drivers/gpu/drm/i915/i915_reg.h         |  2 ++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 45 ++++++++++++++++++++++++++++++++-
>  4 files changed, 62 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 70acfe0..39164ff 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -649,13 +649,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev)
>                               continue;
>  
>                       for (ss = 0; ss < ss_max; ss++) {
> +                             u32 n_disabled;
> +
>                               if (ss_disable & (0x1 << ss))
>                                       /* skip disabled subslice */
>                                       continue;
>  
> -                             info->eu_total += eu_max -
> -                                               hweight8(eu_disable[s] >>
> -                                                        (ss * eu_max));
> +                             n_disabled = hweight8(eu_disable[s] >>
> +                                                   (ss * eu_max));
> +
> +                             /*
> +                              * Record which subslice(s) has(have) 7 EUs. we
> +                              * can tune the hash used to spread work among
> +                              * subslices if they are unbalanced.
> +                              */
> +                             if (eu_max - n_disabled == 7)
> +                                     info->subslice_7eu[s] |= 1 << ss;
> +
> +                             info->eu_total += eu_max - n_disabled;
>                       }
>               }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d910bac..5b4794b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -699,6 +699,8 @@ struct intel_device_info {
>       u8 subslice_per_slice;
>       u8 eu_total;
>       u8 eu_per_subslice;
> +     /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
> +     u8 subslice_7eu[3];
>       u8 has_slice_pg:1;
>       u8 has_subslice_pg:1;
>       u8 has_eu_pg:1;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 40f71bd..0aa6437 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1341,6 +1341,8 @@ enum skl_disp_power_wells {
>  #define   GEN6_WIZ_HASHING_16x4                              GEN6_WIZ_HASHING(1, 0)
>  #define   GEN6_WIZ_HASHING_MASK                              GEN6_WIZ_HASHING(1, 1)
>  #define   GEN6_TD_FOUR_ROW_DISPATCH_DISABLE          (1 << 5)
> +#define   GEN9_IZ_HASHING_MASK(slice)                        (0x3 << (slice * 2))
> +#define   GEN9_IZ_HASHING(slice, val)                        ((val) << (slice * 2))
>  
>  #define GFX_MODE     0x02520
>  #define GFX_MODE_GEN7        0x0229c
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 443e19c..c24d856 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -937,6 +937,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
>       return 0;
>  }
>  
> +static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
> +{
> +     struct drm_device *dev = ring->dev;
> +     struct drm_i915_private *dev_priv = dev->dev_private;
> +     u8 vals[3] = { 0, 0, 0 };
> +     unsigned int i;
> +
> +     for (i = 0; i < 3; i++) {
> +             u8 ss;
> +
> +             /*
> +              * Only consider slices where one, and only one, subslice has 7
> +              * EUs
> +              */
> +             if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
> +                     continue;
> +
> +             /*
> +              * subslice_7eu[i] != 0 (because of the check above) and
> +              * ss_max == 4 (maximum number of subslices possible per slice)
> +              *
> +              * ->    0 <= ss <= 3;
> +              */
> +             ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
> +             vals[i] = 3 - ss;
> +     }
> +
> +     if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
> +             return 0;
> +
> +     /* Tune IZ hashing. See intel_device_info_runtime_init() */
> +     WA_SET_FIELD_MASKED(GEN7_GT_MODE,
> +                         GEN9_IZ_HASHING_MASK(2) |
> +                         GEN9_IZ_HASHING_MASK(1) |
> +                         GEN9_IZ_HASHING_MASK(0),
> +                         GEN9_IZ_HASHING(2, vals[2]) |
> +                         GEN9_IZ_HASHING(1, vals[1]) |
> +                         GEN9_IZ_HASHING(0, vals[0]));
> +
> +     return 0;
> +}
> +
> +
>  static int skl_init_workarounds(struct intel_engine_cs *ring)
>  {
>       struct drm_device *dev = ring->dev;
> @@ -956,7 +999,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
>                                 HDC_FENCE_DEST_SLM_DISABLE |
>                                 HDC_BARRIER_PERFORMANCE_DISABLE);
>  
> -     return 0;
> +     return skl_tune_iz_hashing(ring);
>  }
>  
>  int init_workarounds_ring(struct intel_engine_cs *ring)
> -- 
> 1.8.3.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to