On Thu, Feb 02, 2012 at 12:15:38AM +0100, Daniel Vetter wrote:
> We have to do this manually. Somebody had a Great Idea.
> 
> I've measured speed-ups just a few percent above the noise level
> (below 5% for the best case), but no slowdowns. Chris Wilson measured
> quite a bit more (10-20% above the usual snb variance) on a more
> recent and better tuned version of sna, but also recorded a few
> slow-downs on benchmarks known for uglier amounts of snb-induced
> variance.
> 
> v2: Incorporate Ben Widawsky's preliminary review comments and
> elaborate a bit about the performance impact in the changelog.
> 
> v3: Add a comment as to why we don't need to check the 3rd memory
> channel.
> 
> Acked-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Signed-Off-by: Daniel Vetter <daniel.vet...@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    2 +-
>  drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
>  drivers/gpu/drm/i915/i915_drv.h        |    3 +-
>  drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_tiling.c |   19 ++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
>  6 files changed, 78 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 3f27173..dfef956 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
>       i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
>  
>       mutex_lock(&dev->struct_mutex);
> -     ret = i915_gem_init_ringbuffer(dev);
> +     ret = i915_gem_init_hw(dev);
>       mutex_unlock(&dev->struct_mutex);
>       if (ret)
>               return ret;
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 1658cfd..12ddf47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
>               mutex_lock(&dev->struct_mutex);
>               dev_priv->mm.suspended = 0;
>  
> -             error = i915_gem_init_ringbuffer(dev);
> +             error = i915_gem_init_hw(dev);
>               mutex_unlock(&dev->struct_mutex);
>  
>               if (HAS_PCH_SPLIT(dev))
> @@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
>                       !dev_priv->mm.suspended) {
>               dev_priv->mm.suspended = 0;
>  
> +             i915_gem_init_swizzling(dev);
> +
>               dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
>               if (HAS_BSD(dev))
>                   dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 865de80..0845419 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct 
> drm_i915_gem_object *obj,
>                                           uint32_t read_domains,
>                                           uint32_t write_domain);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
> -int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
> +int __must_check i915_gem_init_hw(struct drm_device *dev);
> +void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  void i915_gem_do_init(struct drm_device *dev,
>                     unsigned long start,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 51a2b0c..86fffd2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
>       return 0;
>  }
>  
> +void i915_gem_init_swizzling(struct drm_device *dev)
> +{
> +     drm_i915_private_t *dev_priv = dev->dev_private;
> +
> +     if (INTEL_INFO(dev)->gen < 6 ||
> +         dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
> +             return;
> +
> +     I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
> +                              DISP_TILE_SURFACE_SWIZZLING);
> +
> +     I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
> +     if (IS_GEN6(dev))
> +             I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
> +     else
> +             I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
> +}
>  int
> -i915_gem_init_ringbuffer(struct drm_device *dev)
> +i915_gem_init_hw(struct drm_device *dev)
>  {
>       drm_i915_private_t *dev_priv = dev->dev_private;
>       int ret;
>  
> +     i915_gem_init_swizzling(dev);
> +
>       ret = intel_init_render_ring_buffer(dev);
>       if (ret)
>               return ret;
> @@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void 
> *data,
>       mutex_lock(&dev->struct_mutex);
>       dev_priv->mm.suspended = 0;
>  
> -     ret = i915_gem_init_ringbuffer(dev);
> +     ret = i915_gem_init_hw(dev);
>       if (ret != 0) {
>               mutex_unlock(&dev->struct_mutex);
>               return ret;
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c 
> b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 861223b..1a93066 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
>       uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
>  
>       if (INTEL_INFO(dev)->gen >= 6) {
> -             swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> -             swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +             uint32_t dimm_c0, dimm_c1;
> +             dimm_c0 = I915_READ(MAD_DIMM_C0);
> +             dimm_c1 = I915_READ(MAD_DIMM_C1);
> +             dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +             dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
> +             /* Enable swizzling when the channels are populated with
> +              * identically sized dimms. We don't need to check the 3rd
> +              * channel because no cpu with gpu attached ships in that
> +              * configuration. Also, swizzling only makes sense for 2
> +              * channels anyway. */
> +             if (dimm_c0 == dimm_c1) {
> +                     swizzle_x = I915_BIT_6_SWIZZLE_9_10;
> +                     swizzle_y = I915_BIT_6_SWIZZLE_9;
> +             } else {
> +                     swizzle_x = I915_BIT_6_SWIZZLE_NONE;
> +                     swizzle_y = I915_BIT_6_SWIZZLE_NONE;
> +             }
>       } else if (IS_GEN5(dev)) {
>               /* On Ironlake whatever DRAM config, GPU always do
>                * same swizzling setup.

Hmm, out of curiosity, why doesn't swizzling make sense with 3 channels?
I did some searching and it appears that you're right that no product
ships with that configuration, but I suspect the comment will help in
case any product ever does.

I'd also say it's not a bad idea to make explicit the assumption that we
never have less than 256MB of memory, e.g. with
WARN_ON(dimm_c0 + dimm_c1 == 0).

> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f960738..539ef90 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -295,6 +295,12 @@
>  #define FENCE_REG_SANDYBRIDGE_0              0x100000
>  #define   SANDYBRIDGE_FENCE_PITCH_SHIFT      32
>  
> +/* control register for cpu gtt access */
> +#define TILECTL                              0x101000
> +#define   TILECTL_SWZCTL                     (1 << 0)
> +#define   TILECTL_TLB_PREFETCH_DIS   (1 << 2)
> +#define   TILECTL_BACKSNOOP_DIS              (1 << 3)
> +
>  /*
>   * Instruction and interrupt control regs
>   */
> @@ -318,6 +324,11 @@
>  #define RING_MAX_IDLE(base)  ((base)+0x54)
>  #define RING_HWS_PGA(base)   ((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)      ((base)+0x2080)
> +#define ARB_MODE             0x04030
> +#define   ARB_MODE_SWIZZLE_SNB       (1<<4)
> +#define   ARB_MODE_SWIZZLE_IVB       (1<<5)
> +#define   ARB_MODE_ENABLE(x) GFX_MODE_ENABLE(x)
> +#define   ARB_MODE_DISABLE(x)        GFX_MODE_DISABLE(x)
>  #define RENDER_HWS_PGA_GEN7  (0x04080)
>  #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id)
>  #define DONE_REG             0x40b0
> @@ -1037,6 +1048,29 @@
>  #define C0DRB3                       0x10206
>  #define C1DRB3                       0x10606
>  
> +/** snb MCH registers for reading the DRAM channel configuration */
> +#define MAD_DIMM_C0                  (MCHBAR_MIRROR_BASE_SNB + 0x5004)
> +#define   MAD_DIMM_C1                        (MCHBAR_MIRROR_BASE_SNB + 
> 0x5008)
> +#define   MAD_DIMM_C2                        (MCHBAR_MIRROR_BASE_SNB + 
> 0x500C)
> +#define   MAD_DIMM_ECC_MASK          (0x3 << 24)
> +#define   MAD_DIMM_ECC_OFF           (0x0 << 24)
> +#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF       (0x1 << 24)
> +#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON       (0x2 << 24)
> +#define   MAD_DIMM_ECC_ON            (0x3 << 24)
> +#define   MAD_DIMM_ENH_INTERLEAVE    (0x1 << 22)
> +#define   MAD_DIMM_RANK_INTERLEAVE   (0x1 << 21)
> +#define   MAD_DIMM_B_WIDTH_X16               (0x1 << 20) /* X8 chips if 
> unset */
> +#define   MAD_DIMM_A_WIDTH_X16               (0x1 << 19) /* X8 chips if 
> unset */
> +#define   MAD_DIMM_B_DUAL_RANK               (0x1 << 18)
> +#define   MAD_DIMM_A_DUAL_RANK               (0x1 << 17)
> +#define   MAD_DIMM_A_SELECT          (0x1 << 16)
> +/* DIMM sizes are in multiples of 256mb. */
> +#define   MAD_DIMM_B_SIZE_SHIFT              8
> +#define   MAD_DIMM_B_SIZE_MASK               (0xff << MAD_DIMM_B_SIZE_SHIFT)
> +#define   MAD_DIMM_A_SIZE_SHIFT              0
> +#define   MAD_DIMM_A_SIZE_MASK               (0xff << MAD_DIMM_A_SIZE_SHIFT)
> +
> +

White space still seems wrong to me, but I don't need to see another
version to verify it's correct. I would have expected:

/** snb MCH registers for reading the DRAM channel configuration */
#define MAD_DIMM_C0                     (MCHBAR_MIRROR_BASE_SNB + 0x5004)
#define MAD_DIMM_C1                     (MCHBAR_MIRROR_BASE_SNB + 0x5008)
#define MAD_DIMM_C2                     (MCHBAR_MIRROR_BASE_SNB + 0x500C)
#define   MAD_DIMM_ECC_MASK             (0x3 << 24)

>  /* Clocking configuration register */
>  #define CLKCFG                       0x10c00
>  #define CLKCFG_FSB_400                                       (5 << 0)        
> /* hrawclk 100 */

I don't really need to see an updated version of the patch for either
suggestion I submitted.

Reviewed-by: Ben Widawsky <b...@bwidawsk.net>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to