Ensure the view occupies the full tile row so that reads/writes into the
VMA do not escape (via fenced detiling) into neighbouring objects; the
object is padded with scratch pages to satisfy the fence. This extends
the lazy tiling we already employ on gen2/3 to gen4+.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |  5 +++--
 drivers/gpu/drm/i915/i915_gem.c        | 26 ++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_tiling.c | 14 +++++++++-----
 drivers/gpu/drm/i915/i915_vma.c        |  8 ++++++--
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 58dfe8ab9c0b..fae0f07b232c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3359,9 +3359,10 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size,
-                          int tiling_mode);
+                          int tiling_mode, unsigned int stride);
 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
-                               int tiling_mode, bool fenced);
+                               int tiling_mode, unsigned int stride,
+                               bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ea507d3eadba..695b21b7ec8b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2038,21 +2038,28 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
  * @dev_priv: i915 device
  * @size: object size
  * @tiling_mode: tiling mode
+ * @stride: tiling stride
  *
  * Return the required global GTT size for an object, taking into account
  * potential fence register mapping.
  */
 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
-                          u64 size, int tiling_mode)
+                          u64 size, int tiling_mode, unsigned int stride)
 {
        u64 ggtt_size;
 
-       GEM_BUG_ON(size == 0);
+       GEM_BUG_ON(!size);
 
-       if (INTEL_GEN(dev_priv) >= 4 ||
-           tiling_mode == I915_TILING_NONE)
+       if (tiling_mode == I915_TILING_NONE)
                return size;
 
+       GEM_BUG_ON(!stride);
+
+       if (INTEL_GEN(dev_priv) >= 4) {
+               stride *= tiling_mode == I915_TILING_Y ? 32 : 8;
+               return roundup(size, stride);
+       }
+
        /* Previous chips need a power-of-two fence region when tiling */
        if (IS_GEN3(dev_priv))
                ggtt_size = 1024*1024;
@@ -2070,15 +2077,17 @@ u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
  * @dev_priv: i915 device
  * @size: object size
  * @tiling_mode: tiling mode
+ * @stride: tiling stride
  * @fenced: is fenced alignment required or not
  *
  * Return the required global GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
-                               int tiling_mode, bool fenced)
+                               int tiling_mode, unsigned int stride,
+                               bool fenced)
 {
-       GEM_BUG_ON(size == 0);
+       GEM_BUG_ON(!size);
 
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
@@ -2093,7 +2102,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
-       return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
+       return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode, stride);
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -3703,7 +3712,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                        u32 fence_size;
 
                        fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
-                                                            i915_gem_object_get_tiling(obj));
+                                                            i915_gem_object_get_tiling(obj),
+                                                            i915_gem_object_get_stride(obj));
                        /* If the required space is larger than the available
                         * aperture, we will not able to find a slot for the
                         * object and unbinding the object now will be in
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 62ad375de6ca..bc9eb7e1da0e 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -117,7 +117,8 @@ i915_tiling_ok(struct drm_i915_private *dev_priv,
        return true;
 }
 
-static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+                                  int tiling_mode, unsigned int stride)
 {
        struct drm_i915_private *dev_priv = vma->vm->i915;
        u32 size;
@@ -133,7 +134,7 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
                        return false;
        }
 
-       size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
+       size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode, stride);
        if (vma->node.size < size)
                return false;
 
@@ -145,7 +146,8 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
 
 /* Make the current GTT allocation valid for the change in tiling. */
 static int
-i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
+i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
+                             int tiling_mode, unsigned int stride)
 {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_vma *vma;
@@ -158,7 +160,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
                return 0;
 
        list_for_each_entry(vma, &obj->vma_list, obj_link) {
-               if (i915_vma_fence_prepare(vma, tiling_mode))
+               if (i915_vma_fence_prepare(vma, tiling_mode, stride))
                        continue;
 
                ret = i915_vma_unbind(vma);
@@ -255,7 +257,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                 * whilst executing a fenced command for an untiled object.
                 */
 
-               err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
+               err = i915_gem_object_fence_prepare(obj,
+                                                   args->tiling_mode,
+                                                   args->stride);
                if (!err) {
                        struct i915_vma *vma;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 284273056ea2..028d011f805a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -282,10 +282,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 
        fence_size = i915_gem_get_ggtt_size(dev_priv,
                                            vma->size,
-                                           i915_gem_object_get_tiling(obj));
+                                           i915_gem_object_get_tiling(obj),
+                                           i915_gem_object_get_stride(obj));
        fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
                                                      vma->size,
                                                      
i915_gem_object_get_tiling(obj),
+                                                     
i915_gem_object_get_stride(obj),
                                                      true);
 
        fenceable = (vma->node.size == fence_size &&
@@ -368,11 +370,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
        size = max(size, vma->size);
        if (flags & PIN_MAPPABLE)
                size = i915_gem_get_ggtt_size(dev_priv, size,
-                                             i915_gem_object_get_tiling(obj));
+                                             i915_gem_object_get_tiling(obj),
+                                             i915_gem_object_get_stride(obj));
 
        alignment = max(max(alignment, vma->display_alignment),
                        i915_gem_get_ggtt_alignment(dev_priv, size,
                                                    
i915_gem_object_get_tiling(obj),
+                                                   
i915_gem_object_get_stride(obj),
                                                    flags & PIN_MAPPABLE));
 
        start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to