The issue is that with inline clflushing, the flushed range isn't properly
swizzled. Fix this by
- always clflushing entire 128 byte chunks and
- unconditionally flushing before writes when swizzling a given page
  (see the sketch below). We could be clever and check whether we pwrite
  a partial 128 byte chunk instead of a partial cacheline, but I figured
  that's not worth it.
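
To illustrate (not part of the patch): on a page with bit17 swizzling,
the two 64 byte cachelines within each 128 byte chunk trade places
depending on bit 17 of the page's physical address - that's the
gpu_offset ^ 64 that __copy_to_user_swizzled already computes. A minimal
sketch of why flushing whole 128 byte chunks suffices;
swizzle_bit17_offset is a made-up helper name for illustration:

  /* A byte at offset o in a bit17-swizzled page actually lives at
   * o ^ 64, i.e. in the other 64 byte half of its 128 byte chunk.
   * Flushing the whole 128 byte chunk therefore hits the data no
   * matter which half it ended up in. */
  static unsigned long swizzle_bit17_offset(unsigned long offset,
                                            bool phys_bit17_set)
  {
          return phys_bit17_set ? offset ^ 64 : offset;
  }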

Now the usual approach would be to fold this into the original patch series,
but I've opted against that because
- this fixes a corner case only very old userspace relies on and
- I'd rather not invalidate all the testing the pwrite rewrite has gotten.

This fixes the regression noticed by tests/gem_tiled_partial_pwrite_pread
from i-g-t. Unfortunately it doesn't fix the issues with partial pwrites to
tiled buffers on bit17 swizzling machines. But that is also broken without
the pwrite patches, so it's likely a different issue (or a problem with the
testcase).

v2: Simplify the patch by dropping the overly clever partial write
logic for swizzled pages.

Signed-off-by: Daniel Vetter <daniel.vet...@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c |   39 ++++++++++++++++++++++++++++++++-------
 1 files changed, 32 insertions(+), 7 deletions(-)
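
For reference (not part of the patch): round_up/round_down below are the
stock power-of-two alignment helpers from include/linux/kernel.h:

  #define __round_mask(x, y)  ((__typeof__(x))((y) - 1))
  #define round_up(x, y)      ((((x) - 1) | __round_mask(x, y)) + 1)
  #define round_down(x, y)    ((x) & ~__round_mask(x, y))

so round_down(start, 128) clears the low seven bits and
round_up(end, 128) bumps end to the next 128 byte boundary.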

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index accb2cb..cbd3bad 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -333,6 +333,28 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
        return ret;
 }
 
+static void
+shmem_clflush_swizzled_range(char *addr, unsigned long length,
+                            bool swizzled)
+{
+       if (swizzled) {
+               unsigned long start = (unsigned long) addr;
+               unsigned long end = (unsigned long) addr + length;
+
+               /* For swizzling simply ensure that we always flush both
+                * channels. Lame, but simple and it works. Swizzled
+                * pwrite/pread is far from a hotpath - current userspace
+                * doesn't use it at all. */
+               start = round_down(start, 128);
+               end = round_up(end, 128);
+
+               drm_clflush_virt_range((void *)start, end - start);
+       } else {
+               drm_clflush_virt_range(addr, length);
+       }
+
+}
+
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
@@ -345,8 +367,9 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 
        vaddr = kmap(page);
        if (needs_clflush)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
+               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+                                            page_length,
+                                            page_do_bit17_swizzling);
 
        if (page_do_bit17_swizzling)
                ret = __copy_to_user_swizzled(user_data,
@@ -657,9 +680,10 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
        int ret;
 
        vaddr = kmap(page);
-       if (needs_clflush_before)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
+       if (needs_clflush_before || page_do_bit17_swizzling)
+               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+                                            page_length,
+                                            page_do_bit17_swizzling);
        if (page_do_bit17_swizzling)
                ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
                                                user_data,
@@ -669,8 +693,9 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
                                       user_data,
                                       page_length);
        if (needs_clflush_after)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
+               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+                                            page_length,
+                                            page_do_bit17_swizzling);
        kunmap(page);
 
        return ret;
-- 
1.7.7.6
