Originally the code tried to allocate a large enough array to perform
the copy using vmalloc, performance wasn't great and throughput was
improved by processing each individual relocation entry separately.
This too is not as efficient as one would desire. A compromise is then
to allocate a single page (since that is relatively cheap) and process
the relocations in page size batches.

x11perf -copywinwin10:  n450/pnv        i3-330m         i5-2520m (cpu)
               Before:    249000         785000          1280000 (80%)
                After:    264000         896000          1280000 (65%)

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   92 +++++++++++++++++++++++-----
 1 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2d5d41b..1da04db 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -407,28 +407,90 @@ i915_gem_execbuffer_relocate_object(struct 
drm_i915_gem_object *obj,
 {
        struct drm_i915_gem_relocation_entry __user *user_relocs;
        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-       int i, ret;
+       int i, ret = 0;
+
+       if (entry->relocation_count == 0)
+               return 0;
+
+#define N_RELOC_PER_PAGE \
+       (PAGE_SIZE / sizeof(struct drm_i915_gem_relocation_entry))
 
        user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
-       for (i = 0; i < entry->relocation_count; i++) {
-               struct drm_i915_gem_relocation_entry reloc;
 
-               if (__copy_from_user_inatomic(&reloc,
-                                             user_relocs+i,
-                                             sizeof(reloc)))
-                       return -EFAULT;
+       if (entry->relocation_count < N_RELOC_PER_PAGE / 2) {
+fallback:
+               for (i = 0; i < entry->relocation_count; i++) {
+                       struct drm_i915_gem_relocation_entry reloc;
+                       u64 offset;
 
-               ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
-               if (ret)
-                       return ret;
+                       if (__copy_from_user_inatomic(&reloc,
+                                                     user_relocs+i,
+                                                     sizeof(reloc)))
+                               return -EFAULT;
 
-               if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
-                                           &reloc.presumed_offset,
-                                           sizeof(reloc.presumed_offset)))
-                       return -EFAULT;
+                       offset = reloc.presumed_offset;
+
+                       ret = i915_gem_execbuffer_relocate_entry(obj, eb, 
&reloc);
+                       if (ret)
+                               return ret;
+
+                       if (reloc.presumed_offset != offset &&
+                           
__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
+                                                   &reloc.presumed_offset,
+                                                   
sizeof(reloc.presumed_offset)))
+                               return -EFAULT;
+               }
+       } else {
+               unsigned long page;
+               int remain;
+
+               page = __get_free_page(GFP_TEMPORARY);
+               if (unlikely(page == 0))
+                       goto fallback;
+
+               i = 0;
+               remain = entry->relocation_count;
+               do {
+                       struct drm_i915_gem_relocation_entry *r =
+                               (struct drm_i915_gem_relocation_entry *)page;
+                       int count = remain;
+                       if (count > N_RELOC_PER_PAGE)
+                               count = N_RELOC_PER_PAGE;
+                       remain -= count;
+
+                       if (__copy_from_user_inatomic(r, user_relocs+i,
+                                                     count*sizeof(r[0]))) {
+                               ret = -EFAULT;
+                               goto err;
+                       }
+
+                       do {
+                               u64 offset = r->presumed_offset;
+
+                               ret = i915_gem_execbuffer_relocate_entry(obj, 
eb, r);
+                               if (ret)
+                                       goto err;
+
+                               if (r->presumed_offset != offset &&
+                                   
__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
+                                                           &r->presumed_offset,
+                                                           
sizeof(r->presumed_offset))) {
+                                       ret = -EFAULT;
+                                       goto err;
+                               }
+
+                               i++;
+                               r++;
+                       } while (--count);
+               } while (remain);
+
+               ret = 0;
+err:
+               free_page(page);
        }
 
-       return 0;
+       return ret;
+#undef N_RELOC_PER_PAGE
 }
 
 static int
-- 
1.7.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to