Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.

(measured relative to a baseline with neither optimisation applied).

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  100 +++++++++++++++++-----------
 include/uapi/drm/i915_drm.h                |    8 ++-
 3 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4b2b55e..ae63318 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_EXEC_NO_RELOC:
                value = 1;
                break;
+       case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+               value = 1;
+               break;
        default:
                DRM_DEBUG_DRIVER("Unknown parameter %d\n",
                                 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7e0dc15..18a6ab7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,24 +39,40 @@
 struct eb_objects {
        struct list_head objects;
        int and;
-       struct hlist_head buckets[0];
+       union {
+               struct drm_i915_gem_object *lut[0];
+               struct hlist_head buckets[0];
+       };
 };
 
 static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
 {
-       struct eb_objects *eb;
-       int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
-       BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
-       while (count > size)
-               count >>= 1;
-       eb = kzalloc(count*sizeof(struct hlist_head) +
-                    sizeof(struct eb_objects),
-                    GFP_KERNEL);
-       if (eb == NULL)
-               return eb;
-
-       eb->and = count - 1;
+       struct eb_objects *eb = NULL;
+
+       if (args->flags & I915_EXEC_HANDLE_LUT) {
+               int size = args->buffer_count;
+               size *= sizeof(struct drm_i915_gem_object *);
+               size += sizeof(struct eb_objects);
+               eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+       }
+
+       if (eb == NULL) {
+               int size = args->buffer_count;
+               int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+               BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
+               while (count > 2*size)
+                       count >>= 1;
+               eb = kzalloc(count*sizeof(struct hlist_head) +
+                            sizeof(struct eb_objects),
+                            GFP_TEMPORARY);
+               if (eb == NULL)
+                       return eb;
+
+               eb->and = count - 1;
+       } else
+               eb->and = -args->buffer_count;
+
        INIT_LIST_HEAD(&eb->objects);
        return eb;
 }
@@ -64,26 +80,20 @@ eb_create(int size)
 static void
 eb_reset(struct eb_objects *eb)
 {
-       memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
-       hlist_add_head(&obj->exec_node,
-                      &eb->buckets[obj->exec_handle & eb->and]);
+       if (eb->and >= 0)
+               memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
 static int
 eb_lookup_objects(struct eb_objects *eb,
                  struct drm_i915_gem_exec_object2 *exec,
-                 int count,
+                 const struct drm_i915_gem_execbuffer2 *args,
                  struct drm_file *file)
 {
        int i;
 
        spin_lock(&file->table_lock);
-       for (i = 0; i < count; i++) {
+       for (i = 0; i < args->buffer_count; i++) {
                struct drm_i915_gem_object *obj;
 
                obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
@@ -104,9 +114,15 @@ eb_lookup_objects(struct eb_objects *eb,
                drm_gem_object_reference(&obj->base);
                list_add_tail(&obj->exec_list, &eb->objects);
 
-               obj->exec_handle = exec[i].handle;
                obj->exec_entry = &exec[i];
-               eb_add_object(eb, obj);
+               if (eb->and < 0) {
+                       eb->lut[i] = obj;
+               } else {
+                       uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
+                       obj->exec_handle = handle;
+                       hlist_add_head(&obj->exec_node,
+                                      &eb->buckets[handle & eb->and]);
+               }
        }
        spin_unlock(&file->table_lock);
 
@@ -116,18 +132,24 @@ eb_lookup_objects(struct eb_objects *eb,
 static struct drm_i915_gem_object *
 eb_get_object(struct eb_objects *eb, unsigned long handle)
 {
-       struct hlist_head *head;
-       struct hlist_node *node;
-       struct drm_i915_gem_object *obj;
+       if (eb->and < 0) {
+               if (handle >= -eb->and)
+                       return NULL;
+               return eb->lut[handle];
+       } else {
+               struct hlist_head *head;
+               struct hlist_node *node;
 
-       head = &eb->buckets[handle & eb->and];
-       hlist_for_each(node, head) {
-               obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
-               if (obj->exec_handle == handle)
-                       return obj;
-       }
+               head = &eb->buckets[handle & eb->and];
+               hlist_for_each(node, head) {
+                       struct drm_i915_gem_object *obj;
 
-       return NULL;
+                       obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
+                       if (obj->exec_handle == handle)
+                               return obj;
+               }
+               return NULL;
+       }
 }
 
 static void
@@ -624,7 +646,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 
        /* reacquire the objects */
        eb_reset(eb);
-       ret = eb_lookup_objects(eb, exec, count, file);
+       ret = eb_lookup_objects(eb, exec, args, file);
        if (ret)
                goto err;
 
@@ -934,7 +956,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                goto pre_mutex_err;
        }
 
-       eb = eb_create(args->buffer_count);
+       eb = eb_create(args);
        if (eb == NULL) {
                mutex_unlock(&dev->struct_mutex);
                ret = -ENOMEM;
@@ -942,7 +964,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        }
 
        /* Look up object handles */
-       ret = eb_lookup_objects(eb, exec, args->buffer_count, file);
+       ret = eb_lookup_objects(eb, exec, args, file);
        if (ret)
                goto err;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2430b6a..07d5941 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_SECURE_BATCHES   23
 #define I915_PARAM_HAS_PINNED_BATCHES   24
 #define I915_PARAM_HAS_EXEC_NO_RELOC    25
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 
 typedef struct drm_i915_getparam {
        int param;
@@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_NO_RELOC             (1<<11)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1)
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT           (1<<12)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to