From: Akash Goel <akash.g...@intel.com>

This patch helps reduce the time for which 'struct_mutex' is kept
locked during either the execbuffer path or the page fault
handling path, as the backing pages are now requested from the shmem
layer without holding 'struct_mutex'.

v2: Fixed the merge issue, due to which 'exec_lock' mutex was not released.

Signed-off-by: Akash Goel <akash.g...@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  9 +++-
 drivers/gpu/drm/i915/i915_dma.c            |  1 +
 drivers/gpu/drm/i915/i915_drv.h            |  5 ++
 drivers/gpu/drm/i915/i915_gem.c            | 75 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_trace.h          | 35 ++++++++++++
 6 files changed, 200 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 18b3565..70c752b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -247,10 +247,16 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
        LIST_HEAD(stolen);
        int count, ret;
 
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
+       ret = mutex_lock_interruptible(&dev_priv->exec_lock);
        if (ret)
                return ret;
 
+       ret = mutex_lock_interruptible(&dev->struct_mutex);
+       if (ret) {
+               mutex_unlock(&dev_priv->exec_lock);
+               return ret;
+       }
+
        total_obj_size = total_gtt_size = count = 0;
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
                if (obj->stolen == NULL)
@@ -281,6 +287,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
                list_del_init(&obj->obj_exec_link);
        }
        mutex_unlock(&dev->struct_mutex);
+       mutex_unlock(&dev_priv->exec_lock);
 
        seq_printf(m, "Total %d objects, %zu bytes, %zu GTT size\n",
                   count, total_obj_size, total_gtt_size);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e393a14..e4d1cb0 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
        dev_priv->ring_index = 0;
        mutex_init(&dev_priv->dpio_lock);
        mutex_init(&dev_priv->modeset_restore_lock);
+       mutex_init(&dev_priv->exec_lock);
 
        intel_pm_setup(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5f4f631..6dc579a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1475,6 +1475,8 @@ struct drm_i915_private {
        struct i915_ums_state ums;
        /* the indicator for dispatch video commands on two BSD rings */
        int ring_index;
+       /* for concurrent execbuffer protection */
+       struct mutex exec_lock;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2116,6 +2118,9 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
                         struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
                      uint32_t handle, uint64_t *offset);
+void
+i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj);
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dae51c3..b19ccb8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1408,6 +1408,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
                PAGE_SHIFT;
 
+       i915_gem_object_shmem_preallocate(obj);
+
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto out;
@@ -1873,6 +1875,79 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
        return __i915_gem_shrink(dev_priv, LONG_MAX, false);
 }
 
+void
+i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj)
+{
+       int page_count, i;
+       struct address_space *mapping;
+       struct page *page;
+       gfp_t gfp;
+
+       if (obj->pages)
+               return;
+
+       if (obj->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to preallocate a purgeable object\n");
+               return;
+       }
+
+       if (obj->base.filp) {
+               int ret;
+               struct inode *inode = file_inode(obj->base.filp);
+               struct shmem_inode_info *info = SHMEM_I(inode);
+               if (!inode)
+                       return;
+               /* The 'alloced' field stores how many data pages are
+                * allocated to the file. If shmem space has already been
+                * allocated for the object, then we can simply return */
+               spin_lock(&info->lock);
+               ret = info->alloced;
+               spin_unlock(&info->lock);
+               if (ret > 0) {
+                       DRM_DEBUG_DRIVER("Already shmem space alloced for obj 
%p, %d pages\n",
+                                       obj, ret);
+                       return;
+               }
+       } else
+               return;
+
+       BUG_ON(obj->pages_pin_count);
+
+       /* Assert that the object is not currently in any GPU domain. As it
+        * wasn't in the GTT, there shouldn't be any way it could have been in
+        * a GPU cache
+        */
+       BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+       BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+
+       trace_i915_gem_obj_prealloc_start(obj, obj->base.size);
+
+       page_count = obj->base.size / PAGE_SIZE;
+
+       /* Get the list of pages out of our struct file
+        * Fail silently without starting the shrinker
+        */
+       mapping = file_inode(obj->base.filp)->i_mapping;
+       gfp = mapping_gfp_mask(mapping);
+       gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+       gfp &= ~(__GFP_IO | __GFP_WAIT);
+       for (i = 0; i < page_count; i++) {
+               page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+               if (IS_ERR(page)) {
+                       DRM_DEBUG_DRIVER("Failure for obj(%p), size(%x) at 
page(%d)\n",
+                                               obj, obj->base.size, i);
+                       break;
+               }
+               /* Decrement the extra ref count on the returned page,
+                  otherwise when 'get_pages_gtt' is called later on in
+                  the regular path, it will also increment the ref count,
+                  which would disturb the ref count management */
+               page_cache_release(page);
+       }
+
+       trace_i915_gem_obj_prealloc_end(obj);
+}
+
 static int
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6cc004f..da3cbdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -38,6 +38,7 @@
 
 struct eb_vmas {
        struct list_head vmas;
+       struct list_head objects;
        int and;
        union {
                struct i915_vma *lut[0];
@@ -93,10 +94,9 @@ eb_lookup_vmas(struct eb_vmas *eb,
 {
        struct drm_i915_private *dev_priv = vm->dev->dev_private;
        struct drm_i915_gem_object *obj;
-       struct list_head objects;
        int i, ret;
 
-       INIT_LIST_HEAD(&objects);
+       INIT_LIST_HEAD(&eb->objects);
        spin_lock(&file->table_lock);
        /* Grab a reference to the object and release the lock so we can lookup
         * or create the VMA without using GFP_ATOMIC */
@@ -119,12 +119,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
                }
 
                drm_gem_object_reference(&obj->base);
-               list_add_tail(&obj->obj_exec_link, &objects);
+               list_add_tail(&obj->obj_exec_link, &eb->objects);
        }
        spin_unlock(&file->table_lock);
 
        i = 0;
-       while (!list_empty(&objects)) {
+       while (!list_empty(&eb->objects)) {
                struct i915_vma *vma;
                struct i915_address_space *bind_vm = vm;
 
@@ -141,7 +141,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
                    (i == (args->buffer_count - 1))))
                        bind_vm = &dev_priv->gtt.base;
 
-               obj = list_first_entry(&objects,
+               obj = list_first_entry(&eb->objects,
                                       struct drm_i915_gem_object,
                                       obj_exec_link);
 
@@ -162,7 +162,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
 
                /* Transfer ownership from the objects list to the vmas list. */
                list_add_tail(&vma->exec_list, &eb->vmas);
-               list_del_init(&obj->obj_exec_link);
 
                vma->exec_entry = &exec[i];
                if (eb->and < 0) {
@@ -180,8 +179,8 @@ eb_lookup_vmas(struct eb_vmas *eb,
 
 
 err:
-       while (!list_empty(&objects)) {
-               obj = list_first_entry(&objects,
+       while (!list_empty(&eb->objects)) {
+               obj = list_first_entry(&eb->objects,
                                       struct drm_i915_gem_object,
                                       obj_exec_link);
                list_del_init(&obj->obj_exec_link);
@@ -249,6 +248,15 @@ static void eb_destroy(struct eb_vmas *eb)
                i915_gem_execbuffer_unreserve_vma(vma);
                drm_gem_object_unreference(&vma->obj->base);
        }
+
+       while (!list_empty(&eb->objects)) {
+               struct drm_i915_gem_object *obj;
+               obj = list_first_entry(&eb->objects,
+                               struct drm_i915_gem_object,
+                               obj_exec_link);
+               list_del_init(&obj->obj_exec_link);
+       }
+
        kfree(eb);
 }
 
@@ -712,6 +720,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                                  struct eb_vmas *eb,
                                  struct drm_i915_gem_exec_object2 *exec)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_relocation_entry *reloc;
        struct i915_address_space *vm;
        struct i915_vma *vma;
@@ -786,12 +795,24 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                total += exec[i].relocation_count;
        }
 
+       /* First acquire the 'exec_lock' mutex to prevent concurrent
+        * access to the 'obj_exec_link' field of the objects by the
+        * preallocation routine from the context of a new execbuffer ioctl */
+       ret = mutex_lock_interruptible(&dev_priv->exec_lock);
+       if (ret) {
+               mutex_lock(&dev->struct_mutex);
+               goto err;
+       }
+
        ret = i915_mutex_lock_interruptible(dev);
        if (ret) {
                mutex_lock(&dev->struct_mutex);
                goto err;
        }
 
+       /* Now release the 'exec_lock' after acquiring the 'struct_mutex' */
+       mutex_unlock(&dev_priv->exec_lock);
+
        /* reacquire the objects */
        eb_reset(eb);
        ret = eb_lookup_vmas(eb, exec, args, vm, file);
@@ -856,6 +877,21 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer 
*ring,
        return intel_ring_invalidate_all_caches(ring);
 }
 
+static void
+i915_gem_execbuffer_preallocate_objs(struct list_head *objects)
+{
+       struct drm_i915_gem_object *obj;
+
+       /* Try to get the obj pages atomically */
+       while (!list_empty(objects)) {
+               obj = list_first_entry(objects,
+                               struct drm_i915_gem_object,
+                               obj_exec_link);
+               i915_gem_object_shmem_preallocate(obj);
+               list_del_init(&obj->obj_exec_link);
+       }
+}
+
 static bool
 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 {
@@ -1173,12 +1209,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
        intel_runtime_pm_get(dev_priv);
 
-       ret = i915_mutex_lock_interruptible(dev);
+       ret = mutex_lock_interruptible(&dev_priv->exec_lock);
        if (ret)
                goto pre_mutex_err;
 
+       ret = i915_mutex_lock_interruptible(dev);
+       if (ret) {
+               mutex_unlock(&dev_priv->exec_lock);
+               goto pre_mutex_err;
+       }
+
        if (dev_priv->ums.mm_suspended) {
                mutex_unlock(&dev->struct_mutex);
+               mutex_unlock(&dev_priv->exec_lock);
                ret = -EBUSY;
                goto pre_mutex_err;
        }
@@ -1200,14 +1243,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
        if (eb == NULL) {
                i915_gem_context_unreference(ctx);
                mutex_unlock(&dev->struct_mutex);
+               mutex_unlock(&dev_priv->exec_lock);
                ret = -ENOMEM;
                goto pre_mutex_err;
        }
 
        /* Look up object handles */
        ret = eb_lookup_vmas(eb, exec, args, vm, file);
-       if (ret)
-               goto err;
+       if (ret) {
+               eb_destroy(eb);
+               i915_gem_context_unreference(ctx);
+               mutex_unlock(&dev->struct_mutex);
+               mutex_unlock(&dev_priv->exec_lock);
+               goto pre_mutex_err;
+       }
+
+       /*
+        * Release the 'struct_mutex' before extracting the backing
+        * pages of the objects, so as to allow other ioctls to get serviced,
+        * while backing pages are being allocated (which is generally
+        * the most time consuming phase). The 'exec_lock' mutex will provide
+        * the protection meanwhile.
+        */
+       mutex_unlock(&dev->struct_mutex);
+
+       i915_gem_execbuffer_preallocate_objs(&eb->objects);
+
+       /* Reacquire the 'struct_mutex' post preallocation */
+       ret = i915_mutex_lock_interruptible(dev);
+
+       mutex_unlock(&dev_priv->exec_lock);
 
        /* take note of the batch buffer before we might reorder the lists */
        batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index b29d7b1..21bf10d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,6 +245,41 @@ TRACE_EVENT(i915_gem_object_fault,
                      __entry->write ? ", writable" : "")
 );
 
+TRACE_EVENT(i915_gem_obj_prealloc_start,
+           TP_PROTO(struct drm_i915_gem_object *obj, u32 size),
+           TP_ARGS(obj, size),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_i915_gem_object *, obj)
+                            __field(u32, size)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->size = size;
+                          ),
+
+           TP_printk("obj=%p, size=%x",
+                     __entry->obj,
+                     __entry->size)
+);
+
+TRACE_EVENT(i915_gem_obj_prealloc_end,
+           TP_PROTO(struct drm_i915_gem_object *obj),
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_i915_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p",
+                     __entry->obj)
+);
+
 DECLARE_EVENT_CLASS(i915_gem_object,
            TP_PROTO(struct drm_i915_gem_object *obj),
            TP_ARGS(obj),
-- 
1.9.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to