Re-arrange things a bit so that work requested after a bo's
fence passes, like pageflip, is done before retiring bos.  Without any
sort of bo cache in userspace, some games can trigger hundreds of
transient bos, which can cause retire to take a long time (5-10ms).
Obviously we want a bo cache.. but this cleanup will also make things
a bit easier for atomic, and makes the code cleaner.

Signed-off-by: Rob Clark <robdclark at gmail.com>
---
 drivers/gpu/drm/msm/mdp4/mdp4_crtc.c | 11 +++++------
 drivers/gpu/drm/msm/msm_drv.c        | 30 ++++++++++++++++++++++++++----
 drivers/gpu/drm/msm/msm_drv.h        | 21 +++++++++++++++++++--
 drivers/gpu/drm/msm/msm_gem.c        | 35 +++++++++++++++++++----------------
 drivers/gpu/drm/msm/msm_gem.h        |  3 ---
 drivers/gpu/drm/msm/msm_gpu.c        |  4 ++--
 6 files changed, 71 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c 
b/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
index 5a68aab..1d52896 100644
--- a/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
@@ -51,7 +51,7 @@ struct mdp4_crtc {

        /* if there is a pending flip, these will be non-null: */
        struct drm_pending_vblank_event *event;
-       struct work_struct pageflip_work;
+       struct msm_fence_cb pageflip_cb;

        /* the fb that we currently hold a scanout ref to: */
        struct drm_framebuffer *fb;
@@ -132,10 +132,10 @@ static void crtc_flush(struct drm_crtc *crtc)
        mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
 }

-static void pageflip_worker(struct work_struct *work)
+static void pageflip_cb(struct msm_fence_cb *cb)
 {
        struct mdp4_crtc *mdp4_crtc =
-               container_of(work, struct mdp4_crtc, pageflip_work);
+               container_of(cb, struct mdp4_crtc, pageflip_cb);
        struct drm_crtc *crtc = &mdp4_crtc->base;

        mdp4_plane_set_scanout(mdp4_crtc->plane, crtc->fb);
@@ -397,8 +397,7 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc,
        mdp4_crtc->event = event;
        update_fb(crtc, true, new_fb);

-       return msm_gem_queue_inactive_work(obj,
-                       &mdp4_crtc->pageflip_work);
+       return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb);
 }

 static int mdp4_crtc_set_property(struct drm_crtc *crtc,
@@ -702,7 +701,7 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
        ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64,
                        "unref cursor", unref_cursor_worker);

-       INIT_WORK(&mdp4_crtc->pageflip_work, pageflip_worker);
+       INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb);

        drm_crtc_init(dev, crtc, &mdp4_crtc_funcs);
        drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e7ac95a..8653769 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -187,6 +187,7 @@ static int msm_load(struct drm_device *dev, unsigned long 
flags)
        init_waitqueue_head(&priv->fence_event);

        INIT_LIST_HEAD(&priv->inactive_list);
+       INIT_LIST_HEAD(&priv->fence_cbs);

        drm_mode_config_init(dev);

@@ -539,15 +540,36 @@ int msm_wait_fence_interruptable(struct drm_device *dev, 
uint32_t fence,
        return ret;
 }

-/* call under struct_mutex */
+/* called from workqueue */
 void msm_update_fence(struct drm_device *dev, uint32_t fence)
 {
        struct msm_drm_private *priv = dev->dev_private;

-       if (fence > priv->completed_fence) {
-               priv->completed_fence = fence;
-               wake_up_all(&priv->fence_event);
+       mutex_lock(&dev->struct_mutex);
+       priv->completed_fence = max(fence, priv->completed_fence);
+
+       while (!list_empty(&priv->fence_cbs)) {
+               struct msm_fence_cb *cb;
+
+               cb = list_first_entry(&priv->fence_cbs,
+                               struct msm_fence_cb, work.entry);
+
+               if (cb->fence > priv->completed_fence)
+                       break;
+
+               list_del_init(&cb->work.entry);
+               queue_work(priv->wq, &cb->work);
        }
+
+       mutex_unlock(&dev->struct_mutex);
+
+       wake_up_all(&priv->fence_event);
+}
+
+void __msm_fence_worker(struct work_struct *work)
+{
+       struct msm_fence_cb *cb = container_of(work, struct msm_fence_cb, work);
+       cb->func(cb);
 }

 /*
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 2c6bad5..d39f086 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -73,6 +73,9 @@ struct msm_drm_private {

        struct workqueue_struct *wq;

+       /* callbacks deferred until bo is inactive: */
+       struct list_head fence_cbs;
+
        /* registered IOMMU domains: */
        unsigned int num_iommus;
        struct iommu_domain *iommus[NUM_DOMAINS];
@@ -97,6 +100,20 @@ struct msm_format {
        uint32_t pixel_format;
 };

+/* callback from wq once fence has passed: */
+struct msm_fence_cb {
+       struct work_struct work;
+       uint32_t fence;
+       void (*func)(struct msm_fence_cb *cb);
+};
+
+void __msm_fence_worker(struct work_struct *work);
+
+#define INIT_FENCE_CB(_cb, _func)  do {                     \
+               INIT_WORK(&(_cb)->work, __msm_fence_worker); \
+               (_cb)->func = _func;                         \
+       } while (0)
+
 /* As there are different display controller blocks depending on the
  * snapdragon version, the kms support is split out and the appropriate
  * implementation is loaded at runtime.  The kms module is responsible
@@ -160,8 +177,8 @@ int msm_gem_prime_pin(struct drm_gem_object *obj);
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
 void *msm_gem_vaddr_locked(struct drm_gem_object *obj);
 void *msm_gem_vaddr(struct drm_gem_object *obj);
-int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
-               struct work_struct *work);
+int msm_gem_queue_inactive_cb(struct drm_gem_object *obj,
+               struct msm_fence_cb *cb);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
                struct msm_gpu *gpu, bool write, uint32_t fence);
 void msm_gem_move_to_inactive(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index ea2c96f..291939d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -309,7 +309,17 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, 
int id,

 int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint32_t *iova)
 {
+       struct msm_gem_object *msm_obj = to_msm_bo(obj);
        int ret;
+
+       /* this is safe right now because we don't unmap until the
+        * bo is deleted:
+        */
+       if (msm_obj->domain[id].iova) {
+               *iova = msm_obj->domain[id].iova;
+               return 0;
+       }
+
        mutex_lock(&obj->dev->struct_mutex);
        ret = msm_gem_get_iova_locked(obj, id, iova);
        mutex_unlock(&obj->dev->struct_mutex);
@@ -379,8 +389,11 @@ void *msm_gem_vaddr(struct drm_gem_object *obj)
        return ret;
 }

-int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
-               struct work_struct *work)
+/* setup callback for when bo is no longer busy..
+ * TODO probably want to differentiate read vs write..
+ */
+int msm_gem_queue_inactive_cb(struct drm_gem_object *obj,
+               struct msm_fence_cb *cb)
 {
        struct drm_device *dev = obj->dev;
        struct msm_drm_private *priv = dev->dev_private;
@@ -388,12 +401,13 @@ int msm_gem_queue_inactive_work(struct drm_gem_object 
*obj,
        int ret = 0;

        mutex_lock(&dev->struct_mutex);
-       if (!list_empty(&work->entry)) {
+       if (!list_empty(&cb->work.entry)) {
                ret = -EINVAL;
        } else if (is_active(msm_obj)) {
-               list_add_tail(&work->entry, &msm_obj->inactive_work);
+               cb->fence = max(msm_obj->read_fence, msm_obj->write_fence);
+               list_add_tail(&cb->work.entry, &priv->fence_cbs);
        } else {
-               queue_work(priv->wq, work);
+               queue_work(priv->wq, &cb->work);
        }
        mutex_unlock(&dev->struct_mutex);

@@ -426,16 +440,6 @@ void msm_gem_move_to_inactive(struct drm_gem_object *obj)
        msm_obj->write_fence = 0;
        list_del_init(&msm_obj->mm_list);
        list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
-
-       while (!list_empty(&msm_obj->inactive_work)) {
-               struct work_struct *work;
-
-               work = list_first_entry(&msm_obj->inactive_work,
-                               struct work_struct, entry);
-
-               list_del_init(&work->entry);
-               queue_work(priv->wq, work);
-       }
 }

 int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
@@ -604,7 +608,6 @@ static int msm_gem_new_impl(struct drm_device *dev,
        reservation_object_init(msm_obj->resv);

        INIT_LIST_HEAD(&msm_obj->submit_entry);
-       INIT_LIST_HEAD(&msm_obj->inactive_work);
        list_add_tail(&msm_obj->mm_list, &priv->inactive_list);

        *obj = &msm_obj->base;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 0676f32..f4f23a5 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -45,9 +45,6 @@ struct msm_gem_object {
         */
        struct list_head submit_entry;

-       /* work defered until bo is inactive: */
-       struct list_head inactive_work;
-
        struct page **pages;
        struct sg_table *sgt;
        void *vaddr;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 3bab937..4583d61 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -268,6 +268,8 @@ static void retire_worker(struct work_struct *work)
        struct drm_device *dev = gpu->dev;
        uint32_t fence = gpu->funcs->last_fence(gpu);

+       msm_update_fence(gpu->dev, fence);
+
        mutex_lock(&dev->struct_mutex);

        while (!list_empty(&gpu->active_list)) {
@@ -287,8 +289,6 @@ static void retire_worker(struct work_struct *work)
                }
        }

-       msm_update_fence(gpu->dev, fence);
-
        mutex_unlock(&dev->struct_mutex);
 }

-- 
1.8.3.1

Reply via email to