On 03/06/16 17:08, Chris Wilson wrote:
Under the assumption that enabling signaling will be a frequent
operation, let's preallocate our attachments for signaling inside the
request struct (and so benefit from the slab cache).

Ah, you did the part here that I suggested in the previous patch. :)

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
  drivers/gpu/drm/i915/i915_drv.h          |  1 +
  drivers/gpu/drm/i915/intel_breadcrumbs.c | 89 ++++++++++++++++++--------------
  drivers/gpu/drm/i915/intel_ringbuffer.h  |  6 +++
  3 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0235372cfdf..88d9242398ce 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2363,6 +2363,7 @@ struct drm_i915_gem_request {
        struct drm_i915_private *i915;
        struct intel_engine_cs *engine;
        unsigned reset_counter;
+       struct intel_signal_node signaling;

         /** GEM sequence number associated with the previous request,
          * when the HWS breadcrumb is equal to this the GPU is processing
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 143891a2b68a..8ab508ed4248 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -128,16 +128,14 @@ static inline void __intel_breadcrumbs_finish(struct 
intel_breadcrumbs *b,
        wake_up_process(wait->task); /* implicit smp_wmb() */
  }

-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-                          struct intel_wait *wait)
+static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
+                                   struct intel_wait *wait)
  {
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        struct rb_node **p, *parent, *completed;
        bool first;
        u32 seqno;

-       spin_lock(&b->lock);
-
        /* Insert the request into the retirement ordered list
         * of waiters by walking the rbtree. If we are the oldest
         * seqno in the tree (the first to be retired), then
@@ -223,6 +221,17 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
        GEM_BUG_ON(!b->first_wait);
        GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

+       return first;
+}
+
+bool intel_engine_add_wait(struct intel_engine_cs *engine,
+                          struct intel_wait *wait)
+{
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       bool first;
+
+       spin_lock(&b->lock);
+       first = __intel_engine_add_wait(engine, wait);
        spin_unlock(&b->lock);

        return first;
@@ -323,35 +332,29 @@ out_unlock:
        spin_unlock(&b->lock);
  }

-struct signal {
-       struct rb_node node;
-       struct intel_wait wait;
-       struct drm_i915_gem_request *request;
-};
-
-static bool signal_complete(struct signal *signal)
+static bool signal_complete(struct drm_i915_gem_request *request)
  {
-       if (signal == NULL)
+       if (request == NULL)
                return false;

        /* If another process served as the bottom-half it may have already
         * signalled that this wait is already completed.
         */
-       if (intel_wait_complete(&signal->wait))
+       if (intel_wait_complete(&request->signaling.wait))
                return true;

        /* Carefully check if the request is complete, giving time for the
         * seqno to be visible or if the GPU hung.
         */
-       if (__i915_request_irq_complete(signal->request))
+       if (__i915_request_irq_complete(request))
                return true;

        return false;
  }

-static struct signal *to_signal(struct rb_node *rb)
+static struct drm_i915_gem_request *to_signal(struct rb_node *rb)

Why is it still called to_signal, then?

  {
-       return container_of(rb, struct signal, node);
+       return container_of(rb, struct drm_i915_gem_request, signaling.node);
  }

  static void signaler_set_rtpriority(void)
@@ -364,7 +367,7 @@ static int intel_breadcrumbs_signaler(void *arg)
  {
        struct intel_engine_cs *engine = arg;
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct signal *signal;
+       struct drm_i915_gem_request *request;

        /* Install ourselves with high priority to reduce signalling latency */
        signaler_set_rtpriority();
@@ -380,14 +383,13 @@ static int intel_breadcrumbs_signaler(void *arg)
                 * need to wait for a new interrupt from the GPU or for
                 * a new client.
                 */
-               signal = READ_ONCE(b->first_signal);
-               if (signal_complete(signal)) {
+               request = READ_ONCE(b->first_signal);
+               if (signal_complete(request)) {
                        /* Wake up all other completed waiters and select the
                         * next bottom-half for the next user interrupt.
                         */
-                       intel_engine_remove_wait(engine, &signal->wait);
-
-                       i915_gem_request_unreference(signal->request);
+                       intel_engine_remove_wait(engine,
+                                                &request->signaling.wait);

                        /* Find the next oldest signal. Note that as we have
                         * not been holding the lock, another client may
@@ -396,12 +398,15 @@ static int intel_breadcrumbs_signaler(void *arg)
                         * the oldest before picking the next one.
                         */
                        spin_lock(&b->lock);
-                       if (signal == b->first_signal)
-                               b->first_signal = rb_next(&signal->node);
-                       rb_erase(&signal->node, &b->signals);
+                       if (request == b->first_signal) {
+                               struct rb_node *rb =
+                                       rb_next(&request->signaling.node);
+                               b->first_signal = rb ? to_signal(rb) : NULL;

This made me look at the previous patch to see why you didn't need to change the type of first_signal in this one — it's void*! :) Please fix that there. :)

+                       }
+                       rb_erase(&request->signaling.node, &b->signals);
                        spin_unlock(&b->lock);

-                       kfree(signal);
+                       i915_gem_request_unreference(request);
                } else {
                        if (kthread_should_stop())
                                break;
@@ -418,20 +423,23 @@ int intel_engine_enable_signaling(struct 
drm_i915_gem_request *request)
        struct intel_engine_cs *engine = request->engine;
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        struct rb_node *parent, **p;
-       struct signal *signal;
        bool first, wakeup;

        if (unlikely(IS_ERR(b->signaler)))
                return PTR_ERR(b->signaler);

-       signal = kmalloc(sizeof(*signal), GFP_ATOMIC);
-       if (unlikely(!signal))
-               return -ENOMEM;
+       if (unlikely(READ_ONCE(request->signaling.wait.task)))
+               return 0;

Hmm, whether this is safe will depend on the following patches. I don't like the explosion of READ_ONCE and smp_store_mb calls in these patches — something is bound to be broken.

You even check it again below, under the lock, so I am not sure this optimisation is worth it. Maybe leave it for later?


-       signal->wait.task = b->signaler;
-       signal->wait.seqno = request->seqno;
+       spin_lock(&b->lock);
+       if (unlikely(request->signaling.wait.task)) {
+               wakeup = false;
+               goto unlock;
+       }

-       signal->request = i915_gem_request_reference(request);
+       request->signaling.wait.task = b->signaler;
+       request->signaling.wait.seqno = request->seqno;
+       i915_gem_request_reference(request);

        /* First add ourselves into the list of waiters, but register our
         * bottom-half as the signaller thread. As per usual, only the oldest
@@ -441,29 +449,30 @@ int intel_engine_enable_signaling(struct 
drm_i915_gem_request *request)
         * If we are the oldest waiter, enable the irq (after which we
         * must double check that the seqno did not complete).
         */
-       wakeup = intel_engine_add_wait(engine, &signal->wait);
+       wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

        /* Now insert ourselves into the retirement ordered list of signals
         * on this engine. We track the oldest seqno as that will be the
         * first signal to complete.
         */
-       spin_lock(&b->lock);
        parent = NULL;
        first = true;
        p = &b->signals.rb_node;
        while (*p) {
                parent = *p;
-               if (i915_seqno_passed(signal->wait.seqno,
-                                     to_signal(parent)->wait.seqno)) {
+               if (i915_seqno_passed(request->seqno,
+                                     to_signal(parent)->seqno)) {
                        p = &parent->rb_right;
                        first = false;
                } else
                        p = &parent->rb_left;
        }
-       rb_link_node(&signal->node, parent, p);
-       rb_insert_color(&signal->node, &b->signals);
+       rb_link_node(&request->signaling.node, parent, p);
+       rb_insert_color(&request->signaling.node, &b->signals);
        if (first)
-               smp_store_mb(b->first_signal, signal);
+               smp_store_mb(b->first_signal, request);
+
+unlock:
        spin_unlock(&b->lock);

        if (wakeup)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f4bca38caef0..5f7cb3d0ea1c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -530,6 +530,12 @@ struct intel_wait {
        struct task_struct *task;
        u32 seqno;
  };
+
+struct intel_signal_node {
+       struct rb_node node;
+       struct intel_wait wait;
+};
+
  void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
  static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
  {


Otherwise looks OK.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to