On Thu, Jun 29, 2017 at 01:59:30PM +0100, Chris Wilson wrote:
> Reduce the list iteration when incrementing the timeline by storing the
> fences in increasing order.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Sumit Semwal <sumit.sem...@linaro.org>
> Cc: Sean Paul <seanp...@chromium.org>
> Cc: Gustavo Padovan <gust...@padovan.org>
> ---
>  drivers/dma-buf/sw_sync.c    | 49 
> ++++++++++++++++++++++++++++++++++++++------
>  drivers/dma-buf/sync_debug.h |  5 +++++
>  2 files changed, 48 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> index e51fe11bbbea..8cef5d345316 100644
> --- a/drivers/dma-buf/sw_sync.c
> +++ b/drivers/dma-buf/sw_sync.c
> @@ -96,6 +96,7 @@ static struct sync_timeline *sync_timeline_create(const 
> char *name)
>       obj->context = dma_fence_context_alloc(1);
>       strlcpy(obj->name, name, sizeof(obj->name));
>  
> +     obj->pt_tree = RB_ROOT;
>       INIT_LIST_HEAD(&obj->pt_list);
>       spin_lock_init(&obj->lock);
>  
> @@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline 
> *obj, unsigned int inc)
>  
>       obj->value += inc;
>  
> -     list_for_each_entry_safe(pt, next, &obj->pt_list, link)
> -             if (dma_fence_is_signaled_locked(&pt->base))
> -                     list_del_init(&pt->link);
> +     list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
> +             if (!dma_fence_is_signaled_locked(&pt->base))
> +                     break;
> +
> +             list_del_init(&pt->link);
> +             rb_erase(&pt->node, &obj->pt_tree);
> +     }
>  
>       spin_unlock_irq(&obj->lock);
>  }
> @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct 
> sync_timeline *obj,
>       INIT_LIST_HEAD(&pt->link);
>  
>       spin_lock_irq(&obj->lock);
> -     if (!dma_fence_is_signaled_locked(&pt->base))
> -             list_add_tail(&pt->link, &obj->pt_list);
> +     if (!dma_fence_is_signaled_locked(&pt->base)) {
> +             struct rb_node **p = &obj->pt_tree.rb_node;
> +             struct rb_node *parent = NULL;
> +
> +             while (*p) {
> +                     struct sync_pt *other;
> +                     int cmp;
> +
> +                     parent = *p;
> +                     other = rb_entry(parent, typeof(*pt), node);
> +                     cmp = value - other->base.seqno;

We're imposing an implicit limitation on userspace here that points cannot be
> INT_MAX apart (since they'll be in the wrong order in the tree). 

I wonder how much this patch will actually save, given that the number of 
active points
on a given timeline should be small. Do we have any evidence that this
optimization is warranted?

Sean

> +                     if (cmp > 0) {
> +                             p = &parent->rb_right;
> +                     } else if (cmp < 0) {
> +                             p = &parent->rb_left;
> +                     } else {
> +                             if (dma_fence_get_rcu(&other->base)) {
> +                                     dma_fence_put(&pt->base);
> +                                     pt = other;
> +                                     goto unlock;
> +                             }
> +                             p = &parent->rb_left;
> +                     }
> +             }
> +             rb_link_node(&pt->node, parent, p);
> +             rb_insert_color(&pt->node, &obj->pt_tree);
> +
> +             parent = rb_next(&pt->node);
> +             list_add_tail(&pt->link,
> +                           parent ? &rb_entry(parent, typeof(*pt), 
> node)->link : &obj->pt_list);
> +     }
> +unlock:
>       spin_unlock_irq(&obj->lock);
>  
>       return pt;
> @@ -201,8 +236,10 @@ static void timeline_fence_release(struct dma_fence 
> *fence)
>  
>       spin_lock_irqsave(fence->lock, flags);
>  
> -     if (!list_empty(&pt->link))
> +     if (!list_empty(&pt->link)) {
>               list_del(&pt->link);
> +             rb_erase(&pt->node, &parent->pt_tree);
> +     }
>  
>       spin_unlock_irqrestore(fence->lock, flags);
>  
> diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
> index 899ba0e19fd3..b7a5fab12179 100644
> --- a/drivers/dma-buf/sync_debug.h
> +++ b/drivers/dma-buf/sync_debug.h
> @@ -14,6 +14,7 @@
>  #define _LINUX_SYNC_H
>  
>  #include <linux/list.h>
> +#include <linux/rbtree.h>
>  #include <linux/spinlock.h>
>  #include <linux/dma-fence.h>
>  
> @@ -25,6 +26,7 @@
>   * @kref:            reference count on fence.
>   * @name:            name of the sync_timeline. Useful for debugging
>   * @lock:            lock protecting @child_list_head and fence.status
> + * @pt_tree:         rbtree of active (unsignaled/errored) sync_pts
>   * @pt_list:         list of active (unsignaled/errored) sync_pts
>   * @sync_timeline_list:      membership in global sync_timeline_list
>   */
> @@ -36,6 +38,7 @@ struct sync_timeline {
>       u64                     context;
>       int                     value;
>  
> +     struct rb_root          pt_tree;
>       struct list_head        pt_list;
>       spinlock_t              lock;
>  
> @@ -51,10 +54,12 @@ static inline struct sync_timeline 
> *dma_fence_parent(struct dma_fence *fence)
>   * struct sync_pt - sync_pt object
>   * @base: base fence object
>   * @link: link on the sync timeline's list
> + * @node: node in the sync timeline's tree
>   */
>  struct sync_pt {
>       struct dma_fence base;
>       struct list_head link;
> +     struct rb_node node;
>  };
>  
>  #ifdef CONFIG_SW_SYNC
> -- 
> 2.13.1

-- 
Sean Paul, Software Engineer, Google / Chromium OS
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to