On Mon, Apr 10, 2017 at 06:44:14PM -0700, Eric Anholt wrote:
> This is needed for proper synchronization with display on another DRM
> device (pl111 or tinydrm) with buffers produced by vc4 V3D.  Fixes the
> new igt vc4_dmabuf_poll testcase, and rendering of one of the glmark2
> desktop tests on pl111+vc4.
> 
> This doesn't yet introduce waits on other device's fences before vc4's
> rendering/display, because I don't have testcases for them.
> 
> Signed-off-by: Eric Anholt <e...@anholt.net>
> Cc: Gustavo Padovan <gustavo.pado...@collabora.com>

So I'm not sure whether I just didn't look hard enough, but I didn't find
anything like ->prepare_fb as implemented in e.g. drm_fb_cma_prepare_fb().
Where is that? Otherwise looks good to me.
-Daniel

> ---
>  drivers/gpu/drm/vc4/Makefile    |   1 +
>  drivers/gpu/drm/vc4/vc4_bo.c    |  37 ++++++++++-
>  drivers/gpu/drm/vc4/vc4_drv.c   |   3 +-
>  drivers/gpu/drm/vc4/vc4_drv.h   |  30 +++++++++
>  drivers/gpu/drm/vc4/vc4_fence.c |  63 +++++++++++++++++++
>  drivers/gpu/drm/vc4/vc4_gem.c   | 136 +++++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/vc4/vc4_irq.c   |   4 ++
>  7 files changed, 269 insertions(+), 5 deletions(-)
>  create mode 100644 drivers/gpu/drm/vc4/vc4_fence.c
> 
> diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
> index 61f45d122bd0..ab687fba4916 100644
> --- a/drivers/gpu/drm/vc4/Makefile
> +++ b/drivers/gpu/drm/vc4/Makefile
> @@ -9,6 +9,7 @@ vc4-y := \
>       vc4_drv.o \
>       vc4_dpi.o \
>       vc4_dsi.o \
> +     vc4_fence.o \
>       vc4_kms.o \
>       vc4_gem.o \
>       vc4_hdmi.o \
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index af29432a6471..80b2f9e55c5c 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -19,6 +19,8 @@
>   * rendering can return quickly.
>   */
>  
> +#include <linux/dma-buf.h>
> +
>  #include "vc4_drv.h"
>  #include "uapi/drm/vc4_drm.h"
>  
> @@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
>  
>       vc4->bo_stats.num_allocated--;
>       vc4->bo_stats.size_allocated -= obj->size;
> +
> +     if (bo->resv == &bo->_resv)
> +             reservation_object_fini(bo->resv);
> +
>       drm_gem_cma_free_object(obj);
>  }
>  
> @@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
>                       return ERR_PTR(-ENOMEM);
>               }
>       }
> +     bo = to_vc4_bo(&cma_obj->base);
>  
> -     return to_vc4_bo(&cma_obj->base);
> +     bo->resv = &bo->_resv;
> +     reservation_object_init(bo->resv);
> +
> +     return bo;
>  }
>  
>  int vc4_dumb_create(struct drm_file *file_priv,
> @@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data)
>       schedule_work(&vc4->bo_cache.time_work);
>  }
>  
> +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj)
> +{
> +     struct vc4_bo *bo = to_vc4_bo(obj);
> +
> +     return bo->resv;
> +}
> +
>  struct dma_buf *
>  vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
>  {
> @@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj)
>       return drm_gem_cma_prime_vmap(obj);
>  }
>  
> +struct drm_gem_object *
> +vc4_prime_import_sg_table(struct drm_device *dev,
> +                       struct dma_buf_attachment *attach,
> +                       struct sg_table *sgt)
> +{
> +     struct drm_gem_object *obj;
> +     struct vc4_bo *bo;
> +
> +     obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
> +     if (IS_ERR(obj))
> +             return obj;
> +
> +     bo = to_vc4_bo(obj);
> +     bo->resv = attach->dmabuf->resv;
> +
> +     return obj;
> +}
> +
>  int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
>                       struct drm_file *file_priv)
>  {
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index 61e674baf3a6..92fb9a41fe7c 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = {
>       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>       .gem_prime_import = drm_gem_prime_import,
>       .gem_prime_export = vc4_prime_export,
> +     .gem_prime_res_obj = vc4_prime_res_obj,
>       .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
> -     .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
> +     .gem_prime_import_sg_table = vc4_prime_import_sg_table,
>       .gem_prime_vmap = vc4_prime_vmap,
>       .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
>       .gem_prime_mmap = vc4_prime_mmap,
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index dea304966e65..08d5c2213c80 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -8,7 +8,9 @@
>  
>  #include "drmP.h"
>  #include "drm_gem_cma_helper.h"
> +#include "drm_gem_cma_helper.h"
>  
> +#include <linux/reservation.h>
>  #include <drm/drm_encoder.h>
>  
>  struct vc4_dev {
> @@ -56,6 +58,8 @@ struct vc4_dev {
>       /* Protects bo_cache and the BO stats. */
>       struct mutex bo_lock;
>  
> +     uint64_t dma_fence_context;
> +
>       /* Sequence number for the last job queued in bin_job_list.
>        * Starts at 0 (no jobs emitted).
>        */
> @@ -150,6 +154,10 @@ struct vc4_bo {
>        * DRM_IOCTL_VC4_CREATE_SHADER_BO.
>        */
>       struct vc4_validated_shader_info *validated_shader;
> +
> +     /* normally (resv == &_resv) except for imported bo's */
> +     struct reservation_object *resv;
> +     struct reservation_object _resv;
>  };
>  
>  static inline struct vc4_bo *
> @@ -158,6 +166,19 @@ to_vc4_bo(struct drm_gem_object *bo)
>       return (struct vc4_bo *)bo;
>  }
>  
> +struct vc4_fence {
> +     struct dma_fence base;
> +     struct drm_device *dev;
> +     /* vc4 seqno for signaled() test */
> +     uint64_t seqno;
> +};
> +
> +static inline struct vc4_fence *
> +to_vc4_fence(struct dma_fence *fence)
> +{
> +     return (struct vc4_fence *)fence;
> +}
> +
>  struct vc4_seqno_cb {
>       struct work_struct work;
>       uint64_t seqno;
> @@ -231,6 +252,8 @@ struct vc4_exec_info {
>       /* Latest write_seqno of any BO that binning depends on. */
>       uint64_t bin_dep_seqno;
>  
> +     struct dma_fence *fence;
> +
>       /* Last current addresses the hardware was processing when the
>        * hangcheck timer checked on us.
>        */
> @@ -437,7 +460,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
>  int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
>                            struct drm_file *file_priv);
>  int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
> +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
>  int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> +struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
> +                                              struct dma_buf_attachment *attach,
> +                                              struct sg_table *sgt);
>  void *vc4_prime_vmap(struct drm_gem_object *obj);
>  void vc4_bo_cache_init(struct drm_device *dev);
>  void vc4_bo_cache_destroy(struct drm_device *dev);
> @@ -469,6 +496,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void 
> *unused);
>  extern struct platform_driver vc4_dsi_driver;
>  int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
>  
> +/* vc4_fence.c */
> +extern const struct dma_fence_ops vc4_fence_ops;
> +
>  /* vc4_gem.c */
>  void vc4_gem_init(struct drm_device *dev);
>  void vc4_gem_destroy(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c
> new file mode 100644
> index 000000000000..a2845eeae94a
> --- /dev/null
> +++ b/drivers/gpu/drm/vc4/vc4_fence.c
> @@ -0,0 +1,63 @@
> +/*
> + * Copyright © 2017 Broadcom
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "vc4_drv.h"
> +
> +static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
> +{
> +     return "vc4";
> +}
> +
> +static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
> +{
> +     return "vc4-v3d";
> +}
> +
> +static bool vc4_fence_enable_signaling(struct dma_fence *fence)
> +{
> +     return true;
> +}
> +
> +static bool vc4_fence_signaled(struct dma_fence *fence)
> +{
> +     struct vc4_fence *f = to_vc4_fence(fence);
> +     struct vc4_dev *vc4 = to_vc4_dev(f->dev);
> +
> +     return vc4->finished_seqno >= f->seqno;
> +}
> +
> +static void vc4_fence_release(struct dma_fence *fence)
> +{
> +     struct vc4_fence *f = to_vc4_fence(fence);
> +
> +     kfree_rcu(f, base.rcu);
> +}
> +
> +const struct dma_fence_ops vc4_fence_ops = {
> +     .get_driver_name = vc4_fence_get_driver_name,
> +     .get_timeline_name = vc4_fence_get_timeline_name,
> +     .enable_signaling = vc4_fence_enable_signaling,
> +     .signaled = vc4_fence_signaled,
> +     .wait = dma_fence_default_wait,
> +     .release = vc4_fence_release,
> +};
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index e9c381c42139..a1a01044263c 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
>       for (i = 0; i < exec->bo_count; i++) {
>               bo = to_vc4_bo(&exec->bo[i]->base);
>               bo->seqno = seqno;
> +
> +             reservation_object_add_shared_fence(bo->resv, exec->fence);
>       }
>  
>       list_for_each_entry(bo, &exec->unref_list, unref_head) {
> @@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
>       for (i = 0; i < exec->rcl_write_bo_count; i++) {
>               bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
>               bo->write_seqno = seqno;
> +
> +             reservation_object_add_excl_fence(bo->resv, exec->fence);
> +     }
> +}
> +
> +static void
> +vc4_unlock_bo_reservations(struct drm_device *dev,
> +                        struct vc4_exec_info *exec,
> +                        struct ww_acquire_ctx *acquire_ctx)
> +{
> +     int i;
> +
> +     for (i = 0; i < exec->bo_count; i++) {
> +             struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
> +
> +             ww_mutex_unlock(&bo->resv->lock);
>       }
> +
> +     ww_acquire_fini(acquire_ctx);
> +}
> +
> +/* Takes the reservation lock on all the BOs being referenced, so that
> + * at queue submit time we can update the reservations.
> + *
> + * We don't lock the RCL the tile alloc/state BOs, or overflow memory
> + * (all of which are on exec->unref_list).  They're entirely private
> + * to vc4, so we don't attach dma-buf fences to them.
> + */
> +static int
> +vc4_lock_bo_reservations(struct drm_device *dev,
> +                      struct vc4_exec_info *exec,
> +                      struct ww_acquire_ctx *acquire_ctx)
> +{
> +     int contended_lock = -1;
> +     int i, ret;
> +     struct vc4_bo *bo;
> +
> +     ww_acquire_init(acquire_ctx, &reservation_ww_class);
> +
> +retry:
> +     if (contended_lock != -1) {
> +             bo = to_vc4_bo(&exec->bo[contended_lock]->base);
> +             ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
> +                                                    acquire_ctx);
> +             if (ret) {
> +                     ww_acquire_done(acquire_ctx);
> +                     return ret;
> +             }
> +     }
> +
> +     for (i = 0; i < exec->bo_count; i++) {
> +             if (i == contended_lock)
> +                     continue;
> +
> +             bo = to_vc4_bo(&exec->bo[i]->base);
> +
> +             ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
> +             if (ret) {
> +                     int j;
> +
> +                     for (j = 0; j < i; j++) {
> +                             bo = to_vc4_bo(&exec->bo[j]->base);
> +                             ww_mutex_unlock(&bo->resv->lock);
> +                     }
> +
> +                     if (contended_lock != -1 && contended_lock >= i) {
> +                             bo = to_vc4_bo(&exec->bo[contended_lock]->base);
> +
> +                             ww_mutex_unlock(&bo->resv->lock);
> +                     }
> +
> +                     if (ret == -EDEADLK) {
> +                             contended_lock = i;
> +                             goto retry;
> +                     }
> +
> +                     ww_acquire_done(acquire_ctx);
> +                     return ret;
> +             }
> +     }
> +
> +     ww_acquire_done(acquire_ctx);
> +
> +     /* Reserve space for our shared (read-only) fence references,
> +      * before we commit the CL to the hardware.
> +      */
> +     for (i = 0; i < exec->bo_count; i++) {
> +             bo = to_vc4_bo(&exec->bo[i]->base);
> +
> +             ret = reservation_object_reserve_shared(bo->resv);
> +             if (ret) {
> +                     vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
> +                     return ret;
> +             }
> +     }
> +
> +     return 0;
>  }
>  
>  /* Queues a struct vc4_exec_info for execution.  If no job is
> @@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
>   * then bump the end address.  That's a change for a later date,
>   * though.
>   */
> -static void
> -vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
> +static int
> +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
> +              struct ww_acquire_ctx *acquire_ctx)
>  {
>       struct vc4_dev *vc4 = to_vc4_dev(dev);
>       uint64_t seqno;
>       unsigned long irqflags;
> +     struct vc4_fence *fence;
> +
> +     fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +     if (!fence)
> +             return -ENOMEM;
> +     fence->dev = dev;
>  
>       spin_lock_irqsave(&vc4->job_lock, irqflags);
>  
>       seqno = ++vc4->emit_seqno;
>       exec->seqno = seqno;
> +
> +     dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
> +                    vc4->dma_fence_context, exec->seqno);
> +     fence->seqno = exec->seqno;
> +     exec->fence = &fence->base;
> +
>       vc4_update_bo_seqnos(exec, seqno);
>  
> +     vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
> +
>       list_add_tail(&exec->head, &vc4->bin_job_list);
>  
>       /* If no job was executing, kick ours off.  Otherwise, it'll
> @@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
>       }
>  
>       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
> +
> +     return 0;
>  }
>  
>  /**
> @@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
>       struct vc4_dev *vc4 = to_vc4_dev(dev);
>       unsigned i;
>  
> +     /* If we got force-completed because of GPU reset rather than
> +      * through our IRQ handler, signal the fence now.
> +      */
> +     if (exec->fence)
> +             dma_fence_signal(exec->fence);
> +
>       if (exec->bo) {
>               for (i = 0; i < exec->bo_count; i++)
>                       drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
> @@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>       struct vc4_dev *vc4 = to_vc4_dev(dev);
>       struct drm_vc4_submit_cl *args = data;
>       struct vc4_exec_info *exec;
> +     struct ww_acquire_ctx acquire_ctx;
>       int ret = 0;
>  
>       if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
> @@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>       if (ret)
>               goto fail;
>  
> +     ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
> +     if (ret)
> +             goto fail;
> +
>       /* Clear this out of the struct we'll be putting in the queue,
>        * since it's part of our stack.
>        */
>       exec->args = NULL;
>  
> -     vc4_queue_submit(dev, exec);
> +     ret = vc4_queue_submit(dev, exec, &acquire_ctx);
> +     if (ret)
> +             goto fail;
>  
>       /* Return the seqno for our job. */
>       args->seqno = vc4->emit_seqno;
> @@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev)
>  {
>       struct vc4_dev *vc4 = to_vc4_dev(dev);
>  
> +     vc4->dma_fence_context = dma_fence_context_alloc(1);
> +
>       INIT_LIST_HEAD(&vc4->bin_job_list);
>       INIT_LIST_HEAD(&vc4->render_job_list);
>       INIT_LIST_HEAD(&vc4->job_done_list);
> diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
> index cdc6e6760705..1384af9fc987 100644
> --- a/drivers/gpu/drm/vc4/vc4_irq.c
> +++ b/drivers/gpu/drm/vc4/vc4_irq.c
> @@ -142,6 +142,10 @@ vc4_irq_finish_render_job(struct drm_device *dev)
>  
>       vc4->finished_seqno++;
>       list_move_tail(&exec->head, &vc4->job_done_list);
> +     if (exec->fence) {
> +             dma_fence_signal_locked(exec->fence);
> +             exec->fence = NULL;
> +     }
>       vc4_submit_next_render_job(dev);
>  
>       wake_up_all(&vc4->job_wait_queue);
> -- 
> 2.11.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to