This should help limit the number of ioctls when submitting multiple
jobs. The new ioctl also supports syncobj timelines and BO access flags.

v3:
* Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the
  old submit path

Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 366 +++++++++++++++++++-----
 drivers/gpu/drm/panfrost/panfrost_job.c |   3 +
 include/uapi/drm/panfrost_drm.h         |  84 ++++++
 3 files changed, 375 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 6529e5972b47..e2897de6e77d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -138,111 +138,95 @@ panfrost_get_job_mappings(struct drm_file *file_priv, 
struct panfrost_job *job)
        return 0;
 }
 
-/**
- * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
- * referenced by the job.
- * @dev: DRM device
- * @file_priv: DRM file for this fd
- * @args: IOCTL args
- * @job: job being set up
- *
- * Resolve handles from userspace to BOs and attach them to job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at panfrost_job_cleanup() time.
- */
+#define PANFROST_BO_REF_ALLOWED_FLAGS \
+       (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP)
+
 static int
-panfrost_lookup_bos(struct drm_device *dev,
-                 struct drm_file *file_priv,
-                 struct drm_panfrost_submit *args,
-                 struct panfrost_job *job)
+panfrost_get_job_bos(struct drm_file *file_priv,
+                    u64 refs, u32 ref_stride, u32 count,
+                    struct panfrost_job *job)
 {
+       void __user *in = u64_to_user_ptr(refs);
        unsigned int i;
-       int ret;
 
-       job->bo_count = args->bo_handle_count;
+       job->bo_count = count;
 
-       if (!job->bo_count)
+       if (!count)
                return 0;
 
+       job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos),
+                                 GFP_KERNEL | __GFP_ZERO);
        job->bo_flags = kvmalloc_array(job->bo_count,
                                       sizeof(*job->bo_flags),
                                       GFP_KERNEL | __GFP_ZERO);
-       if (!job->bo_flags)
+       if (!job->bos || !job->bo_flags)
                return -ENOMEM;
 
-       for (i = 0; i < job->bo_count; i++)
-               job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE;
+       for (i = 0; i < count; i++) {
+               struct drm_panfrost_bo_ref ref = { };
+               int ret;
 
-       ret = drm_gem_objects_lookup(file_priv,
-                                    (void __user *)(uintptr_t)args->bo_handles,
-                                    job->bo_count, &job->bos);
-       if (ret)
-               return ret;
+               ret = copy_struct_from_user(&ref, sizeof(ref),
+                                           in + (i * ref_stride),
+                                           ref_stride);
+               if (ret)
+                       return ret;
 
-       return panfrost_get_job_mappings(file_priv, job);
+               /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were
+                * treated as exclusive.
+                */
+               if (ref_stride == sizeof(u32))
+                       ref.flags = PANFROST_BO_REF_EXCLUSIVE;
+
+               if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS))
+                       return -EINVAL;
+
+               job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle);
+               if (!job->bos[i])
+                       return -EINVAL;
+
+               job->bo_flags[i] = ref.flags;
+       }
+
+       return 0;
 }
 
-/**
- * panfrost_copy_in_sync() - Sets up job->deps with the sync objects
- * referenced by the job.
- * @dev: DRM device
- * @file_priv: DRM file for this fd
- * @args: IOCTL args
- * @job: job being set up
- *
- * Resolve syncobjs from userspace to fences and attach them to job.
- *
- * Note that this function doesn't need to unreference the fences on
- * failure, because that will happen at panfrost_job_cleanup() time.
- */
 static int
-panfrost_copy_in_sync(struct drm_device *dev,
-                 struct drm_file *file_priv,
-                 struct drm_panfrost_submit *args,
-                 struct panfrost_job *job)
+panfrost_get_job_in_syncs(struct drm_file *file_priv,
+                         u64 refs, u32 ref_stride,
+                         u32 count, struct panfrost_job *job)
 {
-       u32 *handles;
-       int ret = 0;
-       int i, in_fence_count;
+       const void __user *in = u64_to_user_ptr(refs);
+       unsigned int i;
+       int ret;
 
-       in_fence_count = args->in_sync_count;
-
-       if (!in_fence_count)
+       if (!count)
                return 0;
 
-       handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL);
-       if (!handles) {
-               ret = -ENOMEM;
-               DRM_DEBUG("Failed to allocate incoming syncobj handles\n");
-               goto fail;
-       }
-
-       if (copy_from_user(handles,
-                          (void __user *)(uintptr_t)args->in_syncs,
-                          in_fence_count * sizeof(u32))) {
-               ret = -EFAULT;
-               DRM_DEBUG("Failed to copy in syncobj handles\n");
-               goto fail;
-       }
-
-       for (i = 0; i < in_fence_count; i++) {
+       for (i = 0; i < count; i++) {
+               struct drm_panfrost_syncobj_ref ref = { };
                struct dma_fence *fence;
 
-               ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0,
-                                            &fence);
+               ret = copy_struct_from_user(&ref, sizeof(ref),
+                                           in + (i * ref_stride),
+                                           ref_stride);
                if (ret)
-                       goto fail;
+                       return ret;
+
+               if (ref.pad)
+                       return -EINVAL;
+
+               ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point,
+                                            0, &fence);
+               if (ret)
+                       return ret;
 
                ret = drm_gem_fence_array_add(&job->deps, fence);
-
                if (ret)
-                       goto fail;
+                       return ret;
        }
 
-fail:
-       kvfree(handles);
-       return ret;
+       return 0;
 }
 
 static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
@@ -289,11 +273,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, 
void *data,
        job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
        job->file_priv = file->driver_priv;
 
-       ret = panfrost_copy_in_sync(dev, file, args, job);
+       ret = panfrost_get_job_in_syncs(file, args->in_syncs, sizeof(u32),
+                                       args->in_sync_count, job);
        if (ret)
                goto fail_job;
 
-       ret = panfrost_lookup_bos(dev, file, args, job);
+       ret = panfrost_get_job_bos(file, args->bo_handles, sizeof(u32),
+                                  args->bo_handle_count, job);
+       if (ret)
+               goto fail_job;
+
+       ret = panfrost_get_job_mappings(file, job);
        if (ret)
                goto fail_job;
 
@@ -491,6 +481,225 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device 
*dev, void *data,
        return panfrost_submitqueue_destroy(priv, id);
 }
 
+struct panfrost_job_out_sync {
+       struct drm_syncobj *syncobj;
+       struct dma_fence_chain *chain;
+       u64 point;
+};
+
+static void
+panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count)
+{
+       unsigned int i;
+
+       for (i = 0; i < count; i++) {
+               if (!out_syncs[i].syncobj)
+                       break;
+
+               drm_syncobj_put(out_syncs[i].syncobj);
+               kvfree(out_syncs[i].chain);
+       }
+
+       kvfree(out_syncs);
+}
+
+static struct panfrost_job_out_sync *
+panfrost_get_job_out_syncs(struct drm_file *file_priv,
+                          u64 refs, u32 ref_stride,
+                          u32 count)
+{
+       void __user *in = u64_to_user_ptr(refs);
+       struct panfrost_job_out_sync *out_syncs;
+       unsigned int i;
+       int ret;
+
+       if (!count)
+               return NULL;
+
+       out_syncs = kvmalloc_array(count, sizeof(*out_syncs),
+                                  GFP_KERNEL | __GFP_ZERO);
+       if (!out_syncs)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < count; i++) {
+               struct drm_panfrost_syncobj_ref ref = { };
+
+               ret = copy_struct_from_user(&ref, sizeof(ref),
+                                           in + (i * ref_stride),
+                                           ref_stride);
+               if (ret)
+                       goto err_free_out_syncs;
+
+               if (ref.pad) {
+                       ret = -EINVAL;
+                       goto err_free_out_syncs;
+               }
+
+               out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle);
+               if (!out_syncs[i].syncobj) {
+                       ret = -EINVAL;
+                       goto err_free_out_syncs;
+               }
+
+               out_syncs[i].point = ref.point;
+               if (!out_syncs[i].point)
+                       continue;
+
+               out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain),
+                                            GFP_KERNEL);
+               if (!out_syncs[i].chain) {
+                       ret = -ENOMEM;
+                       goto err_free_out_syncs;
+               }
+       }
+
+       return out_syncs;
+
+err_free_out_syncs:
+       panfrost_put_job_out_syncs(out_syncs, count);
+       return ERR_PTR(ret);
+}
+
+static void
+panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs,
+                          unsigned int count, struct dma_fence *fence)
+{
+       unsigned int i;
+
+       for (i = 0; i < count; i++) {
+               if (out_syncs[i].chain) {
+                       drm_syncobj_add_point(out_syncs[i].syncobj,
+                                             out_syncs[i].chain,
+                                             fence, out_syncs[i].point);
+                       out_syncs[i].chain = NULL;
+               } else {
+                       drm_syncobj_replace_fence(out_syncs[i].syncobj,
+                                                 fence);
+               }
+       }
+}
+
+#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS
+
+static int
+panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv,
+                   struct panfrost_submitqueue *queue,
+                   const struct drm_panfrost_job *args,
+                   u32 bo_stride, u32 syncobj_stride)
+{
+       struct panfrost_device *pfdev = dev->dev_private;
+       struct panfrost_job_out_sync *out_syncs;
+       struct panfrost_job *job;
+       int ret;
+
+       if (!args->head)
+               return -EINVAL;
+
+       if (args->requirements & ~PANFROST_JD_ALLOWED_REQS)
+               return -EINVAL;
+
+       job = kzalloc(sizeof(*job), GFP_KERNEL);
+       if (!job)
+               return -ENOMEM;
+
+       kref_init(&job->refcount);
+
+       job->pfdev = pfdev;
+       job->jc = args->head;
+       job->requirements = args->requirements;
+       job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
+       job->file_priv = file_priv->driver_priv;
+       xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
+
+       ret = panfrost_get_job_in_syncs(file_priv,
+                                       args->in_syncs,
+                                       syncobj_stride,
+                                       args->in_sync_count,
+                                       job);
+       if (ret)
+               goto err_put_job;
+
+       out_syncs = panfrost_get_job_out_syncs(file_priv,
+                                              args->out_syncs,
+                                              syncobj_stride,
+                                              args->out_sync_count);
+       if (IS_ERR(out_syncs)) {
+               ret = PTR_ERR(out_syncs);
+               goto err_put_job;
+       }
+
+       ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride,
+                                  args->bo_count, job);
+       if (ret)
+               goto err_put_job;
+
+       ret = panfrost_get_job_mappings(file_priv, job);
+       if (ret)
+               goto err_put_job;
+
+       ret = panfrost_job_push(queue, job);
+       if (ret) {
+               panfrost_put_job_out_syncs(out_syncs, args->out_sync_count);
+               goto err_put_job;
+       }
+
+       panfrost_set_job_out_fence(out_syncs, args->out_sync_count,
+                                  job->render_done_fence);
+       panfrost_put_job_out_syncs(out_syncs, args->out_sync_count);
+       return 0;
+
+err_put_job:
+       panfrost_job_put(job);
+       return ret;
+}
+
+static int
+panfrost_ioctl_batch_submit(struct drm_device *dev, void *data,
+                           struct drm_file *file_priv)
+{
+       struct drm_panfrost_batch_submit *args = data;
+       void __user *jobs_args = u64_to_user_ptr(args->jobs);
+       struct panfrost_submitqueue *queue;
+       unsigned int i;
+       int ret;
+
+       /* Relax this test if new fields are added to
+        * drm_panfrost_{bo_ref,syncobj_ref,job}.
+        */
+       if (args->job_stride < sizeof(struct drm_panfrost_job) ||
+           args->bo_ref_stride < sizeof(struct drm_panfrost_bo_ref) ||
+           args->syncobj_ref_stride < sizeof(struct drm_panfrost_syncobj_ref))
+               return -EINVAL;
+
+       queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue);
+       if (IS_ERR(queue))
+               return PTR_ERR(queue);
+
+       for (i = 0; i < args->job_count; i++) {
+               struct drm_panfrost_job job_args = { };
+
+               ret = copy_struct_from_user(&job_args, sizeof(job_args),
+                                           jobs_args + (i * args->job_stride),
+                                           args->job_stride);
+               if (ret) {
+                       args->fail_idx = i;
+                       goto out_put_queue;
+               }
+
+               ret = panfrost_submit_job(dev, file_priv, queue, &job_args,
+                                         args->bo_ref_stride,
+                                         args->syncobj_ref_stride);
+               if (ret) {
+                       args->fail_idx = i;
+                       goto out_put_queue;
+               }
+       }
+
+out_put_queue:
+       panfrost_submitqueue_put(queue);
+       return 0;
+}
+
 int panfrost_unstable_ioctl_check(void)
 {
        if (!unstable_ioctls)
@@ -570,6 +779,7 @@ static const struct drm_ioctl_desc 
panfrost_drm_driver_ioctls[] = {
        PANFROST_IOCTL(MADVISE,         madvise,        DRM_RENDER_ALLOW),
        PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, 
DRM_RENDER_ALLOW),
        PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, 
DRM_RENDER_ALLOW),
+       PANFROST_IOCTL(BATCH_SUBMIT,    batch_submit,   DRM_RENDER_ALLOW),
 };
 
 DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 56ae89272e19..4e1540bce865 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct 
panfrost_job *job)
                                return ret;
                }
 
+               if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP)
+                       continue;
+
                ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i],
                                                       exclusive);
                if (ret)
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 7ee02fd1ac75..ce0c1b96a58c 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -23,6 +23,7 @@ extern "C" {
 #define DRM_PANFROST_MADVISE                   0x08
 #define DRM_PANFROST_CREATE_SUBMITQUEUE                0x09
 #define DRM_PANFROST_DESTROY_SUBMITQUEUE       0x0a
+#define DRM_PANFROST_BATCH_SUBMIT              0x0b
 
 #define DRM_IOCTL_PANFROST_SUBMIT              DRM_IOW(DRM_COMMAND_BASE + 
DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
 #define DRM_IOCTL_PANFROST_WAIT_BO             DRM_IOW(DRM_COMMAND_BASE + 
DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -33,6 +34,7 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_MADVISE             DRM_IOWR(DRM_COMMAND_BASE + 
DRM_PANFROST_MADVISE, struct drm_panfrost_madvise)
 #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue)
 #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32)
+#define DRM_IOCTL_PANFROST_BATCH_SUBMIT                
DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct 
drm_panfrost_batch_submit)
 
 /*
  * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -241,7 +243,89 @@ struct drm_panfrost_create_submitqueue {
        __u32 id;       /* out, identifier */
 };
 
+/* Syncobj reference passed at job submission time to encode explicit
+ * input/output fences.
+ */
+struct drm_panfrost_syncobj_ref {
+       __u32 handle;
+       __u32 pad;
+       __u64 point;
+};
+
 #define PANFROST_BO_REF_EXCLUSIVE      0x1
+#define PANFROST_BO_REF_NO_IMPLICIT_DEP        0x2
+
+/* Describes a BO referenced by a job and the type of access. */
+struct drm_panfrost_bo_ref {
+       __u32 handle;
+       __u32 flags;
+};
+
+/* Describes a GPU job and the resources attached to it. */
+struct drm_panfrost_job {
+       /** GPU pointer to the head of the job chain. */
+       __u64 head;
+
+       /**
+        * Array of drm_panfrost_bo_ref objects describing the BOs referenced
+        * by this job.
+        */
+       __u64 bos;
+
+       /**
+        * Arrays of drm_panfrost_syncobj_ref objects describing the input
+        * and output fences.
+        */
+       __u64 in_syncs;
+       __u64 out_syncs;
+
+       /** Syncobj reference array sizes. */
+       __u32 in_sync_count;
+       __u32 out_sync_count;
+
+       /** BO reference array size. */
+       __u32 bo_count;
+
+       /** Combination of PANFROST_JD_REQ_* flags. */
+       __u32 requirements;
+};
+
+/* Used to submit multiple jobs in one call */
+struct drm_panfrost_batch_submit {
+       /**
+        * Stride of the jobs array (needed to ease extension of the
+        * BATCH_SUBMIT ioctl). Should be set to
+        * sizeof(struct drm_panfrost_job).
+        */
+       __u32 job_stride;
+
+       /** Number of jobs to submit. */
+       __u32 job_count;
+
+       /* Pointer to a job array. */
+       __u64 jobs;
+
+       /**
+        * Stride of the BO and syncobj reference arrays (needed to ease
+        * extension of the BATCH_SUBMIT ioctl). Should be set to
+        * sizeof(struct drm_panfrost_bo_ref).
+        */
+       __u32 bo_ref_stride;
+       __u32 syncobj_ref_stride;
+
+       /**
+        * If the submission fails, this encodes the index of the job
+        * failed.
+        */
+       __u32 fail_idx;
+
+       /**
+        * ID of the queue to submit those jobs to. 0 is the default
+        * submit queue and should always exists. If you need a dedicated
+        * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE.
+        */
+       __u32 queue;
+};
 
 #if defined(__cplusplus)
 }
-- 
2.31.1

Reply via email to