We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to indicate which OA configuration should be used for a
batch buffer. This requires the process making the execbuf with this
flag to also own the perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 110 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_drv.c               |   4 +
 drivers/gpu/drm/i915/i915_drv.h               |   1 +
 drivers/gpu/drm/i915/i915_perf.c              |  14 +--
 include/uapi/drm/i915_drm.h                   |  37 ++++++
 8 files changed, 169 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 02dc5480e8fe..4a785999a9c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -280,7 +280,11 @@ struct i915_execbuffer {
        struct {
                u64 flags; /** Available extensions parameters */
                struct drm_i915_gem_exec_timeline_fences timeline_fences;
+               struct drm_i915_gem_execbuffer_perf_ext perf_config;
        } extensions;
+
+       struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if 
not needed. */
+       struct drm_i915_gem_object *oa_bo;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1198,6 +1202,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
        return err;
 }
 
+/*
+ * Look up the OA config requested by an execbuf. perf_fd must refer to the
+ * currently open exclusive OA stream, otherwise -EINVAL is returned. On
+ * success the config and its object are returned through out_oa_config and
+ * out_oa_obj by i915_perf_get_oa_config().
+ */
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+                     s32 perf_fd, u64 oa_config_id,
+                     struct i915_oa_config **out_oa_config,
+                     struct drm_i915_gem_object **out_oa_obj)
+{
+       struct file *perf_file;
+       bool owns_stream;
+
+       if (!dev_priv->perf.oa.exclusive_stream)
+               return -EINVAL;
+
+       perf_file = fget(perf_fd);
+       if (!perf_file)
+               return -EINVAL;
+
+       /*
+        * Only the comparison needs the file; drop the fget() reference
+        * before acting on the result so that no path leaks it.
+        */
+       owns_stream = perf_file->private_data ==
+                     dev_priv->perf.oa.exclusive_stream;
+       fput(perf_file);
+       if (!owns_stream)
+               return -EINVAL;
+
+       return i915_perf_get_oa_config(dev_priv, oa_config_id,
+                                      out_oa_config, out_oa_obj);
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                             struct i915_vma *vma,
                             unsigned int len)
@@ -2060,6 +2092,51 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
        list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
 }
 
+/*
+ * Emit a batch buffer start for the OA configuration object (eb->oa_bo) on
+ * eb->request when the requested config differs from the one currently held
+ * by the exclusive perf stream. Returns 0 on success or when there is nothing
+ * to do, a negative error code otherwise.
+ */
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+       struct i915_vma *oa_vma;
+       int err;
+
+       /* No OA config was requested for this execbuf. */
+       if (!eb->oa_config)
+               return 0;
+
+       /*
+        * If the config hasn't changed, skip reconfiguring the HW (this is
+        * subject to a delay we want to avoid as much as possible).
+        */
+       if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+               return 0;
+
+       /* The config object is looked up in, and pinned into, the global GTT. */
+       oa_vma = i915_vma_instance(eb->oa_bo,
+                                  &eb->engine->i915->ggtt.vm, NULL);
+       if (unlikely(IS_ERR(oa_vma)))
+               return PTR_ERR(oa_vma);
+
+       err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+       if (err)
+               return err;
+
+       /* Mark the vma active on the request before emitting any commands. */
+       err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+       if (err) {
+               i915_vma_unpin(oa_vma);
+               return err;
+       }
+
+       err = eb->engine->emit_bb_start(eb->request,
+                                       oa_vma->node.start,
+                                       0, I915_DISPATCH_SECURE);
+       if (err) {
+               i915_vma_unpin(oa_vma);
+               return err;
+       }
+
+       i915_vma_unpin(oa_vma);
+
+       /*
+        * The stream now tracks the new config; eb->oa_config is left holding
+        * the previous one.
+        */
+       swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+       return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
        int err;
@@ -2086,6 +2163,10 @@ static int eb_submit(struct i915_execbuffer *eb)
                        return err;
        }
 
+       err = eb_oa_config(eb);
+       if (err)
+               return err;
+
        err = eb->engine->emit_bb_start(eb->request,
                                        eb->batch->node.start +
                                        eb->batch_start_offset,
@@ -2519,6 +2600,13 @@ parse_execbuf2_extensions(struct 
drm_i915_gem_execbuffer2 *args,
                                return -EFAULT;
                        break;
 
+               case DRM_I915_GEM_BASE_EXECBUFFER_TYPE_PERF:
+                       if (copy_from_user(&eb->extensions.perf_config,
+                                          u64_to_user_ptr(iter_ptr),
+                                          sizeof(eb->extensions.perf_config)))
+                               return -EFAULT;
+                       break;
+
                default:
                        return -EINVAL;
                }
@@ -2567,6 +2655,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        eb.buffer_count = args->buffer_count;
        eb.batch_start_offset = args->batch_start_offset;
        eb.batch_len = args->batch_len;
+       eb.oa_config = NULL;
 
        eb.batch_flags = 0;
        if (args->flags & I915_EXEC_SECURE) {
@@ -2651,9 +2740,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (unlikely(err))
                goto err_unlock;
 
+       if (eb.extensions.flags & BIT(DRM_I915_GEM_BASE_EXECBUFFER_TYPE_PERF)) {
+               if (!intel_engine_has_oa(eb.engine)) {
+                       err = -ENODEV;
+                       goto err_engine;
+               }
+
+               err = get_execbuf_oa_config(eb.i915,
+                                           eb.extensions.perf_config.perf_fd,
+                                           eb.extensions.perf_config.oa_config,
+                                           &eb.oa_config, &eb.oa_bo);
+               if (err)
+                       goto err_engine;
+       }
+
        err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
        if (unlikely(err))
-               goto err_engine;
+               goto err_oa;
 
        err = eb_relocate(&eb);
        if (err) {
@@ -2806,6 +2909,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
+err_oa:
+       if (eb.oa_config) {
+               i915_gem_object_put(eb.oa_bo);
+               i915_oa_config_put(eb.oa_config);
+       }
 err_engine:
        eb_unpin_context(&eb);
 err_unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 01223864237a..97badb185eb2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -457,6 +457,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_OA           BIT(6)
        unsigned int flags;
 
        /*
@@ -552,6 +553,12 @@ intel_engine_is_virtual(const struct intel_engine_cs 
*engine)
        return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
+/* True if the engine has I915_ENGINE_HAS_OA set in its flags. */
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+       return (engine->flags & I915_ENGINE_HAS_OA) != 0;
+}
+
 #define instdone_slice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fed704802c57..ed19f4e53d31 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2732,6 +2732,7 @@ int intel_execlists_submission_setup(struct 
intel_engine_cs *engine)
                engine->init_context = gen8_init_rcs_context;
                engine->emit_flush = gen8_emit_flush_render;
                engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+               engine->flags |= I915_ENGINE_HAS_OA;
        }
 
        return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index ff58d658e3e2..972193cfbf41 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2212,8 +2212,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
                engine->irq_enable_mask = I915_USER_INTERRUPT;
        }
 
-       if (IS_HASWELL(i915))
+       if (IS_HASWELL(i915)) {
                engine->emit_bb_start = hsw_emit_bb_start;
+               engine->flags |= I915_ENGINE_HAS_OA;
+       }
 
        engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a872791f98bd..75029d1a3802 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -478,6 +478,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, 
void *data,
        case I915_PARAM_PERF_REVISION:
                value = 1;
                break;
+       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+               /* Obviously requires perf support. */
+               value = dev_priv->perf.initialized;
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f1e51307253a..aefdae856e77 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2833,6 +2833,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
                            int metrics_set,
                            struct i915_oa_config **out_config,
                            struct drm_i915_gem_object **out_obj);
+void i915_oa_config_put(struct i915_oa_config *oa_config);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e0071e44de3d..82a282f668c0 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -366,7 +366,7 @@ struct perf_open_properties {
        int oa_period_exponent;
 };
 
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_put(struct i915_oa_config *oa_config)
 {
        if (!atomic_dec_and_test(&oa_config->ref_count))
                return;
@@ -500,7 +500,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
 
 err_buf_alloc:
        if (out_config) {
-               put_oa_config(oa_config);
+               i915_oa_config_put(oa_config);
                *out_config = NULL;
        }
 unlock:
@@ -1475,7 +1475,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
 
-       put_oa_config(stream->oa_config);
+       i915_oa_config_put(stream->oa_config);
 
        if (dev_priv->perf.oa.spurious_report_rs.missed) {
                DRM_NOTE("%d spurious OA report notices suppressed due to 
ratelimiting\n",
@@ -2243,7 +2243,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
-       put_oa_config(stream->oa_config);
+       i915_oa_config_put(stream->oa_config);
 
        intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
        intel_runtime_pm_put(dev_priv, stream->wakeref);
@@ -3406,7 +3406,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
 sysfs_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
        DRM_DEBUG("Failed to add new OA config\n");
        return err;
 }
@@ -3460,7 +3460,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, 
void *data,
 
        DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
 
 config_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
@@ -3622,7 +3622,7 @@ static int destroy_config(int id, void *p, void *data)
 {
        struct i915_oa_config *oa_config = p;
 
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
 
        return 0;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ba1b02859346..7f770183ee31 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
 
+/*
+ * Request an OA performance configuration change before running the commands
+ * given in an execbuf.
+ *
+ * Performance configuration ID is given in the DR4 field of
+ * drm_i915_gem_execbuffer2 and the file descriptor of the i915 perf stream is
+ * given in DR1. Execbuffer will fail if any of these parameters is invalid.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_base_execbuffer_type {
         */
        DRM_I915_GEM_BASE_EXECBUFFER_TYPE_TIMELINE_FENCES,
 
+       /**
+        * This identifier is associated with
+        * drm_i915_gem_execbuffer_perf_ext.
+        */
+       DRM_I915_GEM_BASE_EXECBUFFER_TYPE_PERF,
+
        DRM_I915_GEM_BASE_EXECBUFFER_TYPE_MAX /* non-ABI */
 };
 
@@ -1073,6 +1089,27 @@ struct drm_i915_gem_exec_timeline_fences {
        __u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_perf_ext {
+       struct drm_i915_gem_base_execbuffer_ext base;
+
+       /**
+        * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+        * This is used to verify that the application owns the perf stream.
+        */
+       __s32 perf_fd;
+
+       /**
+        * Unused for now. Must be cleared to zero.
+        */
+       __u32 pad;
+
+       /**
+        * OA configuration ID to switch to before executing the commands
+        * associated to the execbuf.
+        */
+       __u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
        /**
         * List of gem_exec_object2 structs
-- 
2.21.0.392.gf8f6787159e

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to