We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   7 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   |   4 +-
 drivers/gpu/drm/i915/i915_drv.c              |   4 +
 drivers/gpu/drm/i915/i915_drv.h              |   1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 113 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.c             |  14 +--
 include/uapi/drm/i915_drm.h                  |  20 +++-
 8 files changed, 142 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index e381c1c73902..766fbbede430 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -445,6 +445,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+#define I915_ENGINE_HAS_OA           BIT(5)
        unsigned int flags;
 
        /*
@@ -534,6 +535,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct 
intel_engine_cs *engine)
        return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
 
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_HAS_OA;
+}
+
 #define instdone_slice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2ad95977f7a8..cad6fca4ba0f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2395,6 +2395,7 @@ int intel_execlists_submission_setup(struct 
intel_engine_cs *engine)
                engine->init_context = gen8_init_rcs_context;
                engine->emit_flush = gen8_emit_flush_render;
                engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+               engine->flags |= I915_ENGINE_HAS_OA;
        }
 
        return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f0d60affdba3..dc85a3e474b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2210,8 +2210,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
                engine->irq_enable_mask = I915_USER_INTERRUPT;
        }
 
-       if (IS_HASWELL(i915))
+       if (IS_HASWELL(i915)) {
                engine->emit_bb_start = hsw_emit_bb_start;
+               engine->flags |= I915_ENGINE_HAS_OA;
+       }
 
        engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5871e0cfbab0..6d9a15642342 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -472,6 +472,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, 
void *data,
        case I915_PARAM_PERF_REVISION:
                value = 2;
                break;
+       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+               /* Obviously requires perf support. */
+               value = dev_priv->perf.initialized;
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index abd564bfa03b..25860d99ffc6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3154,6 +3154,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
                            int metrics_set,
                            struct i915_oa_config **out_config,
                            struct drm_i915_gem_object **out_obj);
+void i915_oa_config_put(struct i915_oa_config *oa_config);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 361c232dde83..3794c6ce71e3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -288,6 +288,9 @@ struct i915_execbuffer {
         */
        int lut_size;
        struct hlist_head *buckets; /** ht for relocation handles */
+
+       struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is 
not needed. */
+       struct drm_i915_gem_object *oa_bo;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1183,6 +1186,33 @@ static void clflush_write32(u32 *addr, u32 value, 
unsigned int flushes)
                *addr = value;
 }
 
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+                     int perf_fd, u32 oa_config_id,
+                     struct i915_oa_config **out_oa_config,
+                     struct drm_i915_gem_object **out_oa_obj)
+{
+       struct file *perf_file;
+       int ret;
+
+       if (!dev_priv->perf.oa.exclusive_stream)
+               return -EINVAL;
+
+       perf_file = fget(perf_fd);
+       if (!perf_file)
+               return -EINVAL;
+
+       if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
+               return -EINVAL;
+
+       fput(perf_file);
+
+       ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+                                     out_oa_config, out_oa_obj);
+
+       return ret;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                             struct i915_vma *vma,
                             unsigned int len)
@@ -1937,12 +1967,15 @@ static bool i915_gem_check_execbuffer(struct 
drm_i915_gem_execbuffer2 *exec)
                        return false;
        }
 
-       if (exec->DR4 == 0xffffffff) {
-               DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-               exec->DR4 = 0;
+       /* We reuse DR1 & DR4 fields for passing the perf config detail. */
+       if (!(exec->flags & I915_EXEC_PERF_CONFIG)) {
+               if (exec->DR4 == 0xffffffff) {
+                       DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+                       exec->DR4 = 0;
+               }
+               if (exec->DR1 || exec->DR4)
+                       return false;
        }
-       if (exec->DR1 || exec->DR4)
-               return false;
 
        if ((exec->batch_start_offset | exec->batch_len) & 0x7)
                return false;
@@ -2048,6 +2081,42 @@ static int eb_submit(struct i915_execbuffer *eb)
                        return err;
        }
 
+       if (eb->oa_config &&
+           eb->oa_config != eb->i915->perf.oa.exclusive_stream->oa_config) {
+               struct i915_vma *oa_vma;
+
+               oa_vma = i915_vma_instance(eb->oa_bo,
+                                             &eb->engine->i915->ggtt.vm, NULL);
+               if (unlikely(IS_ERR(oa_vma))) {
+                       err = PTR_ERR(oa_vma);
+                       return err;
+               }
+
+               err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+               if (err)
+                       return err;
+
+               err = eb->engine->emit_bb_start(eb->request,
+                                               oa_vma->node.start,
+                                               0, I915_DISPATCH_SECURE);
+               if (err) {
+                       i915_vma_unpin(oa_vma);
+                       return err;
+               }
+
+               err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+               if (err) {
+                       i915_vma_unpin(oa_vma);
+                       return err;
+               }
+
+
+               i915_vma_unpin(oa_vma);
+
+
+               swap(eb->oa_config, 
eb->i915->perf.oa.exclusive_stream->oa_config);
+       }
+
        err = eb->engine->emit_bb_start(eb->request,
                                        eb->batch->node.start +
                                        eb->batch_start_offset,
@@ -2341,6 +2410,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        eb.buffer_count = args->buffer_count;
        eb.batch_start_offset = args->batch_start_offset;
        eb.batch_len = args->batch_len;
+       eb.oa_config = NULL;
 
        eb.batch_flags = 0;
        if (args->flags & I915_EXEC_SECURE) {
@@ -2385,17 +2455,29 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         */
        intel_gt_pm_get(eb.i915);
 
-       err = i915_mutex_lock_interruptible(dev);
-       if (err)
-               goto err_rpm;
-
        err = eb_select_engine(&eb, file, args);
        if (unlikely(err))
-               goto err_unlock;
+               goto err_rpm;
+
+       if (args->flags & I915_EXEC_PERF_CONFIG) {
+               if (!intel_engine_has_oa(eb.engine)) {
+                       err = -ENODEV;
+                       goto err_engine;
+               }
+
+               err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4,
+                                           &eb.oa_config, &eb.oa_bo);
+               if (err)
+                       goto err_engine;
+       }
+
+       err = i915_mutex_lock_interruptible(dev);
+       if (err)
+               goto err_oa;
 
        err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
        if (unlikely(err))
-               goto err_engine;
+               goto err_unlock;
 
        err = eb_relocate(&eb);
        if (err) {
@@ -2541,10 +2623,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
-err_engine:
-       eb_unpin_context(&eb);
 err_unlock:
        mutex_unlock(&dev->struct_mutex);
+err_oa:
+       if (eb.oa_config) {
+               i915_gem_object_put(eb.oa_bo);
+               i915_oa_config_put(eb.oa_config);
+       }
+err_engine:
+       eb_unpin_context(&eb);
 err_rpm:
        intel_gt_pm_put(eb.i915);
        i915_gem_context_put(eb.gem_context);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7e0ebd4bc8f2..7b861f12f161 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -365,7 +365,7 @@ struct perf_open_properties {
        int oa_period_exponent;
 };
 
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_put(struct i915_oa_config *oa_config)
 {
        if (!atomic_dec_and_test(&oa_config->ref_count))
                return;
@@ -515,7 +515,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
 
 err_buf_alloc:
        if (out_config) {
-               put_oa_config(oa_config);
+               i915_oa_config_put(oa_config);
                *out_config = NULL;
        }
 unlock:
@@ -1496,7 +1496,7 @@ static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream)
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
 
-       put_oa_config(stream->oa_config);
+       i915_oa_config_put(stream->oa_config);
 
        if (dev_priv->perf.oa.spurious_report_rs.missed) {
                DRM_NOTE("%d spurious OA report notices suppressed due to 
ratelimiting\n",
@@ -2264,7 +2264,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
-       put_oa_config(stream->oa_config);
+       i915_oa_config_put(stream->oa_config);
 
        intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
        intel_runtime_pm_put(dev_priv, stream->wakeref);
@@ -3441,7 +3441,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, 
void *data,
 sysfs_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
        DRM_DEBUG("Failed to add new OA config\n");
        return err;
 }
@@ -3495,7 +3495,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, 
void *data,
 
        DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
 
 config_err:
        mutex_unlock(&dev_priv->perf.metrics_lock);
@@ -3657,7 +3657,7 @@ static int destroy_config(int id, void *p, void *data)
 {
        struct i915_oa_config *oa_config = p;
 
-       put_oa_config(oa_config);
+       i915_oa_config_put(oa_config);
 
        return 0;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5601dc688295..e57fb5f249da 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -604,6 +604,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION       53
 
+/*
+ * Request an OA performance configuration change before running the commands
+ * given in an execbuf.
+ *
+ * Performance configuration ID is given in the DR4 field of
+ * drm_i915_gem_execbuffer2 and the file descriptor of the i915 perf stream is
+ * given in DR1. Execbuffer will fail if any of these parameter is invalid.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1126,7 +1136,15 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/* Request that perf monitoring hardware be reprogrammed before executing the
+ * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf
+ * must respectively contain the file descriptor of the perf monitoring device
+ * and the configuration to program.
+ */
+#define I915_EXEC_PERF_CONFIG   (1<<20)
+
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.21.0.392.gf8f6787159e

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to