From: Sourab Gupta <sourab.gu...@intel.com>

This patch adds support for opening multiple concurrent perf streams for
different gpu engines, while retaining the restriction that only a single
stream may be open at a time for any particular gpu engine.
This enables a userspace client to open multiple streams, one per engine,
at any time to capture sample data for multiple gpu engines.
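
For illustration, a minimal userspace sketch of the intended usage (not
part of this patch): one stream is opened per engine via
DRM_IOCTL_I915_PERF_OPEN, and a second open on an already-used engine is
rejected with EBUSY. The engine-selection property id below is only a
placeholder for the property defined elsewhere in this series, and other
required properties (OA metrics set, report format, etc.) are omitted.

  /* Illustrative sketch only; engine_prop_id stands in for the
   * engine-selection property added by this series.
   */
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  static int open_stream(int drm_fd, uint64_t engine_prop_id, uint64_t engine)
  {
          uint64_t props[] = {
                  engine_prop_id, engine,          /* placeholder property */
                  DRM_I915_PERF_PROP_SAMPLE_OA, 1, /* metrics set/format omitted */
          };
          struct drm_i915_perf_open_param param = {
                  .flags = I915_PERF_FLAG_FD_CLOEXEC,
                  .num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
                  .properties_ptr = (uintptr_t)props,
          };

          /* Streams on different engines can be open concurrently;
           * opening a second stream on the same engine fails with EBUSY.
           */
          return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
  }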

Signed-off-by: Sourab Gupta <sourab.gu...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_perf.c | 69 ++++++++++++++++++++++------------------
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 119c82b..e912679 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2204,7 +2204,7 @@ struct drm_i915_private {
 
 
                struct hrtimer poll_check_timer;
-               struct i915_perf_stream *exclusive_stream;
+               struct i915_perf_stream *ring_stream[I915_NUM_ENGINES];
                wait_queue_head_t poll_wq[I915_NUM_ENGINES];
                atomic_t pollin[I915_NUM_ENGINES];
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b05c41a..8eb80e8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1086,7 +1086,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                 * an invalid ID. It could be good to annotate these
                 * reports with a _CTX_SWITCH_AWAY reason later.
                 */
-               if (!dev_priv->perf.exclusive_stream->ctx ||
+               if (!stream->ctx ||
                    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
                    dev_priv->perf.oa.oa_buffer.last_ctx_id == ctx_id) {
 
@@ -1097,7 +1097,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                         * the switch-away reports with an invalid
                         * context id to be recognisable by userspace.
                         */
-                       if (dev_priv->perf.exclusive_stream->ctx &&
+                       if (stream->ctx &&
                            dev_priv->perf.oa.specific_ctx_id != ctx_id)
                                report32[2] = 0xffffffff;
 
@@ -1763,7 +1763,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;
 
-       BUG_ON(stream != dev_priv->perf.exclusive_stream);
+       BUG_ON(stream != dev_priv->perf.ring_stream[stream->engine]);
 
        if (stream->using_oa) {
                dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
@@ -1777,7 +1777,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream)
        if (stream->cs_mode)
                free_command_stream_buf(dev_priv, stream->engine);
 
-       dev_priv->perf.exclusive_stream = NULL;
+       dev_priv->perf.ring_stream[stream->engine] = NULL;
 }
 
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
@@ -2220,14 +2220,14 @@ static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
 {
        assert_spin_locked(&dev_priv->perf.hook_lock);
 
-       if (dev_priv->perf.exclusive_stream->state !=
+       if (dev_priv->perf.ring_stream[RCS]->state !=
                                        I915_PERF_STREAM_DISABLED) {
                unsigned long ctx_id = 0;
 
-               if (dev_priv->perf.exclusive_stream->ctx)
+               if (dev_priv->perf.ring_stream[RCS]->ctx)
                        ctx_id = dev_priv->perf.oa.specific_ctx_id;
 
-               if (dev_priv->perf.exclusive_stream->ctx == NULL || ctx_id) {
+               if (dev_priv->perf.ring_stream[RCS]->ctx == NULL || ctx_id) {
                        bool periodic = dev_priv->perf.oa.periodic;
                        u32 period_exponent = dev_priv->perf.oa.period_exponent;
                        u32 report_format = dev_priv->perf.oa.oa_buffer.format;
@@ -2366,15 +2366,6 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
                                                        SAMPLE_TS);
        int ret;
 
-       /* To avoid the complexity of having to accurately filter
-        * counter reports and marshal to the appropriate client
-        * we currently only allow exclusive access
-        */
-       if (dev_priv->perf.exclusive_stream) {
-               DRM_ERROR("Stream already in use\n");
-               return -EBUSY;
-       }
-
        if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) {
                if (IS_HASWELL(dev_priv)) {
                        DRM_ERROR(
@@ -2392,6 +2383,12 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
        if (require_oa_unit) {
                int format_size;
 
+               /* Only allow exclusive access per stream */
+               if (dev_priv->perf.ring_stream[RCS]) {
+                       DRM_ERROR("Stream:0 already in use\n");
+                       return -EBUSY;
+               }
+
                /* If the sysfs metrics/ directory wasn't registered for some
                 * reason then don't let userspace try their luck with config
                 * IDs
@@ -2536,6 +2533,13 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
        }
 
        if (props->cs_mode) {
+               /* Only allow exclusive access per stream */
+               if (dev_priv->perf.ring_stream[props->engine]) {
+                       DRM_ERROR("Stream:%d already in use\n", props->engine);
+                       ret = -EBUSY;
+                       goto cs_error;
+               }
+
                if (!cs_sample_data) {
                        DRM_ERROR(
                                "Ring given without requesting any CS data to 
sample");
@@ -2576,7 +2580,7 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream,
 
        stream->ops = &i915_oa_stream_ops;
 
-       dev_priv->perf.exclusive_stream = stream;
+       dev_priv->perf.ring_stream[stream->engine] = stream;
 
        return 0;
 
@@ -2612,8 +2616,8 @@ i915_oa_legacy_context_pin_notify_locked(struct drm_i915_private *dev_priv,
        if (dev_priv->perf.oa.ops.update_hw_ctx_id_locked == NULL)
                return;
 
-       if (dev_priv->perf.exclusive_stream &&
-           dev_priv->perf.exclusive_stream->ctx == ctx) {
+       if (dev_priv->perf.ring_stream[RCS] &&
+           dev_priv->perf.ring_stream[RCS]->ctx == ctx) {
                struct i915_vma *vma = ctx->engine[RCS].state;
                u32 ctx_id = i915_ggtt_offset(vma);
 
@@ -2682,8 +2686,8 @@ void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req)
        if (dev_priv->perf.oa.ops.legacy_ctx_switch_unlocked == NULL)
                return;
 
-       if (dev_priv->perf.exclusive_stream &&
-           dev_priv->perf.exclusive_stream->state !=
+       if (dev_priv->perf.ring_stream[RCS] &&
+           dev_priv->perf.ring_stream[RCS]->state !=
                                I915_PERF_STREAM_DISABLED) {
 
                /* XXX: We don't take a lock here and this may run
@@ -2846,19 +2850,12 @@ static ssize_t i915_perf_read(struct file *file,
        return ret;
 }
 
-static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
+static void wake_up_perf_streams(void *data, async_cookie_t cookie)
 {
+       struct drm_i915_private *dev_priv = data;
        struct i915_perf_stream *stream;
 
-       struct drm_i915_private *dev_priv =
-               container_of(hrtimer, typeof(*dev_priv),
-                            perf.poll_check_timer);
-
-       /* No need to protect the streams list here, since the hrtimer is
-        * disabled before the stream is removed from list, and currently a
-        * single exclusive_stream is supported.
-        * XXX: revisit this when multiple concurrent streams are supported.
-        */
+       mutex_lock(&dev_priv->perf.streams_lock);
        list_for_each_entry(stream, &dev_priv->perf.streams, link) {
                if (stream_have_data__unlocked(stream)) {
                        atomic_set(&dev_priv->perf.pollin[stream->engine],
@@ -2866,6 +2863,16 @@ static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
                        wake_up(&dev_priv->perf.poll_wq[stream->engine]);
                }
        }
+       mutex_unlock(&dev_priv->perf.streams_lock);
+}
+
+static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(hrtimer, typeof(*dev_priv),
+                            perf.poll_check_timer);
+
+       async_schedule(wake_up_perf_streams, dev_priv);
 
        hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
 
-- 
1.9.1
