This patch enables sampling of the context ID corresponding to requests
submitted on the engines through the perf stream. OA reports already
embed this information on Gen8+. For earlier Gens, we can use the value
sampled from request->ctx->hw_id to associate requests with OA reports.

v2: Updated stream->last_ctx_id to INVALID_CTX_ID during
stream_init.

Testcase: igt/intel_perf_dapc/perf-ctxid
Signed-off-by: Sagar Arun Kamble <sagar.a.kam...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  5 ++++
 drivers/gpu/drm/i915/i915_perf.c | 56 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h      |  7 +++++
 3 files changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3c1bc01..4b425f2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2245,6 +2245,11 @@ struct i915_perf_cs_sample {
 
        /* Is this sample prior to request start or post request end */
        enum request_sample_id id;
+
+       /**
+        * @ctx_id: Context ID associated with this perf sample
+        */
+       u32 ctx_id;
 };
 
 struct intel_cdclk_state {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index bce7388..8c6913a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -293,6 +293,7 @@
 /* Data common to periodic and RCS based OA samples */
 struct i915_perf_sample_data {
        u64 source;
+       u64 ctx_id;
        const u8 *report;
 };
 
@@ -347,6 +348,7 @@ struct i915_perf_sample_data {
 
 #define SAMPLE_OA_REPORT      (1<<0)
 #define SAMPLE_OA_SOURCE      (1<<1)
+#define SAMPLE_CTX_ID        (1<<2)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
@@ -620,6 +622,7 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
                list_move_tail(&sample->link, &stream->cs_samples);
                sample->request = i915_gem_request_get(request);
                sample->id = sample_id;
+               sample->ctx_id = request->ctx->hw_id;
                if (stream->sample_flags &
                    (SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE))
                        i915_perf_stream_patch_sample_oa(stream, request,
@@ -877,6 +880,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
                buf += 8;
        }
 
+       if (sample_flags & SAMPLE_CTX_ID) {
+               if (copy_to_user(buf, &data->ctx_id, 8))
+                       return -EFAULT;
+               buf += 8;
+       }
+
        if (sample_flags & SAMPLE_OA_REPORT) {
                if (copy_to_user(buf, data->report, report_size))
                        return -EFAULT;
@@ -903,12 +912,27 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
                                   char __user *buf, size_t count,
                                   size_t *offset, const u8 *report)
 {
+       struct drm_i915_private *dev_priv = stream->dev_priv;
        u32 sample_flags = stream->sample_flags;
        struct i915_perf_sample_data data = { 0 };
+       u32 *report32 = (u32 *)report;
 
        if (sample_flags & SAMPLE_OA_SOURCE)
                data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER;
 
+       if (sample_flags & SAMPLE_CTX_ID) {
+               if (INTEL_INFO(dev_priv)->gen < 8)
+                       data.ctx_id = 0;
+               else {
+                       /*
+                        * XXX: Just keep the lower 21 bits for now since I'm
+                        * not entirely sure if the HW touches any of the higher
+                        * bits in this field
+                        */
+                       data.ctx_id = report32[2] & 0x1fffff;
+               }
+       }
+
        if (sample_flags & SAMPLE_OA_REPORT)
                data.report = report;
 
@@ -1487,6 +1511,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
        if (sample_flags & SAMPLE_OA_SOURCE)
                data.source = I915_PERF_SAMPLE_OA_SOURCE_CS;
 
+       if (sample_flags & SAMPLE_CTX_ID)
+               data.ctx_id = node->ctx_id;
+
        return append_perf_sample(stream, buf, count, offset, &data);
 }
 
@@ -2665,6 +2692,19 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
        struct intel_engine_cs *engine = NULL;
        int ret;
 
+       if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) {
+               if (IS_HASWELL(dev_priv)) {
+                       DRM_ERROR("On HSW, context ID sampling only supported "
+                                 "via command stream\n");
+                       return -EINVAL;
+               } else if (!i915.enable_execlists) {
+                       DRM_ERROR("On Gen8+ without execlists, context ID "
+                                 "sampling only supported via "
+                                 "command stream\n");
+                       return -EINVAL;
+               }
+       }
+
        /* We set up some ratelimit state to potentially throttle any _NOTES
         * about spurious, invalid OA reports which we don't forward to
         * userspace.
@@ -2794,6 +2834,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 
        }
 
+       if (props->sample_flags & SAMPLE_CTX_ID) {
+               stream->sample_flags |= SAMPLE_CTX_ID;
+               stream->sample_size += 8;
+               stream->last_ctx_id = INVALID_CTX_ID;
+       }
+
        if (props->cs_mode) {
                if (!cs_sample_data) {
                        DRM_DEBUG_DRIVER(
@@ -2803,6 +2849,13 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
                        goto err_enable;
                }
 
+               if (!(props->sample_flags & SAMPLE_CTX_ID)) {
+                       DRM_ERROR("Stream engine given without requesting any "
+                                 "CS specific property\n");
+                       ret = -EINVAL;
+                       goto err_enable;
+               }
+
                idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
                curr_stream = srcu_dereference(
                                        dev_priv->perf.oa.exclusive_stream,
@@ -3520,6 +3573,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
                                props->engine = engine;
                        }
                        break;
+               case DRM_I915_PERF_PROP_SAMPLE_CTX_ID:
+                       props->sample_flags |= SAMPLE_CTX_ID;
+                       break;
                case DRM_I915_PERF_PROP_MAX:
                        MISSING_CASE(id);
                        return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e0b239..68baaf9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1433,6 +1433,12 @@ enum drm_i915_perf_property_id {
         */
        DRM_I915_PERF_PROP_ENGINE,
 
+       /**
+        * The value of this property set to 1 requests inclusion of context ID
+        * in the perf sample data.
+        */
+       DRM_I915_PERF_PROP_SAMPLE_CTX_ID,
+
        DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1499,6 +1505,7 @@ enum drm_i915_perf_record_type {
         *     struct drm_i915_perf_record_header header;
         *
         *     { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
+        *     { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
         *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
         * };
         */
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to