From: Sourab Gupta <sourab.gu...@intel.com>

The periodic OA reports and the RCS based OA reports are collected in two
separate buffers, but both have to be forwarded to userspace through a
single perf event ring buffer. From a userspace perspective, the reports
should appear in that ring buffer ordered by their timestamps.
This patch addresses the problem by forwarding any pending periodic OA
reports with a lower (i.e. earlier) timestamp whenever a command streamer
based report is forwarded.
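
In essence, the forwarding path becomes a timestamp-ordered merge of the
two buffers. A rough standalone sketch of the idea (using a simplified,
hypothetical report layout rather than the driver's actual structures):

	#include <stdint.h>
	#include <stddef.h>

	/* Hypothetical stand-in for a periodic OA report */
	struct oa_report {
		uint64_t ts;	/* GPU timestamp of the snapshot */
		/* ... counter data ... */
	};

	/*
	 * Forward the periodic reports whose timestamp does not exceed
	 * gpu_ts (the timestamp of the RCS report about to be forwarded)
	 * and return the new head. Passing gpu_ts == UINT64_MAX drains
	 * all pending periodic reports, as done on event stop/flush.
	 */
	static size_t forward_periodic_upto(const struct oa_report *buf,
					    size_t head, size_t count,
					    uint64_t gpu_ts)
	{
		while (head < count && buf[head].ts <= gpu_ts) {
			/* emit buf[head] into the perf ring buffer here */
			head++;
		}
		return head;
	}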

Signed-off-by: Sourab Gupta <sourab.gu...@intel.com>
---
 drivers/gpu/drm/i915/i915_oa_perf.c | 38 ++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index a4fdca3..491496b 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -48,8 +48,7 @@ static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv,
 }
 
 static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv,
-                               u32 head,
-                               u32 tail)
+                               u32 head, u32 tail, u64 gpu_ts)
 {
        struct perf_event *exclusive_event = dev_priv->oa_pmu.exclusive_event;
        int snapshot_size = dev_priv->oa_pmu.oa_buffer.format_size;
@@ -58,14 +57,6 @@ static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv,
        u8 *snapshot;
        u32 taken;
 
-       /*
-        * Schedule a worker to forward the RCS based OA reports collected.
-        * A worker is needed since it requires device mutex to be taken
-        * which can't be done here because of atomic context
-        */
-       if (dev_priv->oa_pmu.multiple_ctx_mode)
-               schedule_work(&dev_priv->oa_pmu.work_timer);
-
        head -= dev_priv->oa_pmu.oa_buffer.gtt_offset;
        tail -= dev_priv->oa_pmu.oa_buffer.gtt_offset;
 
@@ -75,12 +66,19 @@ static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv,
         */
 
        while ((taken = OA_TAKEN(tail, head))) {
+               u64 snapshot_ts;
+
                /* The tail increases in 64 byte increments, not in
                 * format_size steps. */
                if (taken < snapshot_size)
                        break;
 
                snapshot = oa_buf_base + (head & mask);
+
+               snapshot_ts = *(u64 *)(snapshot + 4);
+               if (snapshot_ts > gpu_ts)
+                       break;
+
                head += snapshot_size;
 
                /* We currently only allow exclusive access to the counters
@@ -122,7 +120,7 @@ static void log_oa_status(struct drm_i915_private *dev_priv,
 }
 
 static void flush_oa_snapshots(struct drm_i915_private *dev_priv,
-                              bool skip_if_flushing)
+                              bool skip_if_flushing, u64 gpu_ts)
 {
        unsigned long flags;
        u32 oastatus2;
@@ -165,7 +163,7 @@ static void flush_oa_snapshots(struct drm_i915_private *dev_priv,
                             GEN7_OASTATUS1_REPORT_LOST));
        }
 
-       head = forward_oa_snapshots(dev_priv, head, tail);
+       head = forward_oa_snapshots(dev_priv, head, tail, gpu_ts);
 
        I915_WRITE(GEN7_OASTATUS2, (head & GEN7_OASTATUS2_HEAD_MASK) |
                                    GEN7_OASTATUS2_GGTT);
@@ -215,6 +213,7 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
        u8 *snapshot;
        struct drm_i915_oa_node_ctx_id *ctx_info;
        struct perf_raw_record raw;
+       u64 snapshot_ts;
 
        format_size = dev_priv->oa_pmu.oa_rcs_buffer.format_size;
        snapshot_size = format_size + sizeof(*ctx_info);
@@ -223,6 +222,10 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
        ctx_info = (struct drm_i915_oa_node_ctx_id *)(snapshot + format_size);
        ctx_info->ctx_id = node->ctx_id;
 
+       /* Flush the periodic snapshots up to the ts of this OA report */
+       snapshot_ts = *(u64 *)(snapshot + 4);
+       flush_oa_snapshots(dev_priv, true, snapshot_ts);
+
        perf_sample_data_init(&data, 0, event->hw.last_period);
 
        /* Note: the combined u32 raw->size member + raw data itself must be 8
@@ -502,7 +505,10 @@ static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
        struct drm_i915_private *i915 =
                container_of(hrtimer, typeof(*i915), oa_pmu.timer);
 
-       flush_oa_snapshots(i915, true);
+       if (i915->oa_pmu.multiple_ctx_mode)
+               schedule_work(&i915->oa_pmu.work_timer);
+       else
+               flush_oa_snapshots(i915, true, U64_MAX);
 
        hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
        return HRTIMER_RESTART;
@@ -931,7 +937,9 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
 
        if (event->attr.sample_period) {
                hrtimer_cancel(&dev_priv->oa_pmu.timer);
-               flush_oa_snapshots(dev_priv, false);
+               if (dev_priv->oa_pmu.multiple_ctx_mode)
+                       schedule_work(&dev_priv->oa_pmu.work_timer);
+               flush_oa_snapshots(dev_priv, false, U64_MAX);
        }
 
        event->hw.state = PERF_HES_STOPPED;
@@ -971,7 +979,7 @@ static int i915_oa_event_flush(struct perf_event *event)
                        if (ret)
                                return ret;
                }
-               flush_oa_snapshots(i915, true);
+               flush_oa_snapshots(i915, true, U64_MAX);
        }
 
        return 0;
-- 
1.8.5.1
