Combine our per-engine metrics to track begin/end of the class as a
whole and present that as culmulative runtime via PMU.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  18 +++
 drivers/gpu/drm/i915/gt/intel_engine_stats.h |  34 ++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_user.c  |   3 +
 drivers/gpu/drm/i915/gt/intel_gt.h           |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h     |  28 +++++
 drivers/gpu/drm/i915/i915_pmu.c              | 119 +++++++++++++++----
 include/uapi/drm/i915_drm.h                  |   3 +
 8 files changed, 185 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b1b44afc94ba..3f71483bd6f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -314,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id)
        engine->instance = info->instance;
        __sprint_engine_name(engine);
 
+       engine->class_stats = &gt->stats_class[info->class];
+
        engine->props.dma_latency_ns =
                CONFIG_DRM_I915_DMA_LATENCY;
        engine->props.heartbeat_interval_ms =
@@ -1789,6 +1791,22 @@ intel_engine_find_active_request(struct intel_engine_cs 
*engine)
        return active;
 }
 
+ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats *st)
+{
+       unsigned int seq;
+       ktime_t total;
+
+       do {
+               seq = read_seqbegin(&st->lock);
+               total = st->total;
+               if (atomic_read(&st->active))
+                       total = ktime_add(total,
+                                         ktime_sub(ktime_get(), st->start));
+       } while (unlikely(read_seqretry(&st->lock, seq)));
+
+       return total;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h 
b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
index 24fbdd94351a..b199137a977e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
@@ -12,6 +12,36 @@
 
 #include "i915_gem.h" /* GEM_BUG_ON */
 #include "intel_engine.h"
+#include "intel_gt_types.h"
+
+static inline void intel_engine_class_start(struct intel_runtime_stats *st)
+{
+       unsigned long flags;
+
+       if (atomic_add_unless(&st->active, 1, 0))
+               return;
+
+       write_seqlock_irqsave(&st->lock, flags);
+       if (!atomic_read(&st->active))
+               st->start = ktime_get();
+       atomic_inc(&st->active);
+       write_sequnlock_irqrestore(&st->lock, flags);
+}
+
+static inline void intel_engine_class_stop(struct intel_runtime_stats *st)
+{
+       unsigned long flags;
+
+       if (atomic_add_unless(&st->active, -1, 1))
+               return;
+
+       write_seqlock_irqsave(&st->lock, flags);
+       if (atomic_read(&st->active) == 1)
+               st->total =
+                       ktime_add(st->total, ktime_sub(ktime_get(), st->start));
+       atomic_dec(&st->active);
+       write_sequnlock_irqrestore(&st->lock, flags);
+}
 
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
@@ -22,6 +52,8 @@ static inline void intel_engine_context_in(struct 
intel_engine_cs *engine)
                return;
        }
 
+       intel_engine_class_start(engine->class_stats);
+
        /* The writer is serialised; but the pmu reader may be from hardirq */
        local_irq_save(flags);
        write_seqcount_begin(&engine->stats.lock);
@@ -55,6 +87,8 @@ static inline void intel_engine_context_out(struct 
intel_engine_cs *engine)
 
        write_seqcount_end(&engine->stats.lock);
        local_irq_restore(flags);
+
+       intel_engine_class_stop(engine->class_stats);
 }
 
 #endif /* __INTEL_ENGINE_STATS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 21e9f9de4844..ae415725cfde 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -499,6 +499,7 @@ struct intel_engine_cs {
                 */
                ktime_t rps;
        } stats;
+       struct intel_runtime_stats *class_stats;
 
        struct {
                unsigned long heartbeat_interval_ms;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 6b5a4fdc14a0..24ac33d1b16e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -210,6 +210,9 @@ void intel_engines_driver_register(struct drm_i915_private 
*i915)
                GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances));
                engine->uabi_instance = uabi_instances[engine->uabi_class]++;
 
+               if (intel_engine_supports_stats(engine))
+                       engine->class_stats->enabled = true;
+
                /* Replace the internal name with the final user facing name */
                memcpy(old, engine->name, sizeof(engine->name));
                scnprintf(engine->name, sizeof(engine->name), "%s%u",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 9157c7411f60..f73f1d1d3754 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt 
*gt)
 void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p);
 
+ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats 
*st);
+
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a83d3e18254d..3c12d4a3197e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -29,6 +29,32 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
+struct intel_runtime_stats {
+       atomic_t active;
+
+       /**
+        * @lock: Lock protecting the below fields.
+        */
+       seqlock_t lock;
+
+       /**
+        * @total: Total time this engine was busy.
+        *
+        * Accumulated time not counting the most recent block in cases
+        * where engine is currently busy (active > 0).
+        */
+       ktime_t total;
+
+       /**
+        * @start: Timestamp of the last idle to active transition.
+        *
+        * Idle is defined as active == 0, active is active > 0.
+        */
+       ktime_t start;
+
+       bool enabled;
+};
+
 struct intel_gt {
        struct drm_i915_private *i915;
        struct intel_uncore *uncore;
@@ -112,6 +138,8 @@ struct intel_gt {
                ktime_t start;
        } stats;
 
+       struct intel_runtime_stats stats_class[MAX_ENGINE_CLASS + 1];
+
        struct intel_engine_cs *engine[I915_NUM_ENGINES];
        struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
                                            [MAX_ENGINE_INSTANCE + 1];
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2b88c0baa1bf..026999a82231 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -9,6 +9,7 @@
 #include "gt/intel_engine.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
 #include "gt/intel_rps.h"
@@ -506,13 +507,28 @@ static int engine_event_init(struct perf_event *event)
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;
+       u8 class = engine_event_class(event);
+       u8 instance = engine_event_instance(event);
+       u8 sample = engine_event_sample(event);
 
-       engine = intel_engine_lookup_user(i915, engine_event_class(event),
-                                         engine_event_instance(event));
+       if (instance == 0xff) {
+               if (sample == 0)
+                       return 0;
+
+               if (class >= ARRAY_SIZE(i915->gt.stats_class))
+                       return -ENODEV;
+
+               if (!i915->gt.stats_class[class].enabled)
+                       return -ENODEV;
+
+               return -ENOENT;
+       }
+
+       engine = intel_engine_lookup_user(i915, class, instance);
        if (!engine)
                return -ENODEV;
 
-       return engine_event_status(engine, engine_event_sample(event));
+       return engine_event_status(engine, sample);
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -557,6 +573,33 @@ static int i915_pmu_event_init(struct perf_event *event)
        return 0;
 }
 
+static u64 engine_event_read(struct perf_event *event)
+{
+       struct drm_i915_private *i915 =
+               container_of(event->pmu, typeof(*i915), pmu.base);
+       u8 sample = engine_event_sample(event);
+       u8 class = engine_event_class(event);
+       u8 instance = engine_event_instance(event);
+       struct intel_engine_cs *engine;
+       ktime_t unused;
+
+       if (instance == 0xff) {
+               struct intel_runtime_stats *st = &i915->gt.stats_class[class];
+
+               return intel_runtime_stats_get_busy_time(st);
+       }
+
+       engine = intel_engine_lookup_user(i915, class, instance);
+       if (GEM_WARN_ON(!engine))
+               return 0; /* Do nothing */
+       else if (sample == I915_SAMPLE_BUSY &&
+                intel_engine_supports_stats(engine))
+               return ktime_to_ns(intel_engine_get_busy_time(engine,
+                                                             &unused));
+       else
+               return engine->pmu.sample[sample].cur;
+}
+
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
@@ -565,24 +608,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
        u64 val = 0;
 
        if (is_engine_event(event)) {
-               u8 sample = engine_event_sample(event);
-               struct intel_engine_cs *engine;
-
-               engine = intel_engine_lookup_user(i915,
-                                                 engine_event_class(event),
-                                                 engine_event_instance(event));
-
-               if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
-                       /* Do nothing */
-               } else if (sample == I915_SAMPLE_BUSY &&
-                          intel_engine_supports_stats(engine)) {
-                       ktime_t unused;
-
-                       val = ktime_to_ns(intel_engine_get_busy_time(engine,
-                                                                    &unused));
-               } else {
-                       val = engine->pmu.sample[sample].cur;
-               }
+               val = engine_event_read(event);
        } else {
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
@@ -666,7 +692,7 @@ static void i915_pmu_enable(struct perf_event *event)
         * For per-engine events the bitmask and reference counting
         * is stored per engine.
         */
-       if (is_engine_event(event)) {
+       if (is_engine_event(event) && engine_event_instance(event) != 0xff) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;
 
@@ -710,7 +736,7 @@ static void i915_pmu_disable(struct perf_event *event)
 
        spin_lock_irqsave(&pmu->lock, flags);
 
-       if (is_engine_event(event)) {
+       if (is_engine_event(event) && engine_event_instance(event) != 0xff) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;
 
@@ -891,6 +917,18 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
        return ++attr;
 }
 
+static const char *class_repr(u8 class)
+{
+       switch (class) {
+       case RENDER_CLASS: return "render";
+       case VIDEO_DECODE_CLASS: return "video-decode";
+       case VIDEO_ENHANCEMENT_CLASS: return "video-enhance";
+       case COPY_ENGINE_CLASS: return "copy";
+       }
+
+       return NULL;
+}
+
 static struct attribute **
 create_event_attributes(struct i915_pmu *pmu)
 {
@@ -919,6 +957,8 @@ create_event_attributes(struct i915_pmu *pmu)
        struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
        struct attribute **attr = NULL, **attr_iter;
        struct intel_engine_cs *engine;
+       unsigned long class_enable = 0;
+       unsigned long class_disable = 0;
        unsigned int i;
 
        /* Count how many counters we will be exposing. */
@@ -928,6 +968,11 @@ create_event_attributes(struct i915_pmu *pmu)
        }
 
        for_each_uabi_engine(engine, i915) {
+               if (intel_engine_supports_stats(engine))
+                       class_enable |= BIT(engine->class);
+               else
+                       class_disable |= BIT(engine->class);
+
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        if (!engine_event_status(engine,
                                                 engine_events[i].sample))
@@ -935,6 +980,9 @@ create_event_attributes(struct i915_pmu *pmu)
                }
        }
 
+       class_enable &= ~class_disable;
+       count += hweight_long(class_enable);
+
        /* Allocate attribute objects and table. */
        i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
        if (!i915_attr)
@@ -1008,6 +1056,31 @@ create_event_attributes(struct i915_pmu *pmu)
                }
        }
 
+       for_each_set_bit(i, &class_enable, BITS_PER_LONG) {
+               const char *name;
+               char *str;
+
+               name = class_repr(i);
+               if (GEM_WARN_ON(!name))
+                       continue;
+
+               str = kasprintf(GFP_KERNEL, "%s-%s", name, "busy");
+               if (!str)
+                       goto err;
+
+               *attr_iter++ = &i915_iter->attr.attr;
+               i915_iter =
+                       add_i915_attr(i915_iter, str,
+                                     __I915_PMU_ENGINE(i, 0xff, 0));
+
+               str = kasprintf(GFP_KERNEL, "%s.unit", str);
+               if (!str)
+                       goto err;
+
+               *attr_iter++ = &pmu_iter->attr.attr;
+               pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+       }
+
        pmu->i915_attr = i915_attr;
        pmu->pmu_attr = pmu_attr;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1987e2ea79a3..3aacdf756e90 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -171,6 +171,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
+#define I915_PMU_CLASS_BUSY(class) \
+       __I915_PMU_ENGINE(class, 0xff, 0)
+
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
 #define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to