From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

We can use engine busy stats instead of the MMIO sampling timer for
better efficiency.

As a minimum this saves period * num_engines / sec mmio reads, and in a
better case, when only engine busy samplers are active, it enables us to
not kick off the sampling timer at all.

v2: Rebase.

v3:
 * Rebase, comments.
 * Leave engine busyness controls out of workers.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 36 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 26e735f27282..f8a6195c17f1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -90,6 +90,11 @@ static unsigned int event_enabled_bit(struct perf_event *event)
 	return config_enabled_bit(event->attr.config);
 }
 
+static bool supports_busy_stats(void)
+{
+	return i915.enable_execlists;
+}
+
 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 {
 	u64 enable = i915->pmu.enable;
@@ -100,6 +105,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 
 	if (!gpu_active)
 		enable &= ~ENGINE_SAMPLE_MASK;
+	else if (supports_busy_stats())
+		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	return enable;
 }
@@ -163,7 +170,8 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		if (enable & BIT(I915_SAMPLE_QUEUED))
 			engine->pmu.sample[I915_SAMPLE_QUEUED] += PERIOD;
 
-		if (enable & BIT(I915_SAMPLE_BUSY)) {
+		if ((enable & BIT(I915_SAMPLE_BUSY)) &&
+		    !engine->pmu.busy_stats) {
 			u32 val;
 
 			fw = grab_forcewake(dev_priv, fw);
@@ -342,6 +350,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
+		} else if (sample == I915_SAMPLE_BUSY &&
+			   engine->pmu.busy_stats) {
+			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample];
 		}
@@ -385,6 +396,12 @@ static void i915_pmu_event_read(struct perf_event *event)
 			    local64_read(&event->hw.prev_count));
 }
 
+static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
+{
+	return supports_busy_stats() &&
+	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
+}
+
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -429,7 +446,14 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		engine->pmu.enable_count[sample]++;
+		if (engine->pmu.enable_count[sample]++ == 0) {
+			if (engine_needs_busy_stats(engine) &&
+			    !engine->pmu.busy_stats) {
+				engine->pmu.busy_stats =
+					intel_enable_engine_stats(engine) == 0;
+				WARN_ON_ONCE(!engine->pmu.busy_stats);
+			}
+		}
 	}
 
 	/*
@@ -465,8 +489,14 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0)
+		if (--engine->pmu.enable_count[sample] == 0) {
 			engine->pmu.enable &= ~BIT(sample);
+			if (!engine_needs_busy_stats(engine) &&
+			    engine->pmu.busy_stats) {
+				engine->pmu.busy_stats = false;
+				intel_disable_engine_stats(engine);
+			}
+		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f618c5f98edf..fe554fc76867 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -265,6 +265,10 @@ struct intel_engine_cs {
 		 * Our internal timer stores the current counter in this field.
 		 */
 		u64 sample[I915_ENGINE_SAMPLE_MAX];
+		/**
+		 * @busy_stats: Has enablement of engine stats tracking been requested.
+		 */
+		bool busy_stats;
 	} pmu;
 
 	/*
-- 
2.9.5
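
[Editor's note: a rough back-of-the-envelope on the "period * num_engines / sec"
saving mentioned in the commit message above. The sampling frequency and engine
count below are illustrative assumptions, not figures taken from this patch.]

/*
 * Sketch only: estimate how many MMIO busyness reads per second the
 * busy-stats path avoids. Both constants are assumed values chosen for
 * illustration, not values defined by this patch.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long sample_hz = 200;	/* assumed PMU sampling timer frequency */
	const unsigned long num_engines = 5;	/* assumed number of engines being sampled */

	/* One busyness register read per engine on every sampling tick. */
	printf("~%lu MMIO reads/sec avoided\n", sample_hz * num_engines);

	return 0;
}

With those assumed numbers the timer-based sampler would issue on the order of a
thousand busyness register reads per second; that is the floor of the saving, while
the larger win is being able to skip the sampling timer entirely when only engine
busy samplers are active.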