On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.di...@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> >   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
> >   2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> > b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >     return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >   }
> >   -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > -   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > -   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > -   return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int 
> > sample)
> > -{
> > -   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > -   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 
> > val, u32 mul)
> > -{
> > -   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
> > mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. I would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.

Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.

The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.

Let me know if you want any changes, otherwise I will leave as is.

> Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Thanks for the review. Thanks Andrzej too :)
--
Ashutosh

> > -
> >   static u64 get_rc6(struct intel_gt *gt)
> >   {
> >     struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >     spin_lock_irqsave(&pmu->lock, flags);
> >             if (awake) {
> > -           store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > +           pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >     } else {
> >             /*
> >              * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >              * counter value.
> >              */
> >             val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > -           val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > +           val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >     }
> >   - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > -           val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > +   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > +           val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> >     else
> > -           store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > +           pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >             spin_unlock_irqrestore(&pmu->lock, flags);
> >   @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> >             with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> >                     u64 val = __get_rc6(gt);
> >   -                 store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > -                   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > -                                val);
> > +                   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > +                   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
> > val;
> >                     pmu->sleep_last[i] = ktime_get_raw();
> >             }
> >     }
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >   {
> >     struct i915_pmu *pmu = &gt->i915->pmu;
> >   - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > +   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> >     pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >   }
> >   @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> >     }
> >   }
> >   +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > +   sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> >   static bool
> >   frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> >   {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
> > period_ns)
> >             if (!val)
> >                     val = intel_gpu_freq(rps, rps->cur_freq);
> >   -         add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > +           add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> >                             val, period_ns / 1000);
> >     }
> >             if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > -           add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > +           add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> >                             intel_rps_get_requested_frequency(rps),
> >                             period_ns / 1000);
> >     }
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event 
> > *event)
> >             switch (config) {
> >             case I915_PMU_ACTUAL_FREQUENCY:
> >                     val =
> > -                      div_u64(read_sample(pmu, gt_id,
> > -                                          __I915_SAMPLE_FREQ_ACT),
> > +                      
> > div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> >                                USEC_PER_SEC /* to MHz */);
> >                     break;
> >             case I915_PMU_REQUESTED_FREQUENCY:
> >                     val =
> > -                      div_u64(read_sample(pmu, gt_id,
> > -                                          __I915_SAMPLE_FREQ_REQ),
> > +                      
> > div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> >                                USEC_PER_SEC /* to MHz */);
> >                     break;
> >             case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h 
> > b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> >      * Only global counters are held here, while the per-engine ones are in
> >      * struct intel_engine_cs.
> >      */
> > -   struct i915_pmu_sample sample[I915_PMU_MAX_GTS * 
> > __I915_NUM_PMU_SAMPLERS];
> > +   struct i915_pmu_sample 
> > sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> >     /**
> >      * @sleep_last: Last time GT parked for RC6 estimation.
> >      */

Reply via email to