> >> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c > >> b/src/mesa/drivers/dri/i965/brw_performance_query.c > >> index 98666759d75..7d5b44cf61d 100644 > >> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c > >> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c > >> @@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o) > >> > >> #define MI_RPC_BO_SIZE 4096 > >> #define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) > >> +#define MI_FREQ_START_OFFSET_BYTES (3072) > >> +#define MI_FREQ_END_OFFSET_BYTES (3076) > > Why these? > > That's where I store the RPSTAT copy (before/after the workload).
Yeah, but I mean...why 3072? Is it some arbtirary number? > >> > >> /******************************************************************************/ > >> > >> @@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx, > >> /* Take a starting OA counter snapshot. */ > >> brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0, > >> obj->oa.begin_report_id); > >> + brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1, > >> + MI_FREQ_START_OFFSET_BYTES); > >> + > >> ++brw->perfquery.n_active_oa_queries; > >> > >> /* No already-buffered samples can possibly be associated with > >> this query > >> @@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx, > >> */ > >> if (!obj->oa.results_accumulated) { > >> /* Take an ending OA counter snapshot. */ > >> + brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1, > >> + MI_FREQ_END_OFFSET_BYTES); > >> brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, > >> MI_RPC_BO_END_OFFSET_BYTES, > >> obj->oa.begin_report_id + > >> 1); > >> @@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx, > >> return false; > >> } > >> > >> +static void > >> +read_gt_frequency(struct brw_context *brw, > >> + struct brw_perf_query_object *obj) > >> +{ > >> + const struct gen_device_info *devinfo = &brw->screen->devinfo; > >> + uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES, > >> + *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES; > >> + > >> + switch (devinfo->gen) { > >> + case 7: > >> + case 8: > >> + obj->oa.gt_frequency[0] = > >> + ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >> > >> + GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL; > > You can just do: > > > > GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ) > > > > instead of shifting and masking. > > > > I think your conversions may be wrong. In particular, you don't handle > > Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US > > does: > > > > Gen9 LP: 0.833 -> usec > > Gen9+ non-LP: 1.33 -> usec > > other: 1.28 -> usec > > > > #define INTERVAL_1_28_TO_US(interval) (((interval) << 7) / 100) > > #define INTERVAL_1_33_TO_US(interval) (((interval) << 2) / 3) > > #define INTERVAL_0_833_TO_US(interval) (((interval) * 5) / 6) > > #define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 > > ? \ > > (IS_GEN9_LP(dev_priv) ? \ > > INTERVAL_0_833_TO_US(interval) : \ > > INTERVAL_1_33_TO_US(interval)) : \ > > INTERVAL_1_28_TO_US(interval)) > > > > I could be mistaken, though. > > Actually the kernel reads rpstat1 already and computes the frequency value. > I think the current code is equivalent to what the kernel does on big > cores & small cores >= gen9. So...we need to multiply by 50, but don't need to convert to usec? Otherwise I'm struggling to see how this is equivalent. > On cherryview/valleyview, we need to read another register to figure out > the multipliers... > > So I'll just leave it out for those small cores gens for now. That seems reasonable...
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev