While you're at it, would you mind making Vulkan use this as well? It should be a 2-line change to GetPhysicalDeviceProperties.
On Fri, Jan 6, 2017 at 1:17 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > From: Robert Bragg <rob...@sixbynine.org> > > v2: (Ken) Update timebase_scale for platforms past Skylake/Broxton too. > --- > src/intel/common/gen_device_info.c | 13 ++++++-- > src/intel/common/gen_device_info.h | 24 ++++++++++++++ > src/mesa/drivers/dri/i965/brw_context.c | 15 +++++++++ > src/mesa/drivers/dri/i965/brw_context.h | 3 ++ > src/mesa/drivers/dri/i965/brw_queryobj.c | 53 > ++++++++++++++++++++++++++++--- > src/mesa/drivers/dri/i965/gen6_queryobj.c | 28 +++++----------- > 6 files changed, 109 insertions(+), 27 deletions(-) > > diff --git a/src/intel/common/gen_device_info.c > b/src/intel/common/gen_device_info.c > index 9bf3cd5cc42..209b293e510 100644 > --- a/src/intel/common/gen_device_info.c > +++ b/src/intel/common/gen_device_info.c > @@ -36,6 +36,7 @@ static const struct gen_device_info gen_device_info_i965 > = { > .urb = { > .size = 256, > }, > + .timebase_scale = 80, > }; > > static const struct gen_device_info gen_device_info_g4x = { > @@ -51,6 +52,7 @@ static const struct gen_device_info gen_device_info_g4x > = { > .urb = { > .size = 384, > }, > + .timebase_scale = 80, > }; > > static const struct gen_device_info gen_device_info_ilk = { > @@ -65,6 +67,7 @@ static const struct gen_device_info gen_device_info_ilk > = { > .urb = { > .size = 1024, > }, > + .timebase_scale = 80, > }; > > static const struct gen_device_info gen_device_info_snb_gt1 = { > @@ -89,6 +92,7 @@ static const struct gen_device_info > gen_device_info_snb_gt1 = { > [MESA_SHADER_GEOMETRY] = 256, > }, > }, > + .timebase_scale = 80, > }; > > static const struct gen_device_info gen_device_info_snb_gt2 = { > @@ -113,6 +117,7 @@ static const struct gen_device_info > gen_device_info_snb_gt2 = { > [MESA_SHADER_GEOMETRY] = 256, > }, > }, > + .timebase_scale = 80, > }; > > #define GEN7_FEATURES \ > @@ -121,7 +126,8 @@ static const struct gen_device_info > gen_device_info_snb_gt2 = { > .must_use_separate_stencil = true, \ > .has_llc = true, \ > .has_pln = true, \ > - .has_surface_tile_offset = true > + .has_surface_tile_offset = true, \ > + .timebase_scale = 80 > > static const struct gen_device_info gen_device_info_ivb_gt1 = { > GEN7_FEATURES, .is_ivybridge = true, .gt = 1, > @@ -287,7 +293,8 @@ static const struct gen_device_info > gen_device_info_hsw_gt3 = { > .max_tcs_threads = 504, \ > .max_tes_threads = 504, \ > .max_gs_threads = 504, \ > - .max_wm_threads = 384 > + .max_wm_threads = 384, \ > + .timebase_scale = 80 > > static const struct gen_device_info gen_device_info_bdw_gt1 = { > GEN8_FEATURES, .gt = 1, > @@ -385,6 +392,7 @@ static const struct gen_device_info > gen_device_info_chv = { > .max_tcs_threads = 336, \ > .max_tes_threads = 336, \ > .max_cs_threads = 56, \ > + .timebase_scale = 1000000000.0 / 12000000.0, \ > .urb = { \ > .size = 384, \ > .min_entries = { \ > @@ -410,6 +418,7 @@ static const struct gen_device_info > gen_device_info_chv = { > .max_tes_threads = 112, \ > .max_gs_threads = 112, \ > .max_cs_threads = 6 * 6, \ > + .timebase_scale = 1000000000.0 / 19200123.0, \ > .urb = { \ > .size = 192, \ > .min_entries = { \ > diff --git a/src/intel/common/gen_device_info.h > b/src/intel/common/gen_device_info.h > index f0e8750d0ea..80676d0e003 100644 > --- a/src/intel/common/gen_device_info.h > +++ b/src/intel/common/gen_device_info.h > @@ -147,6 +147,30 @@ struct gen_device_info > */ > unsigned max_entries[4]; > } urb; > + > + /** > + * For the longest time the timestamp frequency for Gen's timestamp > counter > + * could be assumed to be 12.5MHz, where the least significant bit > neatly > + * corresponded to 80 nanoseconds. > + * > + * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz > for > + * SKL (or scale factor of 83.33333333) and a frequency of 19200123Hz > for > + * BXT. > + * > + * For simplicty to fit with the current code scaling by a single > constant > + * to map from raw timestamps to nanoseconds we now do the conversion > in > + * floating point instead of integer arithmetic. > + * > + * In general it's probably worth noting that the documented constants > we > + * have for the per-platform timestamp frequencies aren't perfect and > + * shouldn't be trusted for scaling and comparing timestamps with a > large > + * delta. > + * > + * E.g. with crude testing on my system using the 'correct' scale > factor I'm > + * seeing a drift of ~2 milliseconds per second. > + */ > + double timebase_scale; > + > /** @} */ > }; > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > b/src/mesa/drivers/dri/i965/brw_context.c > index f939463539c..b071d08e95d 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.c > +++ b/src/mesa/drivers/dri/i965/brw_context.c > @@ -524,6 +524,21 @@ brw_initialize_context_constants(struct brw_context > *brw) > ctx->Const.MaxCombinedShaderOutputResources = > MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; > > + /* The timestamp register we can read for glGetTimestamp() is > + * sometimes only 32 bits, before scaling to nanoseconds (depending > + * on kernel). > + * > + * Once scaled to nanoseconds the timestamp would roll over at a > + * non-power-of-two, so an application couldn't use > + * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we > + * report 36 bits and truncate at that (rolling over 5 times as > + * often as the HW counter), and when the 32-bit counter rolls > + * over, it happens to also be at a rollover in the reported value > + * from near (1<<36) to 0. > + * > + * The low 32 bits rolls over in ~343 seconds. Our 36-bit result > + * rolls over every ~69 seconds. > + */ > ctx->Const.QueryCounterBits.Timestamp = 36; > > ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index ff3f861a147..b6ac1a2e67f 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -1300,6 +1300,9 @@ void brw_emit_query_begin(struct brw_context *brw); > void brw_emit_query_end(struct brw_context *brw); > void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q); > bool brw_is_query_pipelined(struct brw_query_object *query); > +uint64_t brw_timebase_scale(struct brw_context *brw, uint64_t > gpu_timestamp); > +uint64_t brw_raw_timestamp_delta(struct brw_context *brw, > + uint64_t time0, uint64_t time1); > > /** gen6_queryobj.c */ > void gen6_init_queryobj_functions(struct dd_function_table *functions); > diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c > b/src/mesa/drivers/dri/i965/brw_queryobj.c > index dda17de7157..ebd5776d0aa 100644 > --- a/src/mesa/drivers/dri/i965/brw_queryobj.c > +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c > @@ -42,6 +42,37 @@ > #include "brw_state.h" > #include "intel_batchbuffer.h" > > +uint64_t > +brw_timebase_scale(struct brw_context *brw, uint64_t gpu_timestamp) > +{ > + const struct gen_device_info *devinfo = &brw->screen->devinfo; > + > + return (double)gpu_timestamp * devinfo->timebase_scale; > +} > + > +/* As best we know currently, the Gen HW timestamps are 36bits across > + * all platforms, which we need to account for when calculating a > + * delta to measure elapsed time. > + * > + * The timestamps read via glGetTimestamp() / brw_get_timestamp() > sometimes > + * only have 32bits due to a kernel bug and so in that case we make sure > to > + * treat all raw timestamps as 32bits so they overflow consistently and > remain > + * comparable. > + */ > +uint64_t > +brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, > uint64_t time1) > +{ > + if (brw->screen->hw_has_timestamp == 2) { > + /* Kernel clips timestamps to 32bits in this case */ > + return (uint32_t)time1 - (uint32_t)time0; > + } else { > + if (time0 > time1) > + return (1ULL << 36) + time1 - time0; > + else > + return time1 - time0; > + } > +} > + > /** > * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. > */ > @@ -117,12 +148,18 @@ brw_queryobj_get_results(struct gl_context *ctx, > /* The query BO contains the starting and ending timestamps. > * Subtract the two and convert to nanoseconds. > */ > - query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> > 32)); > + query->Base.Result = brw_raw_timestamp_delta(brw, results[0], > results[1]); > + query->Base.Result = brw_timebase_scale(brw, query->Base.Result); > break; > > case GL_TIMESTAMP: > /* The query BO contains a single timestamp value in results[0]. */ > - query->Base.Result = 1000 * (results[0] >> 32); > + query->Base.Result = brw_timebase_scale(brw, results[0]); > + > + /* Ensure the scaled timestamp overflows according to > + * GL_QUERY_COUNTER_BITS > + */ > + query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) > - 1; > break; > > case GL_SAMPLES_PASSED_ARB: > @@ -508,9 +545,15 @@ brw_get_timestamp(struct gl_context *ctx) > break; > } > > - /* See logic in brw_queryobj_get_results() */ > - result *= 80; > - result &= (1ull << 36) - 1; > + /* Scale to nanosecond units */ > + result = brw_timebase_scale(brw, result); > + > + /* Ensure the scaled timestamp overflows according to > + * GL_QUERY_COUNTER_BITS. Technically this isn't required if > + * querying GL_TIMESTAMP via glGetInteger but it seems best to keep > + * QueryObject and GetInteger timestamps consistent. > + */ > + result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; > return result; > } > > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index bbd3c44fb0f..3f6edf1da1a 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -171,30 +171,18 @@ gen6_queryobj_get_results(struct gl_context *ctx, > /* The query BO contains the starting and ending timestamps. > * Subtract the two and convert to nanoseconds. > */ > - query->Base.Result += 80 * (results[1] - results[0]); > + query->Base.Result = brw_raw_timestamp_delta(brw, results[0], > results[1]); > + query->Base.Result = brw_timebase_scale(brw, query->Base.Result); > break; > > case GL_TIMESTAMP: > - /* Our timer is a clock that increments every 80ns (regardless of > - * other clock scaling in the system). The timestamp register we > can > - * read for glGetTimestamp() masks out the top 32 bits, so we do > that > - * here too to let the two counters be compared against each other. > - * > - * If we just multiplied that 32 bits of data by 80, it would roll > - * over at a non-power-of-two, so an application couldn't use > - * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we > - * report 36 bits and truncate at that (rolling over 5 times as > often > - * as the HW counter), and when the 32-bit counter rolls over, it > - * happens to also be at a rollover in the reported value from near > - * (1<<36) to 0. > - * > - * The low 32 bits rolls over in ~343 seconds. Our 36-bit result > - * rolls over every ~69 seconds. > - * > - * The query BO contains a single timestamp value in results[0]. > + /* The query BO contains a single timestamp value in results[0]. */ > + query->Base.Result = brw_timebase_scale(brw, results[0]); > + > + /* Ensure the scaled timestamp overflows according to > + * GL_QUERY_COUNTER_BITS > */ > - query->Base.Result = 80 * (results[0] & 0xffffffff); > - query->Base.Result &= (1ull << 36) - 1; > + query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) > - 1; > break; > > case GL_SAMPLES_PASSED_ARB: > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev