On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote: > Implementing the GetTransformFeedbackVertexCount() driver hook allows > the VBO module to call us with the right number of vertices. > > The hardware doesn't directly count the number of vertices written by > SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply > by the number of vertices per primitive. > > Unfortunately, counting the number of primitives generated is tricky: > a program might pause a transform feedback operation, start a second one > with a different object, then switch back and resume. Both transform > feedback operations share the SO_NUM_PRIMS_WRITTEN counters. > > To work around this, we save the counter values at Begin, Pause, Resume, > and End. This "bookends" each section where transform feedback is > active for the current object. Adding up differences of pairs gives > us the number of primitives generated. (This is similar to what we > do for occlusion queries on platforms without hardware contexts.) > > v2: Fix missing parenthesis in assertion (caught by Eric Anholt). 
> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> > Reviewed-by: Eric Anholt <e...@anholt.net> > --- > src/mesa/drivers/dri/i965/brw_context.c | 2 + > src/mesa/drivers/dri/i965/brw_context.h | 26 ++++ > src/mesa/drivers/dri/i965/gen6_sol.c | 1 + > src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 > ++++++++++++++++++++++++++++- > 4 files changed, 218 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > b/src/mesa/drivers/dri/i965/brw_context.c > index 623273c..f4e04b6 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.c > +++ b/src/mesa/drivers/dri/i965/brw_context.c > @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw, > > functions->NewTransformFeedback = brw_new_transform_feedback; > functions->DeleteTransformFeedback = brw_delete_transform_feedback; > + functions->GetTransformFeedbackVertexCount = > + brw_get_transform_feedback_vertex_count; > if (brw->gen >= 7) { > functions->BeginTransformFeedback = gen7_begin_transform_feedback; > functions->EndTransformFeedback = gen7_end_transform_feedback; > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 48aa4c1..c72bad1 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -880,11 +880,33 @@ struct intel_batchbuffer { > } saved; > }; > > +#define BRW_MAX_XFB_STREAMS 4 > + > struct brw_transform_feedback_object { > struct gl_transform_feedback_object base; > > /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */ > drm_intel_bo *offset_bo; > + > + /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */ > + GLenum primitive_mode; > + > + /** > + * Count of primitives generated during this transform feedback operation. 
> + * @{ > + */ > + uint64_t prims_generated[BRW_MAX_XFB_STREAMS]; > + drm_intel_bo *prim_count_bo; > + unsigned prim_count_buffer_index; /**< in number of uint64_t units */ > + /** @} */ > + > + /** > + * Number of vertices written between last Begin/EndTransformFeedback(). > + * > + * Used to implement DrawTransformFeedback(). > + */ > + uint64_t vertices_written[BRW_MAX_XFB_STREAMS]; > + bool vertices_written_valid; > }; > > /** > @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, > GLenum mode, > void > brw_end_transform_feedback(struct gl_context *ctx, > struct gl_transform_feedback_object *obj); > +GLsizei > +brw_get_transform_feedback_vertex_count(struct gl_context *ctx, > + struct gl_transform_feedback_object > *obj, > + GLuint stream); > > /* gen7_sol_state.c */ > void > diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c > b/src/mesa/drivers/dri/i965/gen6_sol.c > index 2e6c86a..af5bed9 100644 > --- a/src/mesa/drivers/dri/i965/gen6_sol.c > +++ b/src/mesa/drivers/dri/i965/gen6_sol.c > @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx, > } > > drm_intel_bo_unreference(brw_obj->offset_bo); > + drm_intel_bo_unreference(brw_obj->prim_count_bo); > > free(brw_obj); > } > diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c > b/src/mesa/drivers/dri/i965/gen7_sol_state.c > index 27421da..7cac8fe 100644 > --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c > @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = { > .emit = upload_sol_state, > }; > > +/** > + * Tally the number of primitives generated so far. > + * > + * The buffer contains a series of pairs: > + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; > + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; > + * > + * For each stream, we subtract the pair of values (end - start) to get the > + * number of primitives generated during one section. 
We accumulate these > + * values, adding them up to get the total number of primitives generated. > + */ > +static void > +gen7_tally_prims_generated(struct brw_context *brw, > + struct brw_transform_feedback_object *obj) > +{ > + /* If the current batch is still contributing to the number of primitives > + * generated, flush it now so the results will be present when mapped. > + */ > + if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo)) > + intel_batchbuffer_flush(brw); > + > + if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo))) > + perf_debug("Stalling for # of transform feedback primitives > written.\n"); > + > + drm_intel_bo_map(obj->prim_count_bo, false); > + uint64_t *prim_counts = obj->prim_count_bo->virtual; > + > + assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0); > + int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS); > + > + for (int i = 0; i < pairs; i++) { > + for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) { > + obj->prims_generated[s] += > + prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s]; > + } > + prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */ > + } > + > + drm_intel_bo_unmap(obj->prim_count_bo); > + > + /* Release the BO; we've already tallied all the data it contained. */ > + drm_intel_bo_unreference(obj->prim_count_bo); > + obj->prim_count_bo = NULL; > +} > + > +/** > + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t > values) > + * to prim_count_bo. > + * > + * If prim_count_bo is out of space, gather up the results so far into > + * prims_generated[] and allocate a new buffer with enough space. > + * > + * The number of primitives written is used to compute the number of vertices > + * written to a transform feedback stream, which is required to implement > + * DrawTransformFeedback(). 
> + */ > +static void > +gen7_save_primitives_written_counters(struct brw_context *brw, > + struct brw_transform_feedback_object *obj) > +{ > + const int streams = BRW_MAX_XFB_STREAMS; > + > + /* Check if there's enough space for a new pair of four values. */ > + if (obj->prim_count_bo != NULL && > + obj->prim_count_buffer_index + 2 * streams >= 4096 / > sizeof(uint64_t)) { > + /* Gather up the results so far and release the BO. */ > + gen7_tally_prims_generated(brw, obj); > + } > + > + /* Allocate a new buffer if needed. A page should be plenty. */ > + if (obj->prim_count_bo == NULL) { > + obj->prim_count_buffer_index = 0; > + obj->prim_count_bo = > + drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);
I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the buffer object, and why the logic here then has to allocate another one. Couldn't we re-use the old bo and simply let 'gen7_tally_prims_generated()' reset 'prim_count_buffer_index' to zero? Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards itself against adding the counters multiple times using the flag 'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()' would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I misunderstand the logic? > + } > + > + /* Flush any drawing so that the counters have the right values. */ > + intel_batchbuffer_emit_mi_flush(brw); > + > + /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ > + for (int i = 0; i < streams; i++) { > + brw_store_register_mem64(brw, obj->prim_count_bo, > + GEN7_SO_NUM_PRIMS_WRITTEN(i), > + obj->prim_count_buffer_index + i); > + } > + > + /* Update where to write data to. */ > + obj->prim_count_buffer_index += streams; > +} > + > +/** > + * Compute the number of vertices written by this transform feedback > operation. > + */ > +static void > +brw_compute_xfb_vertices_written(struct brw_context *brw, > + struct brw_transform_feedback_object *obj) > +{ > + if (obj->vertices_written_valid || !obj->base.EndedAnytime) > + return; > + > + unsigned vertices_per_prim = 0; > + > + switch (obj->primitive_mode) { > + case GL_POINTS: > + vertices_per_prim = 1; > + break; > + case GL_LINES: > + vertices_per_prim = 2; > + break; > + case GL_TRIANGLES: > + vertices_per_prim = 3; > + break; > + default: > + assert(!"Invalid transform feedback primitive mode."); > + } > + > + /* Get the number of primitives generated. 
*/ > + gen7_tally_prims_generated(brw, obj); > + > + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { > + obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i]; > + } > + obj->vertices_written_valid = true; > +} > + > +/** > + * GetTransformFeedbackVertexCount() driver hook. > + * > + * Returns the number of vertices written to a particular stream by the last > + * Begin/EndTransformFeedback block. Used to implement > DrawTransformFeedback(). > + */ > +GLsizei > +brw_get_transform_feedback_vertex_count(struct gl_context *ctx, > + struct gl_transform_feedback_object > *obj, > + GLuint stream) > +{ > + struct brw_context *brw = brw_context(ctx); > + struct brw_transform_feedback_object *brw_obj = > + (struct brw_transform_feedback_object *) obj; > + > + assert(obj->EndedAnytime); > + assert(stream < BRW_MAX_XFB_STREAMS); > + > + brw_compute_xfb_vertices_written(brw, brw_obj); > + return brw_obj->vertices_written[stream]; > +} > + > void > gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, > struct gl_transform_feedback_object *obj) > { > struct brw_context *brw = brw_context(ctx); > + struct brw_transform_feedback_object *brw_obj = > + (struct brw_transform_feedback_object *) obj; > > intel_batchbuffer_flush(brw); > brw->batch.needs_sol_reset = true; > + > + /* We're about to lose the information needed to compute the number of > + * vertices written during the last Begin/EndTransformFeedback section, > + * so we can't delay it any further. > + */ > + brw_compute_xfb_vertices_written(brw, brw_obj); > + > + /* No primitives have been generated yet. */ > + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { > + brw_obj->prims_generated[i] = 0; > + } > + > + /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ > + gen7_save_primitives_written_counters(brw, brw_obj); > + > + brw_obj->primitive_mode = mode; > } > > void > @@ -270,8 +435,18 @@ gen7_end_transform_feedback(struct gl_context *ctx, > * simplicity, just do a full flush. > */ > struct brw_context *brw = brw_context(ctx); > + struct brw_transform_feedback_object *brw_obj = > + (struct brw_transform_feedback_object *) obj; > > - intel_batchbuffer_emit_mi_flush(brw); > + /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ > + gen7_save_primitives_written_counters(brw, brw_obj); > + > + /* EndTransformFeedback() means that we need to update the number of > + * vertices written. Since it's only necessary if DrawTransformFeedback() > + * is called and it means mapping a buffer object, we delay computing it > + * until it's absolutely necessary to try and avoid stalls. > + */ > + brw_obj->vertices_written_valid = false; > } > > void > @@ -282,6 +457,9 @@ gen7_pause_transform_feedback(struct gl_context *ctx, > struct brw_transform_feedback_object *brw_obj = > (struct brw_transform_feedback_object *) obj; > > + /* Flush any drawing so that the counters have the right values. */ > + intel_batchbuffer_emit_mi_flush(brw); > + > /* Save the SOL buffer offset register values. */ > for (int i = 0; i < 4; i++) { > BEGIN_BATCH(3); > @@ -292,6 +470,13 @@ gen7_pause_transform_feedback(struct gl_context *ctx, > i * sizeof(uint32_t)); > ADVANCE_BATCH(); > } > + > + /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. > + * While this operation is paused, other transform feedback actions may > + * occur, which will contribute to the counters. We need to exclude that > + * from our counts. > + */ > + gen7_save_primitives_written_counters(brw, brw_obj); > } > > void > @@ -312,4 +497,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx, > i * sizeof(uint32_t)); > ADVANCE_BATCH(); > } > + > + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ > + gen7_save_primitives_written_counters(brw, brw_obj); > } > -- > 1.8.3.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev