On 12/23/2011 09:37 AM, Paul Berry wrote: > When rendering triangle strips, vertices come down the pipeline in the > order specified, even though this causes alternate triangles to have > reversed winding order. For example, if the vertices are ABCDE, then > the GS is invoked on triangles ABC, BCD, and CDE, even though this > means that triangle BCD is in the reverse of the normal winding order. > The hardware automatically flags the triangles with reversed winding > order as _3DPRIM_TRISTRIP_REVERSE, so that face culling and two-sided > coloring can be adjusted to account for the reversed order. > > In order to ensure that winding order is correct when streaming > vertices out to a transform feedback buffer, we need to alter the > ordering of BCD to BDC when the first provoking vertex convention is > in use, and to CBD when the last provoking vertex convention is in > use. > > To do this, we precompute an array of indices indicating where each > vertex will be placed in the transform feedback buffer; normally this > is SVBI[0] + (0, 1, 2), indicating that vertex order should be > preserved. When the primitive type is _3DPRIM_TRISTRIP_REVERSE, we > change this order to either SVBI[0] + (0, 2, 1) or SVBI[0] + (1, 0, > 2), depending on the provoking vertex convention. > > Fixes piglit tests "EXT_transform_feedback/tessellation > triangle_strip" on Gen6.
It's really too bad we can't use the 3DSTATE_GS "Reorder Enable" bit. It's supposed to take care of this, but it looks like it only does PV first mode on Sandybridge. Turning it on makes triangle_strip wireframe fail, and nothing else pass. I had a bit of trouble following this patch, but it looks okay to me. Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_gs.h | 6 ++ > src/mesa/drivers/dri/i965/brw_gs_emit.c | 84 > ++++++++++++++++++++++++------- > 2 files changed, 72 insertions(+), 18 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_gs.h > b/src/mesa/drivers/dri/i965/brw_gs.h > index 7bf2248..2ab8b72 100644 > --- a/src/mesa/drivers/dri/i965/brw_gs.h > +++ b/src/mesa/drivers/dri/i965/brw_gs.h > @@ -83,6 +83,12 @@ struct brw_gs_compile { > struct brw_reg vertex[MAX_GS_VERTS]; > struct brw_reg header; > struct brw_reg temp; > + > + /** > + * Register holding destination indices for streamed buffer writes. > + * Only used for SOL programs. > + */ > + struct brw_reg destination_indices; > } reg; > > /* Number of registers used to store vertex data */ > diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c > b/src/mesa/drivers/dri/i965/brw_gs_emit.c > index 1f96a16..607ee75 100644 > --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c > @@ -45,13 +45,16 @@ > /** > * Allocate registers for GS. > * > - * If svbi_payload_enable is true, then the thread will be spawned with the > - * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to hold the > - * streamed vertex buffer indices. > + * If sol_program is true, then: > + * > + * - The thread will be spawned with the "SVBI Payload Enable" bit set, so > GRF > + * 1 needs to be set aside to hold the streamed vertex buffer indices. > + * > + * - The thread will need to use the destination_indices register. > */ > static void brw_gs_alloc_regs( struct brw_gs_compile *c, > GLuint nr_verts, > - bool svbi_payload_enable ) > + bool sol_program ) > { > GLuint i = 0,j; > > @@ -60,7 +63,7 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c, > c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; > > /* Streamed vertex buffer indices */ > - if (svbi_payload_enable) > + if (sol_program) > c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > /* Payload vertices plus space for more generated vertices: > @@ -73,6 +76,11 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c, > c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > + if (sol_program) { > + c->reg.destination_indices = > + retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); > + } > + > c->prog_data.urb_read_length = c->nr_regs; > c->prog_data.total_grf = i; > } > @@ -351,16 +359,17 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > > if (key->num_transform_feedback_bindings > 0) { > unsigned vertex, binding; > + struct brw_reg destination_indices_uw = > + vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); > + > /* Note: since we use the binding table to keep track of buffer offsets > * and stride, the GS doesn't need to keep track of a separate pointer > * into each buffer; it uses a single pointer which increments by 1 for > * each vertex. So we use SVBI0 for this pointer, regardless of > whether > * transform feedback is in interleaved or separate attribs mode. > + * > + * Make sure that the buffers have enough room for all the vertices. > */ > - brw_MOV(p, get_element_ud(c->reg.header, 5), > - get_element_ud(c->reg.SVBI, 0)); > - > - /* Make sure that the buffers have enough room for all the vertices. */ > brw_ADD(p, get_element_ud(c->reg.temp, 0), > get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); > brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, > @@ -368,10 +377,58 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > get_element_ud(c->reg.SVBI, 4)); > brw_IF(p, BRW_EXECUTE_1); > > + /* Compute the destination indices to write to. Usually we use SVBI[0] > + * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the > + * vertices come down the pipeline in reversed winding order, so we > need > + * to flip the order when writing to the transform feedback buffer. To > + * ensure that flatshading accuracy is preserved, we need to write them > + * in order SVBI[0] + (0, 2, 1) if we're using the first provoking > + * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using > + * the last provoking vertex convention. > + * > + * Note: since brw_imm_v can only be used in instructions in > + * packed-word execution mode, and SVBI is a double-word, we need to > + * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), > + * or (1, 0, 2)) to the destination_indices register, and then add SVBI > + * using a separate instruction. Also, since the immediate constant is > + * expressed as packed words, and we need to load double-words into > + * destination_indices, we need to intersperse zeros to fill the upper > + * halves of each double-word. > + */ > + brw_MOV(p, destination_indices_uw, > + brw_imm_v(0x00020100)); /* (0, 1, 2) */ > + if (num_verts == 3) { > + /* Get primitive type into temp register. */ > + brw_AND(p, get_element_ud(c->reg.temp, 0), > + get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); > + > + /* Test if primitive type is TRISTRIP_REVERSE. We need to do this > as > + * an 8-wide comparison so that the conditional MOV that follows > + * moves all 8 words correctly. > + */ > + brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, > + get_element_ud(c->reg.temp, 0), > + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); > + > + /* If so, then overwrite destination_indices_uw with the appropriate > + * reordering. > + */ > + brw_MOV(p, destination_indices_uw, > + brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ > + : 0x00020001)); /* (1, 0, 2) */ > + brw_set_predicate_control(p, BRW_PREDICATE_NONE); > + } > + brw_ADD(p, c->reg.destination_indices, > + c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); > + > /* For each vertex, generate code to output each varying using the > * appropriate binding table entry. > */ > for (vertex = 0; vertex < num_verts; ++vertex) { > + /* Set up the correct destination index for this vertex */ > + brw_MOV(p, get_element_ud(c->reg.header, 5), > + get_element_ud(c->reg.destination_indices, vertex)); > + > for (binding = 0; binding < key->num_transform_feedback_bindings; > ++binding) { > unsigned char vert_result = > @@ -398,15 +455,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > SURF_INDEX_SOL_BINDING(binding), /* > binding_table_index */ > final_write); /* send_commit_msg */ > } > - > - /* If there are more vertices to output, increment the pointer so > - * that we will start outputting to the next location in the > - * transform feedback buffers. > - */ > - if (vertex != num_verts - 1) { > - brw_ADD(p, get_element_ud(c->reg.header, 5), > - get_element_ud(c->reg.header, 5), brw_imm_ud(1)); > - } > } > brw_ENDIF(p); > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev