On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote: > On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga <ito...@igalia.com> wrote: > > From: Samuel Iglesias Gonsalvez <sigles...@igalia.com> > > > > We will use this parameter in later patches to provide information relevant > > to transform feedback that needs to be set as part of the FF_SYNC message. > > > > Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com> > > --- > > src/mesa/drivers/dri/i965/brw_defines.h | 4 ++++ > > src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- > > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +++++++++++++--- > > src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 3 ++- > > 4 files changed, 21 insertions(+), 5 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > > b/src/mesa/drivers/dri/i965/brw_defines.h > > index 6e8b998..b0d6d9f 100644 > > --- a/src/mesa/drivers/dri/i965/brw_defines.h > > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > > @@ -1030,6 +1030,10 @@ enum opcode { > > * FF_SYNC operation. > > * > > * - src1 is the number of primitives written. > > + * > > + * - src2 is the value to hold in M0.0: number of SO vertices to write > > + * and number of SO primitives needed. Its value will be overwritten > > + * with the SVBI values if transform feedback is enabled. > > */ > > GS_OPCODE_FF_SYNC, > > > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > > b/src/mesa/drivers/dri/i965/brw_vec4.h > > index 763cb23..58a5aac 100644 > > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > > @@ -679,7 +679,8 @@ private: > > struct brw_reg src2); > > void generate_gs_ff_sync(struct brw_reg dst, > > struct brw_reg src0, > > - struct brw_reg src1); > > + struct brw_reg src1, > > + struct brw_reg src2); > > void generate_gs_set_primitive_id(struct brw_reg dst); > > void generate_oword_dual_block_offsets(struct brw_reg m1, > > struct brw_reg index); > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > > index d4554f5..c69b305 100644 > > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > > @@ -734,7 +734,8 @@ > > vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, > > void > > vec4_generator::generate_gs_ff_sync(struct brw_reg dst, > > struct brw_reg src0, > > - struct brw_reg src1) > > + struct brw_reg src1, > > + struct brw_reg src2) > > { > > /* We use dst to setup the ff_sync header, so we expect it to be > > * initialized to R0 by the caller. Here we overwrite dword 0 (cleared > > @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, > > brw_push_insn_state(p); > > brw_set_default_mask_control(p, BRW_MASK_DISABLE); > > brw_set_default_access_mode(p, BRW_ALIGN_1); > > - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); > > + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); > > brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); > > brw_set_default_access_mode(p, BRW_ALIGN_16); > > brw_pop_insn_state(p); > > @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, > > brw_set_default_access_mode(p, BRW_ALIGN_1); > > brw_set_default_mask_control(p, BRW_MASK_DISABLE); > > brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); > > + > > + /* src2 is not an immediate when we use transform feedback */ > > + if (src2.file != BRW_IMMEDIATE_VALUE) { > > + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); > > + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); > > + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); > > + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); > > Ken and I discussed this a bit. Ken suggested that this: > brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); > > Should be able to copy all 4 dwords in one instruction. What do you think?
Sure, if we can do this in just on MOV that is better. I'll give it a try. > By the way, this was for the version of this patch on the > gs-support-snb-for-submission-02092014 which has src1 as the > destination and dst as the source for the moves. (Hmm, not sure about > the src1 naming in this context...) Yes, this is used as both a src and a dst... and I supposed Samuel decided to follow naming conventions for other opcodes that have a dst and multiple src parameters. I suppose the best way to do this would have been to create a separate generator opcode for the part where this is used as a destination register only... Iago > If that change seem good, then > Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > > > + } > > + > > brw_set_default_access_mode(p, BRW_ALIGN_16); > > brw_pop_insn_state(p); > > } > > @@ -1374,7 +1384,7 @@ > > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > > break; > > > > case GS_OPCODE_FF_SYNC: > > - generate_gs_ff_sync(dst, src[0], src[1]); > > + generate_gs_ff_sync(dst, src[0], src[1], src[2]); > > break; > > > > case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: > > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > index b45c381..c1cfe75 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > @@ -331,7 +331,8 @@ gen6_gs_visitor::emit_thread_end() > > { > > this->current_annotation = "gen6 thread end: ff_sync"; > > emit(GS_OPCODE_FF_SYNC, > > - dst_reg(MRF, base_mrf), this->temp, this->prim_count); > > + dst_reg(MRF, base_mrf), this->temp, this->prim_count, > > + brw_imm_ud(0u)); > > > > /* Loop over all buffered vertices and emit URB write messages */ > > this->current_annotation = "gen6 thread end: urb writes init"; > > -- > > 1.9.1 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev