On 20 August 2013 11:30, Paul Berry <stereotype...@gmail.com> wrote: > --- > src/mesa/drivers/dri/i965/brw_defines.h | 9 +++++++++ > src/mesa/drivers/dri/i965/brw_eu.h | 6 ++++++ > src/mesa/drivers/dri/i965/brw_eu_emit.c | 4 ++-- > src/mesa/drivers/dri/i965/brw_shader.cpp | 5 ++++- > src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- > src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++-- > 7 files changed, 46 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 2ab0a2b..16a1dbc 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -799,6 +799,15 @@ enum opcode { > VS_OPCODE_PULL_CONSTANT_LOAD, > VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, > VS_OPCODE_UNPACK_FLAGS_SIMD4X2, > + > + /** > + * Write geometry shader output data to the URB. > + * > + * Unlike VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move > from > + * R0 to the first MRF. This allows the geometry shader to override > the > + * "Slot {0,1} Offset" fields in the message header. > + */ > + GS_OPCODE_URB_WRITE, > }; > > #define BRW_PREDICATE_NONE 0 > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index ae4cab5..9053ea2 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -252,6 +252,12 @@ enum brw_urb_write_flags { > BRW_URB_WRITE_COMPLETE = 0x8, > > /** > + * Indicates that an additional offset (which may be different for the > two > + * vec4 slots) is stored in the message header (gen == 7). > + */ > + BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10, > + > + /** > * Convenient combination of flags: end the thread while simultaneously > * marking the given URB entry as complete. > */ > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index 622b22f..b55b57e 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -531,8 +531,8 @@ static void brw_set_urb_message( struct brw_compile *p, > insn->bits3.urb_gen7.offset = offset; > assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); > insn->bits3.urb_gen7.swizzle_control = swizzle_control; > - /* per_slot_offset = 0 makes it ignore offsets in message header */ > - insn->bits3.urb_gen7.per_slot_offset = 0; > + insn->bits3.urb_gen7.per_slot_offset = > + flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0; > insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 > : 0; > } else if (brw->gen >= 5) { > insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index afa14c5..d3de6ed 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -485,7 +485,7 @@ brw_instruction_name(enum opcode op) > return "placeholder_halt"; > > case VS_OPCODE_URB_WRITE: > - return "urb_write"; > + return "vs_urb_write"; > case VS_OPCODE_SCRATCH_READ: > return "scratch_read"; > case VS_OPCODE_SCRATCH_WRITE: > @@ -497,6 +497,9 @@ brw_instruction_name(enum opcode op) > case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: > return "unpack_flags_simd4x2"; > > + case GS_OPCODE_URB_WRITE: > + return "gs_urb_write"; > + > default: > /* Yes, this leaks. It's in debug code, it should never occur, and > if > * it does, you should just add the case to the list above. > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index abdf3ab..c978396 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -259,6 +259,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction > *inst) > return 2; > case VS_OPCODE_SCRATCH_WRITE: > return 3; > + case GS_OPCODE_URB_WRITE: > + return 0; > case SHADER_OPCODE_SHADER_TIME_ADD: > return 0; > case SHADER_OPCODE_TEX: > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index a398f71..c3e2212 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -627,7 +627,8 @@ private: > struct brw_reg dst, > struct brw_reg src); > > - void generate_urb_write(vec4_instruction *inst); > + void generate_vs_urb_write(vec4_instruction *inst); > + void generate_gs_urb_write(vec4_instruction *inst); > void generate_oword_dual_block_offsets(struct brw_reg m1, > struct brw_reg index); > void generate_scratch_write(vec4_instruction *inst, > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > index 89831de..681dbdd 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > @@ -399,7 +399,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, > } > > void > -vec4_generator::generate_urb_write(vec4_instruction *inst) > +vec4_generator::generate_vs_urb_write(vec4_instruction *inst) > { > brw_urb_WRITE(p, > brw_null_reg(), /* dest */ > @@ -413,6 +413,21 @@ vec4_generator::generate_urb_write(vec4_instruction > *inst) > } > > void > +vec4_generator::generate_gs_urb_write(vec4_instruction *inst) > +{ > + struct brw_reg src = brw_message_reg(inst->base_mrf); > + brw_urb_WRITE(p, > + brw_null_reg(), /* dest */ > + inst->base_mrf, /* starting mrf reg nr */ > + src, > + inst->urb_write_flags, > + inst->mlen, > + 0, /* response len */ > + inst->offset, /* urb destination offset */ > + BRW_URB_SWIZZLE_INTERLEAVE); > +} >
>From our in-person code review yesterday: There are space vs. tab issues in the code above. > + > +void > vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, > struct brw_reg index) > { > @@ -861,7 +876,7 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > break; > > case VS_OPCODE_URB_WRITE: > - generate_urb_write(inst); > + generate_vs_urb_write(inst); > break; > > case VS_OPCODE_SCRATCH_READ: > @@ -880,6 +895,10 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); > break; > > + case GS_OPCODE_URB_WRITE: > + generate_gs_urb_write(inst); > + break; > + > case SHADER_OPCODE_SHADER_TIME_ADD: > brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME); > mark_surface_used(SURF_INDEX_VS_SHADER_TIME); > -- > 1.8.3.4 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev