On Thursday, August 14, 2014 01:11:39 PM Iago Toral Quiroga wrote: > This implements the FF_SYNC message required in gen6 geometry shaders to > get the initial URB handle. > --- > src/mesa/drivers/dri/i965/brw_defines.h | 14 +++++++++ > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++ > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 > ++++++++++++++++++++++++ > 4 files changed, 59 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 3564041..125d728 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1002,6 +1002,20 @@ enum opcode { > * - dst is the GRF for gl_InvocationID. > */ > GS_OPCODE_GET_INSTANCE_ID, > + > + /** > + * Send a FF_SYNC message to allocate initial URB handles (gen6). > + * > + * - dst will hold the newly allocated VUE handle. It is expected to be > + * be initialized so that it can be used to as the FF_SYNC message > header > + * (that is, it won't do an implied move from R0). > + * > + * - src0 is a temporary that will be used as writeback register for the > + * FF_SYNC operation.
I was originally a bit concerned that writing over a source register could get your into trouble - say, optimization passes not noticing that you're modifying src0, or the scheduler not noticing dependencies. But, since you allocate a temporary, and only use it in this one instruction, there's not really any chance for optimizations to combine it with anything. So, I suppose this will work. Another way you might consider doing this... 1. Add GS_OPCODE_FF_SYNC to vec4_visitor::implied_mrf_writes, and return 1. This tells the scheduler etc. that you implicitly trash 1 MRF. 2. Make the visitor set inst->base_mrf, and MOV <base_mrf> g0. 3. Make the generator use brw_message_reg(inst->base_mrf) for the header MOVs. Then, src0 can be the number of primitives written, and you don't need src1. That seems simpler to me. brw_ff_sync will resolve your GRF (dst) to an MRF anyway; the above approach would write directly into the MRF, saving you a GRF. Thoughts? > + * > + * - src1 is the number of primitives written. > + */ > + GS_OPCODE_FF_SYNC, > }; > > enum brw_urb_write_flags { > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 0033135..5749061 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -528,6 +528,8 @@ brw_instruction_name(enum opcode op) > return "set_channel_masks"; > case GS_OPCODE_GET_INSTANCE_ID: > return "get_instance_id"; > + case GS_OPCODE_FF_SYNC: > + return "ff_sync"; > > default: > /* Yes, this leaks. It's in debug code, it should never occur, and if > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 67132c0..72fabdd 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -659,6 +659,9 @@ private: > void generate_gs_prepare_channel_masks(struct brw_reg dst); > void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg > src); > void generate_gs_get_instance_id(struct brw_reg dst); > + void generate_gs_ff_sync(struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1); > void generate_oword_dual_block_offsets(struct brw_reg m1, > struct brw_reg index); > void generate_scratch_write(vec4_instruction *inst, > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index c63b47a..05f4892 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -621,6 +621,42 @@ vec4_generator::generate_gs_get_instance_id(struct > brw_reg dst) > } > > void > +vec4_generator::generate_gs_ff_sync(struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1) > +{ > + /* We use dst to setup the ff_sync header, so we expect it to be > + * initialized to R0 by the caller. Here we overwrite dword 0 (cleared > + * for now since we are not doing transform feedback) and dword 1 > + * (to hold the number of primitives written). > + */ > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); > + brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); > + brw_set_default_access_mode(p, BRW_ALIGN_16); > + brw_pop_insn_state(p); > + > + /* Write allocated URB handle to temporary passed in src0 */ > + brw_ff_sync(p, > + src0, > + 0, > + dst, > + 1, /* allocate */ > + 1, /* response length */ > + 0 /* eot */); > + > + /* Now put allocated urb handle in dst.0 */ > + brw_push_insn_state(p); > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); > + brw_set_default_access_mode(p, BRW_ALIGN_16); > + brw_pop_insn_state(p); > +} > + > +void > vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, > struct brw_reg index) > { > @@ -1198,6 +1234,10 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > generate_gs_get_instance_id(dst); > break; > > + case GS_OPCODE_FF_SYNC: > + generate_gs_ff_sync(dst, src[0], src[1]); > + break; > + > case SHADER_OPCODE_SHADER_TIME_ADD: > brw_shader_time_add(p, src[0], > prog_data->base.binding_table.shader_time_start); >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev