On 25 October 2013 16:45, Anuj Phogat <anuj.pho...@gmail.com> wrote: > V2: > - Update comments > - Use fs_reg(0xffff) in AND instruction to get the 16 bit > sample_mask. > - Add a special backend instructions to compute sample_mask. > - Add a new variable uses_omask in brw_wm_prog_data. > > Signed-off-by: Anuj Phogat <anuj.pho...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/brw_defines.h | 1 + > src/mesa/drivers/dri/i965/brw_fs.h | 5 +++++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 23 > +++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 15 +++++++++++++++ > 5 files changed, 45 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index d16f257..d623368 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -381,6 +381,7 @@ struct brw_wm_prog_data { > GLuint nr_pull_params; > bool dual_src_blend; > bool uses_pos_offset; > + bool uses_omask; > uint32_t prog_offset_16; > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index f3c994b..f9556a5 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -788,6 +788,7 @@ enum opcode { > FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, > FS_OPCODE_MOV_DISPATCH_TO_FLAGS, > FS_OPCODE_DISCARD_JUMP, > + FS_OPCODE_SET_OMASK, > FS_OPCODE_SET_SAMPLE_ID, > FS_OPCODE_SET_SIMD4X2_OFFSET, > FS_OPCODE_PACK_HALF_2x16_SPLIT, > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 8a1a414..c9bcc4e 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -436,6 +436,7 @@ public: > > struct hash_table *variable_ht; > fs_reg frag_depth; > + fs_reg sample_mask; > fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; > unsigned output_components[BRW_MAX_DRAW_BUFFERS]; > fs_reg dual_src_output; > @@ -540,6 +541,10 @@ private: > struct brw_reg offset); > void generate_mov_dispatch_to_flags(fs_inst *inst); > > + void generate_set_omask(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg sample_mask); > + > void generate_set_sample_id(fs_inst *inst, > struct brw_reg dst, > struct brw_reg src0, > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 4f15ed7..fc8e0bd 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1024,6 +1024,25 @@ fs_generator::generate_set_simd4x2_offset(fs_inst > *inst, > brw_pop_insn_state(p); > } > > +/* Sets vstride=16, width=8, hstride=2 of register mask before > + * moving to register dst. > + */ > +void > +fs_generator::generate_set_omask(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg mask) > +{ > + assert(dst.type == BRW_REGISTER_TYPE_UW); > + if (dispatch_width == 16) > + dst = vec16(dst); > + brw_push_insn_state(p); > + brw_set_compression_control(p, BRW_COMPRESSION_NONE); > + brw_set_mask_control(p, BRW_MASK_DISABLE); > + brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0), > + dst.type), 16, 8, 2)); > + brw_pop_insn_state(p); > +} > + > /* Sets vstride=1, width=4, hstride=0 of register src1 during > * the ADD instruction. > */ > @@ -1576,6 +1595,10 @@ fs_generator::generate_code(exec_list *instructions) > generate_set_simd4x2_offset(inst, dst, src[0]); > break; > > + case FS_OPCODE_SET_OMASK: > + generate_set_omask(inst, dst, src[0]); > + break; > + > case FS_OPCODE_SET_SAMPLE_ID: > generate_set_sample_id(inst, dst, src[0], src[1]); > break; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 7a6a0b5..b9eb5b8 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -82,6 +82,8 @@ fs_visitor::visit(ir_variable *ir) > } > } else if (ir->location == FRAG_RESULT_DEPTH) { > this->frag_depth = *reg; > + } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) { > + this->sample_mask = *reg; > } else { > /* gl_FragData or a user-defined FS output */ > assert(ir->location >= FRAG_RESULT_DATA0 && > @@ -2510,6 +2512,19 @@ fs_visitor::emit_fb_writes() > pop_force_uncompressed(); > } > > + c->prog_data.uses_omask = > + fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); > + if(c->prog_data.uses_omask) { > + this->current_annotation = "FB write oMask"; > + assert(this->sample_mask.file != BAD_FILE); > + fs_reg reg = fs_reg(this, glsl_type::int_type); > + > + /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */ > + emit(AND(reg, this->sample_mask, fs_reg(0xffff))); >
I think we can drop this AND instruction, since FS_OPCODE_SET_OMASK wll treat reg as UW, and it will use a stride that causes it to skip alternate words. So the upper 16 bits of sample mask are ignored anyway. With that changed, this patch is: Reviewed-by: Paul Berry <stereotype...@gmail.com> > + emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), > reg); > + nr += 1; > + } > + > /* Reserve space for color. It'll be filled in per MRT below. */ > int color_mrf = nr; > nr += 4 * reg_width; > -- > 1.8.1.4 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev