On Fri, Mar 21, 2014 at 7:33 AM, Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> wrote: > Our hardware has an "accumulator" register, which can be used to store > intermediate results across multiple instructions. Many instructions > can implicitly write a value to the accumulator in addition to their > normal destination register. This is enabled by the "AccWrEn" flag. > > This patch introduces a new flag, inst->writes_accumulator, which > allows us to express the AccWrEn notion in the IR. It also creates a > n ALU2_ACC macro to easily define emitters for instructions that > implicitly write the accumulator. > > Previously, we only supported implicit accumulator writes from the > ADDC, SUBB, and MACH instructions. We always enabled them on those > instructions, and left them disabled for other instructions. > > To take advantage of the MAC (multiply-accumulate) instruction, we > need to be able to set AccWrEn on other types of instructions. > > Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 34 > +++++++++++------------- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 +---- > src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++ > src/mesa/drivers/dri/i965/brw_shader.h | 1 + > src/mesa/drivers/dri/i965/brw_vec4.cpp | 15 +---------- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 7 +---- > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 21 +++++++++++---- > 7 files changed, 39 insertions(+), 49 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 27cf0f6..107d9e6 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -64,6 +64,8 @@ fs_inst::init() > > /* This will be the case for almost all instructions. */ > this->regs_written = 1; > + > + this->writes_accumulator = false; > } > > fs_inst::fs_inst() > @@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst, > return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \ > } > > +#define ALU2_ACC(op) \ > + fs_inst * \ > + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \ > + { \ > + fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, > src1);\ > + inst->writes_accumulator = true; \ > + return inst; \ > + } > + > #define ALU3(op) \ > fs_inst * \ > fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \ > @@ -166,7 +177,7 @@ ALU1(RNDE) > ALU1(RNDZ) > ALU2(ADD) > ALU2(MUL) > -ALU2(MACH) > +ALU2_ACC(MACH) > ALU2(AND) > ALU2(OR) > ALU2(XOR) > @@ -182,8 +193,8 @@ ALU1(FBH) > ALU1(FBL) > ALU1(CBIT) > ALU3(MAD) > -ALU2(ADDC) > -ALU2(SUBB) > +ALU2_ACC(ADDC) > +ALU2_ACC(SUBB) > ALU2(SEL) > > /** Gen4 predicated IF. */ > @@ -2103,21 +2114,8 @@ fs_visitor::dead_code_eliminate() > } > > if (dead) { > - /* Don't dead code eliminate instructions that write to the > - * accumulator as a side-effect. Instead just set the destination > - * to the null register to free it. > - */ > - switch (inst->opcode) { > - case BRW_OPCODE_ADDC: > - case BRW_OPCODE_SUBB: > - case BRW_OPCODE_MACH: > - inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); > - break; > - default: > - inst->remove(); > - progress = true; > - break; > - } > + inst->remove(); > + progress = true;
I think you meant something like if (inst->writes_accumulator) { inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); } else { inst->remove(); } progress = true; rather than removing instructions that implicitly wrote the accumulator when their destination was dead. > } > } > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index e590bdf..1cf35b4 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, > FILE *dump_file) > brw_set_flag_reg(p, 0, inst->flag_subreg); > brw_set_saturate(p, inst->saturate); > brw_set_mask_control(p, inst->force_writemask_all); > + brw_set_acc_write_control(p, inst->writes_accumulator); > > if (inst->force_uncompressed || dispatch_width == 8) { > brw_set_compression_control(p, BRW_COMPRESSION_NONE); > @@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, > FILE *dump_file) > brw_AVG(p, dst, src[0], src[1]); > break; > case BRW_OPCODE_MACH: > - brw_set_acc_write_control(p, 1); > brw_MACH(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > > case BRW_OPCODE_MAD: > @@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, > FILE *dump_file) > break; > case BRW_OPCODE_ADDC: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_ADDC(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > case BRW_OPCODE_SUBB: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_SUBB(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > > case BRW_OPCODE_BFE: > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 407319b..7a26755 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -666,6 +666,9 @@ backend_instruction::can_do_saturate() > bool > backend_instruction::has_side_effects() const > { > + if (this->writes_accumulator) > + return true; > + > switch (opcode) { > case SHADER_OPCODE_UNTYPED_ATOMIC: > return true; > diff --git a/src/mesa/drivers/dri/i965/brw_shader.h > b/src/mesa/drivers/dri/i965/brw_shader.h > index 086d042..e0f6a06 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.h > +++ b/src/mesa/drivers/dri/i965/brw_shader.h > @@ -59,6 +59,7 @@ public: > > uint8_t predicate; > bool predicate_inverse; > + bool writes_accumulator; /**< instruction implicitly writes accumulator */ > }; > > enum instruction_scheduler_mode { > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 87825f1..473f26c 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -333,20 +333,7 @@ vec4_visitor::dead_code_eliminate() > if (inst->dst.file == GRF && !inst->has_side_effects()) { > assert(this->virtual_grf_end[inst->dst.reg] >= pc); > if (this->virtual_grf_end[inst->dst.reg] == pc) { > - /* Don't dead code eliminate instructions that write to the > - * accumulator as a side-effect. Instead just set the destination > - * to the null register to free it. > - */ > - switch (inst->opcode) { > - case BRW_OPCODE_ADDC: > - case BRW_OPCODE_SUBB: > - case BRW_OPCODE_MACH: > - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); > - break; > - default: > - inst->remove(); > - break; > - } > + inst->remove(); > progress = true; Same comment as above. > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index a74514f..5f85d31 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -971,9 +971,7 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > brw_MUL(p, dst, src[0], src[1]); > break; > case BRW_OPCODE_MACH: > - brw_set_acc_write_control(p, 1); > brw_MACH(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > > case BRW_OPCODE_MAD: > @@ -1077,15 +1075,11 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > break; > case BRW_OPCODE_ADDC: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_ADDC(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > case BRW_OPCODE_SUBB: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_SUBB(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > > case BRW_OPCODE_BFE: > @@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions) > brw_set_predicate_inverse(p, inst->predicate_inverse); > brw_set_saturate(p, inst->saturate); > brw_set_mask_control(p, inst->force_writemask_all); > + brw_set_acc_write_control(p, inst->writes_accumulator); > > unsigned pre_emit_nr_insn = p->nr_insn; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index c73e58d..33df5d3 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v, > this->force_writemask_all = false; > this->no_dd_clear = false; > this->no_dd_check = false; > + this->writes_accumulator = false; > this->conditional_mod = BRW_CONDITIONAL_NONE; > this->sampler = 0; > this->texture_offset = 0; > @@ -113,7 +114,7 @@ vec4_visitor::emit(enum opcode opcode) > vec4_visitor::op(dst_reg dst, src_reg src0) \ > { \ > return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ > - src0); \ > + src0); \ Spurious change. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev