I'd probably split this into two patches: i965/vec4: Add and use writes_accumulator flag. i965/vec4: Add and use MAC instruction.
(or drop the /vec4 prefixes and implement it for both vec4 and fs backends! :) The first patch also needs to update the instruction scheduling code in brw_schedule_instructions.cpp -- specifically, ::calculate_deps(). Otherwise we might schedule instructions in an order that writes the accumulator after we were supposed to read it. On Mon, Mar 10, 2014 at 7:59 AM, Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> wrote: > This change MACH, ADDC and SUBB opcodes to use added accumulator changes > flag and add new opcode MAC which use accumulator flag. > > Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_eu.h | 1 + > src/mesa/drivers/dri/i965/brw_vec4.cpp | 10 ++-------- > src/mesa/drivers/dri/i965/brw_vec4.h | 1 + > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 12 ++++++------ > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 20 ++++++++++++++++---- > 5 files changed, 26 insertions(+), 18 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 5df6bb7..f10ad50 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -183,6 +183,7 @@ ALU1(FBL) > ALU1(CBIT) > ALU2(ADDC) > ALU2(SUBB) > +ALU2(MAC) > > ROUND(RNDZ) > ROUND(RNDE) > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index c30afae..6b1fde7 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -337,16 +337,10 @@ vec4_visitor::dead_code_eliminate() > * accumulator as a side-effect. Instead just set the destination > * to the null register to free it. > */ > - switch (inst->opcode) { > - case BRW_OPCODE_ADDC: > - case BRW_OPCODE_SUBB: > - case BRW_OPCODE_MACH: > + if (inst->write_accumulator) > inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); > - break; > - default: > + else > inst->remove(); > - break; > - } > progress = true; > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index d97c103..a1e973f 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -242,6 +242,7 @@ public: > bool saturate; > bool force_writemask_all; > bool no_dd_clear, no_dd_check; > + bool write_accumulator; /**< intsruction write result in accumulator */ instruction I'd probably say "instruction implicitly writes accumulator" to make it clear that this flag isn't set when the destination register is the accumulator. In the same vein, maybe the field should be named something that makes that clear. I can't come up with a good name though. > int conditional_mod; /**< BRW_CONDITIONAL_* */ > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index a74514f..3e85ab1 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -971,9 +971,7 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > brw_MUL(p, dst, src[0], src[1]); > break; > case BRW_OPCODE_MACH: > - brw_set_acc_write_control(p, 1); > brw_MACH(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > > case BRW_OPCODE_MAD: > @@ -1077,16 +1075,17 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > break; > case BRW_OPCODE_ADDC: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_ADDC(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > case BRW_OPCODE_SUBB: > assert(brw->gen >= 7); > - brw_set_acc_write_control(p, 1); > brw_SUBB(p, dst, src[0], src[1]); > - brw_set_acc_write_control(p, 0); > break; > + case BRW_OPCODE_MAC: > + assert(brw->gen >= 4); The i965 driver only runs on gen >= 4, so no need for this assert. > + brw_MAC(p, dst, src[0], src[1]); > + break; > + > > case BRW_OPCODE_BFE: > assert(brw->gen >= 7); > @@ -1317,6 +1316,7 @@ vec4_generator::generate_code(exec_list *instructions) > brw_set_predicate_inverse(p, inst->predicate_inverse); > brw_set_saturate(p, inst->saturate); > brw_set_mask_control(p, inst->force_writemask_all); > + brw_set_acc_write_control(p, inst->write_accumulator); > > unsigned pre_emit_nr_insn = p->nr_insn; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 88ee929..dc58457 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v, > this->force_writemask_all = false; > this->no_dd_clear = false; > this->no_dd_check = false; > + this->write_accumulator = false; > this->conditional_mod = BRW_CONDITIONAL_NONE; > this->sampler = 0; > this->texture_offset = 0; > @@ -121,7 +122,17 @@ vec4_visitor::emit(enum opcode opcode) > vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \ > { \ > return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ > - src0, src1); \ > + src0, src1); \ > + } > + > +#define ALU2_ACC(op) \ > + vec4_instruction * \ > + vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \ > + { \ > + vec4_instruction *rVar = new(mem_ctx) vec4_instruction(this, \ Instead of rVar, we usually just name these return values 'inst'. > + BRW_OPCODE_##op, dst, src0, src1); \ > + rVar->write_accumulator = true; \ > + return rVar; \ > } > > #define ALU3(op) \ > @@ -143,7 +154,7 @@ ALU1(F32TO16) > ALU1(F16TO32) > ALU2(ADD) > ALU2(MUL) > -ALU2(MACH) > +ALU2_ACC(MACH) > ALU2(AND) > ALU2(OR) > ALU2(XOR) > @@ -162,8 +173,9 @@ ALU1(FBH) > ALU1(FBL) > ALU1(CBIT) > ALU3(MAD) > -ALU2(ADDC) > -ALU2(SUBB) > +ALU2_ACC(ADDC) > +ALU2_ACC(SUBB) > +ALU2_ACC(MAC) > > /** Gen4 predicated IF. */ > vec4_instruction * > -- > 1.8.1.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev