On 18 October 2011 12:24, Kenneth Graunke <kenn...@whitecape.org> wrote:
> According to the documentation, Ivybridge's math instruction works in > SIMD16 mode for the fragment shader, and no longer forbids align16 mode > for the vertex shader. > > The documentation claims that SIMD16 mode isn't supported for INT DIV, > but empirical evidence shows that it works fine. Presumably the note > is trying to warn us that the variant that returns both quotient and > remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1 > would be sechalf(dst), trashing half your results. Since we don't use > that variant, we don't care and can just enable SIMD16 everywhere. > > The documentation also still claims that source modifiers and > conditional modifiers aren't supported, but empirical evidence and > study of the simulator both show that they work just fine. > > Goodbye workarounds. Math just works now. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_eu_emit.c | 29 > +++++++++++++++--------- > src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++- > src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++++ > src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 31 > ++++++++++++++++++++++++++- > src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++ > src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 19 ++++++++++++++- > 6 files changed, 80 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index 5caebfc..0841478 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p, > assert(src.file == BRW_GENERAL_REGISTER_FILE); > > assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); > - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); > + if (intel->gen == 6) > + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); > > - /* Source modifiers are ignored for extended math instructions. */ > - assert(!src.negate); > - assert(!src.abs); > + /* Source modifiers are ignored for extended math instructions on > Gen6. */ > + if (intel->gen == 6) { > + assert(!src.negate); > + assert(!src.abs); > + } > > if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || > function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || > @@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p, > assert(src1.file == BRW_GENERAL_REGISTER_FILE); > > assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); > - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); > - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); > + if (intel->gen == 6) { > + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); > + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); > + } > > if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || > function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || > @@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p, > assert(src1.type == BRW_REGISTER_TYPE_F); > } > > - /* Source modifiers are ignored for extended math instructions. */ > - assert(!src0.negate); > - assert(!src0.abs); > - assert(!src1.negate); > - assert(!src1.abs); > + /* Source modifiers are ignored for extended math instructions on Gen6. > */ > + if (intel->gen == 6) { > + assert(!src0.negate); > + assert(!src0.abs); > + assert(!src1.negate); > + assert(!src1.abs); > + } > > /* Math is the same ISA format as other opcodes, except that > CondModifier > * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index f731662..6c417d4 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, > fs_reg src) > * The hardware ignores source modifiers (negate and abs) on math > * instructions, so we also move to a temp to set those up. > Probably should change this comment to clarify that this is only the case for Gen6. > */ > - if (intel->gen >= 6 && (src.file == UNIFORM || > + if (intel->gen == 6 && (src.file == UNIFORM || > src.abs || > src.negate)) { > fs_reg expanded = fs_reg(this, glsl_type::float_type); > @@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, > fs_reg src0, fs_reg src1) > return NULL; > } > > - if (intel->gen >= 6) { > + if (intel->gen >= 7) { > + inst = emit(opcode, dst, src0, src1); > + } else if (intel->gen == 6) { > /* Can't do hstride == 0 args to gen6 math, so expand it out. > * > * The hardware ignores source modifiers (negate and abs) on math > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 4035186..85d3cad 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -490,6 +490,13 @@ public: > void generate_linterp(fs_inst *inst, struct brw_reg dst, > struct brw_reg *src); > void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg > src); > + void generate_math1_gen7(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src); > + void generate_math2_gen7(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1); > void generate_math1_gen6(fs_inst *inst, > struct brw_reg dst, > struct brw_reg src); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > index 4c158fe..9697762 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp > @@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst, > } > > void > +fs_visitor::generate_math1_gen7(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src0) > +{ > + assert(inst->mlen == 0); > + brw_math(p, dst, > + brw_math_function(inst->opcode), > + inst->saturate ? BRW_MATH_SATURATE_SATURATE > + : BRW_MATH_SATURATE_NONE, > + 0, src0, > + BRW_MATH_DATA_VECTOR, > + BRW_MATH_PRECISION_FULL); > +} > + > +void > +fs_visitor::generate_math2_gen7(fs_inst *inst, > + struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1) > +{ > + assert(inst->mlen == 0); > + brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); > +} > + > +void > fs_visitor::generate_math1_gen6(fs_inst *inst, > struct brw_reg dst, > struct brw_reg src0) > @@ -788,7 +813,9 @@ fs_visitor::generate_code() > case SHADER_OPCODE_LOG2: > case SHADER_OPCODE_SIN: > case SHADER_OPCODE_COS: > - if (intel->gen >= 6) { > + if (intel->gen >= 7) { > + generate_math1_gen7(inst, dst, src[0]); > + } else if (intel->gen == 6) { > generate_math1_gen6(inst, dst, src[0]); > } else { > generate_math_gen4(inst, dst, src[0]); > @@ -798,6 +825,8 @@ fs_visitor::generate_code() > case SHADER_OPCODE_INT_REMAINDER: > case SHADER_OPCODE_POW: > if (intel->gen >= 6) { > + generate_math2_gen7(inst, dst, src[0], src[1]); > + } else if (intel->gen == 6) { > generate_math2_gen6(inst, dst, src[0], src[1]); > } else { > generate_math_gen4(inst, dst, src[0]); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index eae841c..4d8bb2d 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -532,6 +532,10 @@ public: > struct brw_reg dst, > struct brw_reg src0, > struct brw_reg src1); > + void generate_math2_gen7(vec4_instruction *inst, > + struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1); > > void generate_urb_write(vec4_instruction *inst); > void generate_oword_dual_block_offsets(struct brw_reg m1, > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > index ccbad25..f66091d 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > @@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction > *inst, > } > > void > +vec4_visitor::generate_math2_gen7(vec4_instruction *inst, > + struct brw_reg dst, > + struct brw_reg src0, > + struct brw_reg src1) > +{ > + brw_math2(p, > + dst, > + brw_math_function(inst->opcode), > + src0, src1); > +} > + > +void > vec4_visitor::generate_math2_gen6(vec4_instruction *inst, > struct brw_reg dst, > struct brw_reg src0, > @@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction > *instruction, > case SHADER_OPCODE_LOG2: > case SHADER_OPCODE_SIN: > case SHADER_OPCODE_COS: > - if (intel->gen >= 6) { > + if (intel->gen == 6) { > generate_math1_gen6(inst, dst, src[0]); > } else { > + /* Also works for Gen7. */ > generate_math1_gen4(inst, dst, src[0]); > } > break; > @@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction > *instruction, > case SHADER_OPCODE_POW: > case SHADER_OPCODE_INT_QUOTIENT: > case SHADER_OPCODE_INT_REMAINDER: > - if (intel->gen >= 6) { > + if (intel->gen >= 7) { > + generate_math2_gen7(inst, dst, src[0], src[1]); > + } else if (intel->gen == 6) { > generate_math2_gen6(inst, dst, src[0], src[1]); > } else { > generate_math2_gen4(inst, dst, src[0], src[1]); > -- > 1.7.7 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > Other than the above comment, this patch is: Reviewed-by: Paul Berry <stereotype...@gmail.com>
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev