According to the documentation, Ivybridge's math instruction works in SIMD16 mode for the fragment shader, and no longer forbids align16 mode for the vertex shader.
The documentation claims that SIMD16 mode isn't supported for INT DIV, but empirical evidence shows that it works fine. Presumably the note is trying to warn us that the variant that returns both quotient and remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1 would be sechalf(dst), trashing half your results. Since we don't use that variant, we don't care and can just enable SIMD16 everywhere. The documentation also still claims that source modifiers and conditional modifiers aren't supported, but empirical evidence and study of the simulator both show that they work just fine. Goodbye workarounds. Math just works now. Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 29 +++++++++++++++--------- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++- src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 31 ++++++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 19 ++++++++++++++- 6 files changed, 80 insertions(+), 16 deletions(-) Requires my INT DIV patch series to apply cleanly. diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 5c8d63d..258ffbf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p, assert(src.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); - /* Source modifiers are ignored for extended math instructions. */ - assert(!src.negate); - assert(!src.abs); + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src.negate); + assert(!src.abs); + } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || @@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) { + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || @@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } - /* Source modifiers are ignored for extended math instructions. */ - assert(!src0.negate); - assert(!src0.abs); - assert(!src1.negate); - assert(!src1.abs); + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + } /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2000180..e3c3d85 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) * The hardware ignores source modifiers (negate and abs) on math * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && (src.file == UNIFORM || + if (intel->gen == 6 && (src.file == UNIFORM || src.abs || src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); @@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) return NULL; } - if (intel->gen >= 6) { + if (intel->gen >= 7) { + inst = emit(opcode, dst, src0, src1); + } else if (intel->gen == 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. * * The hardware ignores source modifiers (negate and abs) on math diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 56181a3..27b3c22 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -489,6 +489,13 @@ public: void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); + void generate_math1_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math2_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); void generate_math1_gen6(fs_inst *inst, struct brw_reg dst, struct brw_reg src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 4c158fe..9697762 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst, } void +fs_visitor::generate_math1_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0) +{ + assert(inst->mlen == 0); + brw_math(p, dst, + brw_math_function(inst->opcode), + inst->saturate ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE, + 0, src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +void +fs_visitor::generate_math2_gen7(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + assert(inst->mlen == 0); + brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); +} + +void fs_visitor::generate_math1_gen6(fs_inst *inst, struct brw_reg dst, struct brw_reg src0) @@ -788,7 +813,9 @@ fs_visitor::generate_code() case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: - if (intel->gen >= 6) { + if (intel->gen >= 7) { + generate_math1_gen7(inst, dst, src[0]); + } else if (intel->gen == 6) { generate_math1_gen6(inst, dst, src[0]); } else { generate_math_gen4(inst, dst, src[0]); @@ -798,6 +825,8 @@ fs_visitor::generate_code() case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_POW: if (intel->gen >= 6) { + generate_math2_gen7(inst, dst, src[0], src[1]); + } else if (intel->gen == 6) { generate_math2_gen6(inst, dst, src[0], src[1]); } else { generate_math_gen4(inst, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5f44268..bcfedb6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -532,6 +532,10 @@ public: struct brw_reg dst, struct brw_reg src0, struct brw_reg src1); + void generate_math2_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); void generate_urb_write(vec4_instruction *inst); void generate_oword_dual_block_offsets(struct brw_reg m1, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index f6555ea..b956086 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, } void +vec4_visitor::generate_math2_gen7(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); +} + +void vec4_visitor::generate_math2_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, @@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: - if (intel->gen >= 6) { + if (intel->gen == 6) { generate_math1_gen6(inst, dst, src[0]); } else { + /* Also works for Gen7. */ generate_math1_gen4(inst, dst, src[0]); } break; @@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: - if (intel->gen >= 6) { + if (intel->gen >= 7) { + generate_math2_gen7(inst, dst, src[0], src[1]); + } else if (intel->gen == 6) { generate_math2_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); -- 1.7.6.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev