Previously, ir_unop_any was implemented via a dot-product call, which uses floating point multiplication and addition. The multiplication was completely pointless, and the addition can just as well be done with an or. Since we know that the inputs are booleans, they must already be in canonical 0/~0 format, and the final SNE can also be avoided.
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- v1 -> v2: - set swizzle mask on accum - set swizzle mask on op Roland, Marek, the changes are relatively small compared to v1, but would be nice to have another look if you don't mind. Amazingly, only one test failed without those -- glsl-fs-any. All the other any_*'s don't need this logic because they start out with a FSNE/etc which takes care of the op's swizzles. This is what the glsl-fs-any shader looks like: uniform vec2 args; void main() { bvec2 argsb = bvec2(args); bvec2 v_true = bvec2(argsb.xx); bvec2 v_some = bvec2(argsb.xy); bvec2 v_none = bvec2(argsb.yy); bool true1 = any(v_true); bool true2 = any(v_some); bool false1 = any(v_none); gl_FragColor = vec4(float(true1), float(true2), float(false1), 0.0); } FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx 1: MOV TEMP[1].w, IMM[0].xxxx 2: OR TEMP[2].x, TEMP[0].xxxx, TEMP[0].xxxx 3: AND TEMP[1].x, TEMP[2].xxxx, IMM[0].yyyy 4: OR TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy 5: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 6: MOV TEMP[1].y, TEMP[2].xxxx 7: OR TEMP[0].x, TEMP[0].yyyy, TEMP[0].yyyy 8: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 9: MOV TEMP[1].z, TEMP[0].xxxx 10: MOV OUT[0], TEMP[1] 11: END Any half-decent optimizing backend should have no trouble removing the "or a, a" types of things too, whereas that might have been trickier with a DP2 + FSNE. src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 100 ++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 24 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index bdee1f4..1a37ba9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1671,30 +1671,82 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *const dp = - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - result_dst.type == GLSL_TYPE_FLOAT) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else if (result_dst.type == GLSL_TYPE_FLOAT) { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * is achieved using SLT. - */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); - } - else { - /* Use SNE 0 if integers are being used as boolean values. */ - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + if (native_integers) { + int dst_swizzle = 0, op0_swizzle, i; + st_src_reg accum = op[0]; + + op0_swizzle = op[0].swizzle; + accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0), + GET_SWZ(op0_swizzle, 0)); + for (i = 0; i < 4; i++) { + if (result_dst.writemask & (1 << i)) { + dst_swizzle = MAKE_SWIZZLE4(i, i, i, i); + break; + } + } + assert(i != 4); + assert(ir->operands[0]->type->is_boolean()); + + /* OR all the components together, since they should be either 0 or ~0 + */ + switch (ir->operands[0]->type->vector_elements) { + case 4: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3), + GET_SWZ(op0_swizzle, 3)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + accum = st_src_reg(result_dst); + accum.swizzle = dst_swizzle; + /* fallthrough */ + case 3: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2), + GET_SWZ(op0_swizzle, 2)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + accum = st_src_reg(result_dst); + accum.swizzle = dst_swizzle; + /* fallthrough */ + case 2: + op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1), + GET_SWZ(op0_swizzle, 1)); + emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]); + break; + default: + assert(!"Unexpected vector size"); + break; + } + } else { + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } } break; } -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev