[Mesa-dev] [PATCH v2] mesa/st: provide native integers implementation of ir_unop_any

Ilia Mirkin Thu, 08 May 2014 16:53:25 -0700

Previously, ir_unop_any was implemented via a dot-product call, which
uses floating point multiplication and addition. The multiplication was
completely pointless, and the addition can just as well be done with an
or. Since we know that the inputs are booleans, they must already be in
canonical 0/~0 format, and the final SNE can also be avoided.


Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

v1 -> v2:
 - set swizzle mask on accum
 - set swizzle mask on op

Roland, Marek, the changes are relatively small compared to v1, but would be
nice to have another look if you don't mind.

Amazingly, only one test failed without those -- glsl-fs-any. All the other
any_*'s don't need this logic because they start out with a FSNE/etc which
takes care of the op's swizzles.

This is what the glsl-fs-any shader looks like:

uniform vec2 args;

void main()
{
        bvec2 argsb = bvec2(args);

        bvec2 v_true = bvec2(argsb.xx);
        bvec2 v_some = bvec2(argsb.xy);
        bvec2 v_none = bvec2(argsb.yy);
        bool true1 = any(v_true);
        bool true2 = any(v_some);
        bool false1 = any(v_none);
        gl_FragColor = vec4(float(true1), float(true2), float(false1), 0.0);
}

FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: FSNE TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: OR TEMP[2].x, TEMP[0].xxxx, TEMP[0].xxxx
  3: AND TEMP[1].x, TEMP[2].xxxx, IMM[0].yyyy
  4: OR TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy
  5: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy
  6: MOV TEMP[1].y, TEMP[2].xxxx
  7: OR TEMP[0].x, TEMP[0].yyyy, TEMP[0].yyyy
  8: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
  9: MOV TEMP[1].z, TEMP[0].xxxx
 10: MOV OUT[0], TEMP[1]
 11: END

Any half-decent optimizing backend should have no trouble removing the "or a,
a" types of things too, whereas that might have been trickier with a DP2 +
FSNE.

 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 100 ++++++++++++++++++++++-------
 1 file changed, 76 insertions(+), 24 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index bdee1f4..1a37ba9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1671,30 +1671,82 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
 
-      /* After the dot-product, the value will be an integer on the
-       * range [0,4].  Zero stays zero, and positive values become 1.0.
-       */
-      glsl_to_tgsi_instruction *const dp =
-         emit_dp(ir, result_dst, op[0], op[0],
-                 ir->operands[0]->type->vector_elements);
-      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
-          result_dst.type == GLSL_TYPE_FLOAT) {
-             /* The clamping to [0,1] can be done for free in the fragment
-              * shader with a saturate.
-              */
-             dp->saturate = true;
-      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
-             /* Negating the result of the dot-product gives values on the 
range
-              * [-4, 0].  Zero stays zero, and negative values become 1.0.  
This
-              * is achieved using SLT.
-              */
-             st_src_reg slt_src = result_src;
-             slt_src.negate = ~slt_src.negate;
-             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, 
st_src_reg_for_float(0.0));
-      }
-      else {
-         /* Use SNE 0 if integers are being used as boolean values. */
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, 
st_src_reg_for_int(0));
+      if (native_integers) {
+         int dst_swizzle = 0, op0_swizzle, i;
+         st_src_reg accum = op[0];
+
+         op0_swizzle = op[0].swizzle;
+         accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
+                                       GET_SWZ(op0_swizzle, 0),
+                                       GET_SWZ(op0_swizzle, 0),
+                                       GET_SWZ(op0_swizzle, 0));
+         for (i = 0; i < 4; i++) {
+            if (result_dst.writemask & (1 << i)) {
+               dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
+               break;
+            }
+         }
+         assert(i != 4);
+         assert(ir->operands[0]->type->is_boolean());
+
+         /* OR all the components together, since they should be either 0 or ~0
+          */
+         switch (ir->operands[0]->type->vector_elements) {
+         case 4:
+            op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
+                                          GET_SWZ(op0_swizzle, 3),
+                                          GET_SWZ(op0_swizzle, 3),
+                                          GET_SWZ(op0_swizzle, 3));
+            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            accum = st_src_reg(result_dst);
+            accum.swizzle = dst_swizzle;
+            /* fallthrough */
+         case 3:
+            op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
+                                          GET_SWZ(op0_swizzle, 2),
+                                          GET_SWZ(op0_swizzle, 2),
+                                          GET_SWZ(op0_swizzle, 2));
+            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            accum = st_src_reg(result_dst);
+            accum.swizzle = dst_swizzle;
+            /* fallthrough */
+         case 2:
+            op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
+                                          GET_SWZ(op0_swizzle, 1),
+                                          GET_SWZ(op0_swizzle, 1),
+                                          GET_SWZ(op0_swizzle, 1));
+            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            break;
+         default:
+            assert(!"Unexpected vector size");
+            break;
+         }
+      } else {
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *const dp =
+            emit_dp(ir, result_dst, op[0], op[0],
+                    ir->operands[0]->type->vector_elements);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+             result_dst.type == GLSL_TYPE_FLOAT) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate.
+             */
+            dp->saturate = true;
+         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, 
st_src_reg_for_float(0.0));
+         }
+         else {
+            /* Use SNE 0 if integers are being used as boolean values. */
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, 
st_src_reg_for_int(0));
+         }
       }
       break;
    }
-- 
1.8.3.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2] mesa/st: provide native integers implementation of ir_unop_any

Reply via email to