When mid3 is called with two constants, the resulting IR contained two maxes
and three mins, where one max and one min would have sufficed. Make mid3()
produce an ir_expression with ir_triop_mid3 (a new ir_expression operation) and
lower it in a lower_instructions pass to the required number of mins and maxes.

Tested on i965/Haswell.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861
Signed-off-by: Petri Latvala <petri.latv...@intel.com>
---

For the record, tested this with the following shader:


#extension GL_AMD_shader_trinary_minmax : require

uniform float zero;
uniform float one;
uniform float middle;

float test_all_constants()
{
        return mid3(0.0, 1.0, 0.5);
}

float test_two_constants()
{
        return mid3(0.5, one, 0.0);
}

float test_one_constant()
{
        return mid3(one, zero, 0.5);
}

float test_no_constants()
{
        return mid3(middle, one, zero);
}

void main()
{
        float r = test_all_constants();
        float g = test_two_constants();
        float b = test_one_constant();
        float a = test_no_constants();

        gl_FragColor = vec4(r, g, b, a);
}


total instructions in shared programs: 61 -> 57 (-6.56%)
instructions in affected programs:     56 -> 52 (-7.14%)


Existing piglit tests don't exercise the two-constants case at all, so there
are no results to report from them, other than that all tests pass.



 src/glsl/builtin_functions.cpp                     |   2 +-
 src/glsl/ir.cpp                                    |   2 +
 src/glsl/ir.h                                      |   9 +-
 src/glsl/ir_constant_expression.cpp                |  22 ++++
 src/glsl/ir_optimization.h                         |   1 +
 src/glsl/ir_validate.cpp                           |   6 ++
 src/glsl/lower_instructions.cpp                    | 112 +++++++++++++++++++++
 .../dri/i965/brw_fs_channel_expressions.cpp        |   1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |   6 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp           |   3 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp     |   3 +
 src/mesa/main/macros.h                             |   3 +
 src/mesa/program/ir_to_mesa.cpp                    |   5 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp         |   2 +
 14 files changed, 174 insertions(+), 3 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 3991f9d..12bbfe0 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -4260,7 +4260,7 @@ builtin_builder::_mid3(const glsl_type *type)
    ir_variable *z = in_var(type, "z");
    MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z);
 
-   ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z)));
+   ir_expression *mid3 = expr(ir_triop_mid3, x, y, z);
    body.emit(ret(mid3));
 
    return sig;
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1a18b47..bc585d6 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -436,6 +436,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1,
    case ir_triop_lrp:
    case ir_triop_bitfield_extract:
    case ir_triop_vector_insert:
+   case ir_triop_mid3:
       this->type = op0->type;
       break;
 
@@ -566,6 +567,7 @@ static const char *const operator_strs[] = {
    "bfi",
    "bitfield_extract",
    "vector_insert",
+   "mid3",
    "bitfield_insert",
    "vector",
 };
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 6c7c60a..399a4ce 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1390,9 +1390,16 @@ enum ir_expression_operation {
    ir_triop_vector_insert,
 
    /**
+    * \name Yield the per-component median of three values, part of 
AMD_shader_trinary_minmax.
+    */
+   /*@{*/
+   ir_triop_mid3,
+   /*@}*/
+
+   /**
     * A sentinel marking the last of the ternary operations.
     */
-   ir_last_triop = ir_triop_vector_insert,
+   ir_last_triop = ir_triop_mid3,
 
    ir_quadop_bitfield_insert,
 
diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 8afe8f7..1d4c0e5 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1575,6 +1575,28 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
       break;
    }
 
+   case ir_triop_mid3: {
+      assert(op[0]->type == op[1]->type);
+      assert(op[0]->type == op[2]->type);
+
+      for (unsigned c = 0; c < components; c++) {
+        switch (op[0]->type->base_type) {
+        case GLSL_TYPE_UINT:
+           data.u[c] = MID3(op[0]->value.u[c], op[1]->value.u[c], 
op[2]->value.u[c]);
+           break;
+        case GLSL_TYPE_INT:
+           data.i[c] = MID3(op[0]->value.i[c], op[1]->value.i[c], 
op[2]->value.i[c]);
+           break;
+        case GLSL_TYPE_FLOAT:
+           data.f[c] = MID3(op[0]->value.f[c], op[1]->value.f[c], 
op[2]->value.f[c]);
+           break;
+        default:
+           assert(0);
+        }
+      }
+      break;
+   }
+
    case ir_quadop_bitfield_insert: {
       int offset = op[2]->value.i[0];
       int bits = op[3]->value.i[0];
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 40bb613..bea5ba0 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -38,6 +38,7 @@
 #define INT_DIV_TO_MUL_RCP 0x40
 #define BITFIELD_INSERT_TO_BFM_BFI 0x80
 #define LDEXP_TO_ARITH     0x100
+#define MID3_TO_MIN_MAX    0x200
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 71defc8..67c711b 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -553,6 +553,12 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->type == ir->operands[0]->type);
       break;
 
+   case ir_triop_mid3:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[1]->type == ir->type);
+      assert(ir->operands[2]->type == ir->type);
+      break;
+
    case ir_quadop_bitfield_insert:
       assert(ir->operands[0]->type == ir->type);
       assert(ir->operands[1]->type == ir->type);
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 49316d0..f42e217 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -39,6 +39,7 @@
  * - MOD_TO_FRACT
  * - LDEXP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
+ * - MID3_TO_MIN_MAX
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -94,6 +95,11 @@
  * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader_5
  * with a pair of instructions.
  *
+ * MID3_TO_MIN_MAX:
+ * ----------------
+ * Many GPUs don't have a native mid3 instruction. For these GPUs, convert
+ * ir_triop_mid3(x, y, z) to max(min(x, y), max(min(x, z), min(y, z))).
+ *
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -127,6 +133,7 @@ private:
    void log_to_log2(ir_expression *);
    void bitfield_insert_to_bfm_bfi(ir_expression *);
    void ldexp_to_arith(ir_expression *);
+   void mid3_to_min_max(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -436,6 +443,106 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression 
*ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::mid3_to_min_max(ir_expression *ir)
+{
+   /* Translates
+    *    mid3 x y z
+    * into
+    *    max(min(x, y), max(min(x, z), min(y, z)))
+    *
+    * If two of the operands are constants, instead translate to
+    *    clamp(x, y, z)
+    * or rather,
+    *    min(max(x, y), z)
+    *
+    * where y and z hold the per-component minimum and maximum of the two
+    * constant operands, respectively.
+    *
+    * If all three operands are constants, the first (general) translation is
+    * used, and the constant folding optimization will handle it.
+    */
+
+   assert(ir->operation == ir_triop_mid3);
+   assert(ir->get_num_operands() == 3);
+
+   ir_rvalue *nonconst = NULL;
+   ir_constant *constants[3] = { 0 };
+   unsigned num_constants = 0;
+
+   for (unsigned i = 0; i < 3; ++i) {
+      if (ir_constant *c = ir->operands[i]->constant_expression_value()) {
+         constants[num_constants++] = c;
+      } else {
+         nonconst = ir->operands[i];
+      }
+   }
+
+   if (num_constants == 2) {
+      ir_constant_data data[2];
+
+      memset(&data, 0, sizeof(data));
+
+      assert(nonconst != NULL);
+      assert(constants[0]->type == constants[1]->type);
+      assert(constants[0]->type == ir->type);
+
+      for (unsigned i = 0; i < constants[0]->type->components(); ++i) {
+         switch (constants[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data[0].u[i] = MIN2(constants[0]->value.u[i], 
constants[1]->value.u[i]);
+            data[1].u[i] = MAX2(constants[0]->value.u[i], 
constants[1]->value.u[i]);
+            break;
+         case GLSL_TYPE_INT:
+            data[0].i[i] = MIN2(constants[0]->value.i[i], 
constants[1]->value.i[i]);
+            data[1].i[i] = MAX2(constants[0]->value.i[i], 
constants[1]->value.i[i]);
+            break;
+         case GLSL_TYPE_FLOAT:
+            data[0].f[i] = MIN2(constants[0]->value.f[i], 
constants[1]->value.f[i]);
+            data[1].f[i] = MAX2(constants[0]->value.f[i], 
constants[1]->value.f[i]);
+            break;
+         default:
+            /* unreachable */
+            assert(0);
+         }
+      }
+
+      /* c1 is the lower valued constant, c2 is the higher */
+      ir_constant *c1 = new(ir) ir_constant(ir->type, &data[0]);
+      ir_constant *c2 = new(ir) ir_constant(ir->type, &data[1]);
+
+      ir_expression *exprmax = new(ir) ir_expression(ir_binop_max, nonconst, 
c1);
+      ir->operation = ir_binop_min;
+      ir->operands[0] = exprmax;
+      ir->operands[1] = c2;
+
+      this->progress = true;
+      return;
+   }
+
+   ir_rvalue *x = ir->operands[0];
+   ir_rvalue *y = ir->operands[1];
+   ir_rvalue *z = ir->operands[2];
+
+   ir_rvalue *x2 = x->clone(ir, NULL);
+   ir_rvalue *y2 = y->clone(ir, NULL);
+   ir_rvalue *z2 = z->clone(ir, NULL);
+
+   ir_expression *firstmin = new(ir) ir_expression(ir_binop_min, x, y);
+   ir_expression *secondmin = new(ir) ir_expression(ir_binop_min, x2, z);
+   ir_expression *thirdmin = new(ir) ir_expression(ir_binop_min, y2, z2);
+
+   ir_expression *secondmax = new(ir) ir_expression(ir_binop_max, secondmin, 
thirdmin);
+
+   ir->operation = ir_binop_max;
+   ir->operands[0] = firstmin;
+   ir->operands[1] = secondmax;
+
+   this->progress = true;
+   return;
+}
+
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -482,6 +589,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          ldexp_to_arith(ir);
       break;
 
+   case ir_triop_mid3:
+      if (lowering(MID3_TO_MIN_MAX))
+         mid3_to_min_max(ir);
+      break;
+
    default:
       return visit_continue;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index ae5bc56..db33b68 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -410,6 +410,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
    case ir_binop_ldexp:
    case ir_binop_vector_extract:
    case ir_triop_vector_insert:
+   case ir_triop_mid3:
    case ir_quadop_bitfield_insert:
    case ir_quadop_vector:
       assert(!"should have been lowered");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2aa3acd..7488c8e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -808,7 +808,13 @@ fs_visitor::visit(ir_expression *ir)
       inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
+
+   case ir_triop_mid3:
+      assert(!"not reached: should be handled by "
+              "lower_instructions::mid3_to_min_max");
+      break;
    }
+
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6e74803..604fdb7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -154,7 +154,8 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                          bitfield_insert |
-                         LDEXP_TO_ARITH);
+                         LDEXP_TO_ARITH |
+                         MID3_TO_MIN_MAX);
 
       /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
        * if-statements need to be flattened.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8fa0aee..0aa8975 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1721,6 +1721,9 @@ vec4_visitor::visit(ir_expression *ir)
    case ir_binop_ldexp:
       assert(!"not reached: should be handled by ldexp_to_arith()");
       break;
+   case ir_triop_mid3:
+      assert(!"not reached: should be handled by mid3_to_min_max()");
+      break;
    }
 }
 
diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
index 5228c3a..12ad287 100644
--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -678,6 +678,9 @@ INTERP_4F(GLfloat t, GLfloat dst[4], const GLfloat out[4], 
const GLfloat in[4])
 #define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
 #define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))
 
+/** Median of three values: */
+#define MID3( A, B, C ) ((A) < (B) ? CLAMP(C, A, B) : CLAMP(C, B, A))
+
 static inline unsigned
 minify(unsigned value, unsigned levels)
 {
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 59cf123..88e2073 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1450,6 +1450,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
       break;
 
+   case ir_triop_mid3:
+      assert(!"not reached: should be handled by mid3_to_min_max");
+      break;
+
    case ir_binop_vector_extract:
    case ir_binop_bfm:
    case ir_triop_fma:
@@ -3002,6 +3006,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
         do_mat_op_to_vec(ir);
         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
                                 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
+                                | MID3_TO_MIN_MAX
                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, 
options->EmitNoCont, options->EmitNoLoops) || progress;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d1c3856..4a61b86 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2001,6 +2001,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_binop_ldexp:
    case ir_binop_carry:
    case ir_binop_borrow:
+   case ir_triop_mid3:
       /* This operation is not supported, or should have already been handled.
        */
       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
@@ -5396,6 +5397,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
                          LDEXP_TO_ARITH |
+                         MID3_TO_MIN_MAX |
                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 
0));
 
-- 
1.9.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to