We generate silly code for array access, and it's easier to generally
support the cleanup than to specifically avoid the bad code in each
place we might generate it.

Removes 4.6% of instructions from 41.6% of shaders in shader-db,
particularly savage2/hon and unigine.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      |   91 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |    1 +
 3 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2f..5fd4756 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,97 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+/* Returns true iff reg is an immediate (float or integer) equal to zero. */
+static bool
+src_reg_is_zero(src_reg *reg)
+{
+   if (reg->file != IMM)
+      return false;
+
+   /* Float immediates are read from imm.f; every other type uses imm.i. */
+   const bool is_float = (reg->type == BRW_REGISTER_TYPE_F);
+
+   return is_float ? reg->imm.f == 0.0 : reg->imm.i == 0;
+}
+
+/* Returns true iff reg is an immediate (float or integer) equal to one. */
+static bool
+src_reg_is_one(src_reg *reg)
+{
+   if (reg->file == IMM) {
+      /* Float immediates are read from imm.f; other types use imm.i. */
+      if (reg->type == BRW_REGISTER_TYPE_F)
+         return reg->imm.f == 1.0;
+      return reg->imm.i == 1;
+   }
+   return false;
+}
+
+/**
+ * Does algebraic optimizations (a * 0 = 0, a * 1 = a, a + 0 = a), matching
+ * only an immediate in src[1].  Returns true if any instruction changed.
+ *
+ * While GLSL IR also performs this optimization, we end up with it in our
+ * instruction stream for a couple of reasons.  One is that we sometimes
+ * generate silly instructions, for example in array access where we'll
+ * generate "ADD offset, index, base" even if base is 0.  The other is that
+ * GLSL IR's constant propagation doesn't track the components of aggregates,
+ * so some VS patterns (initialize matrix to 0, accumulate in vertex blending
+ * factors) end up breaking down to instructions involving 0.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+        if (src_reg_is_zero(&inst->src[1])) {
+           inst->opcode = BRW_OPCODE_MOV;
+           inst->src[1] = src_reg();
+           progress = true;
+        }
+        break;
+      case BRW_OPCODE_MUL:
+        if (src_reg_is_zero(&inst->src[1])) {
+           inst->opcode = BRW_OPCODE_MOV;
+           switch (inst->src[0].type) {
+           case BRW_REGISTER_TYPE_F:
+              inst->src[0] = src_reg(0.0f);
+              break;
+           case BRW_REGISTER_TYPE_D:
+              inst->src[0] = src_reg(0);
+              break;
+           case BRW_REGISTER_TYPE_UD:
+              inst->src[0] = src_reg(0u);
+              break;
+           default:
+              assert(!"not reached");
+              inst->src[0] = src_reg(0.0f);
+              break;
+           }
+           inst->src[1] = src_reg();
+           progress = true;
+        } else if (src_reg_is_one(&inst->src[1])) {
+           /* a * 1 = a; flag progress so run() iterates the passes again. */
+           inst->opcode = BRW_OPCODE_MOV;
+           inst->src[1] = src_reg();
+           progress = true;
+        }
+        break;
+      default:
+        break;
+      }
+   }
+
+   if (progress)
+      this->live_intervals_valid = false;
+
+   return progress;
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a15..3f116ee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -401,6 +401,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f..7031d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
    } while (progress);
 
 
-- 
1.7.5.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to