On 09/22/2012 02:04 PM, Eric Anholt wrote:
This means that we don't get constant prop across into the first block after a
BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it
across control flow at some point.  More importantly, it avoids the O(n^2)
with instruction count runtime for shaders that have many constant moves.
---
  src/mesa/drivers/dri/i965/brw_fs.cpp               |  163 --------------------
  src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  124 ++++++++++++++-
  3 files changed, 124 insertions(+), 165 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2701413..0545a74 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
     c->prog_data.nr_pull_params = pull_uniform_count;
  }

-/**
- * Attempts to move immediate constants into the immediate
- * constant slot of following instructions.
- *
- * Immediate constants are a bit tricky -- they have to be in the last
- * operand slot, you can't do abs/negate on them,
- */
-
-bool
-fs_visitor::propagate_constants()
-{
-   bool progress = false;
-
-   calculate_live_intervals();
-
-   foreach_list(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode != BRW_OPCODE_MOV ||
-         inst->predicated ||
-         inst->dst.file != GRF || inst->src[0].file != IMM ||
-         inst->dst.type != inst->src[0].type ||
-         (c->dispatch_width == 16 &&
-          (inst->force_uncompressed || inst->force_sechalf)))
-        continue;
-
-      /* Don't bother with cases where we should have had the
-       * operation on the constant folded in GLSL already.
-       */
-      if (inst->saturate)
-        continue;
-
-      /* Found a move of a constant to a GRF.  Find anything else using the GRF
-       * before it's written, and replace it with the constant if we can.
-       */
-      for (fs_inst *scan_inst = (fs_inst *)inst->next;
-          !scan_inst->is_tail_sentinel();
-          scan_inst = (fs_inst *)scan_inst->next) {
-        if (scan_inst->opcode == BRW_OPCODE_DO ||
-            scan_inst->opcode == BRW_OPCODE_WHILE ||
-            scan_inst->opcode == BRW_OPCODE_ELSE ||
-            scan_inst->opcode == BRW_OPCODE_ENDIF) {
-           break;
-        }
-
-        for (int i = 2; i >= 0; i--) {
-           if (scan_inst->src[i].file != GRF ||
-               scan_inst->src[i].reg != inst->dst.reg ||
-               scan_inst->src[i].reg_offset != inst->dst.reg_offset)
-              continue;
-
-           /* Don't bother with cases where we should have had the
-            * operation on the constant folded in GLSL already.
-            */
-           if (scan_inst->src[i].negate || scan_inst->src[i].abs)
-              continue;
-
-           switch (scan_inst->opcode) {
-           case BRW_OPCODE_MOV:
-              scan_inst->src[i] = inst->src[0];
-              progress = true;
-              break;
-
-           case BRW_OPCODE_MUL:
-           case BRW_OPCODE_ADD:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 /* Fit this constant in by commuting the operands.
-                  * Exception: we can't do this for 32-bit integer MUL
-                  * because it's asymmetric.
-                  */
-                 if (scan_inst->opcode == BRW_OPCODE_MUL &&
-                     (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
-                      scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
-                    break;
-                 scan_inst->src[0] = scan_inst->src[1];
-                 scan_inst->src[1] = inst->src[0];
-                 progress = true;
-              }
-              break;
-
-           case BRW_OPCODE_CMP:
-           case BRW_OPCODE_IF:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 uint32_t new_cmod;
-
-                 new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
-                 if (new_cmod != ~0u) {
-                    /* Fit this constant in by swapping the operands and
-                     * flipping the test
-                     */
-                    scan_inst->src[0] = scan_inst->src[1];
-                    scan_inst->src[1] = inst->src[0];
-                    scan_inst->conditional_mod = new_cmod;
-                    progress = true;
-                 }
-              }
-              break;
-
-           case BRW_OPCODE_SEL:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 scan_inst->src[0] = scan_inst->src[1];
-                 scan_inst->src[1] = inst->src[0];
-
-                 /* If this was predicated, flipping operands means
-                  * we also need to flip the predicate.
-                  */
-                 if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
-                    scan_inst->predicate_inverse =
-                       !scan_inst->predicate_inverse;
-                 }
-                 progress = true;
-              }
-              break;
-
-           case SHADER_OPCODE_RCP:
-              /* The hardware doesn't do math on immediate values
-               * (because why are you doing that, seriously?), but
-               * the correct answer is to just constant fold it
-               * anyway.
-               */
-              assert(i == 0);
-              if (inst->src[0].imm.f != 0.0f) {
-                 scan_inst->opcode = BRW_OPCODE_MOV;
-                 scan_inst->src[0] = inst->src[0];
-                 scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
-                 progress = true;
-              }
-              break;
-
-            case FS_OPCODE_PULL_CONSTANT_LOAD:
-              scan_inst->src[i] = inst->src[0];
-              progress = true;
-              break;
-
-           default:
-              break;
-           }
-        }
-
-        if (scan_inst->dst.file == GRF &&
-             scan_inst->overwrites_reg(inst->dst)) {
-           break;
-        }
-      }
-   }
-
-   if (progress)
-       this->live_intervals_valid = false;
-
-   return progress;
-}
-
-
  bool
  fs_visitor::opt_algebraic()
  {
@@ -2025,7 +1863,6 @@ fs_visitor::run()

         progress = remove_duplicate_mrf_writes() || progress;

-        progress = propagate_constants() || progress;
         progress = opt_algebraic() || progress;
         progress = opt_cse() || progress;
         progress = opt_copy_propagate() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9fbb8e5..59a0e50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,12 +240,12 @@ public:
     void split_virtual_grfs();
     void setup_pull_constants();
     void calculate_live_intervals();
-   bool propagate_constants();
     bool opt_algebraic();
     bool opt_cse();
     bool opt_cse_local(fs_bblock *block, exec_list *aeb);
     bool opt_copy_propagate();
     bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
+   bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
     bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
                                 exec_list *acp);
     bool register_coalesce();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 1870f43..ad34657 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
  bool
  fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
  {
+   if (entry->src.file == IMM)
+      return false;
+
     if (inst->src[arg].file != entry->dst.file ||
         inst->src[arg].reg != entry->dst.reg ||
         inst->src[arg].reg_offset != entry->dst.reg_offset) {
@@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
     return true;
  }

+
+bool
+fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
+{
+   bool progress = false;
+
+   if (entry->src.file != IMM)
+      return false;
+
+   for (int i = 2; i >= 0; i--) {
+      if (inst->src[i].file != entry->dst.file ||
+          inst->src[i].reg != entry->dst.reg ||
+          inst->src[i].reg_offset != entry->dst.reg_offset)
+         continue;
+
+      /* Don't bother with cases where we should have had the
+       * operation on the constant folded in GLSL already.
> +       */

Your English doesn't quite flow here. Also, I don't like the overloading of "GLSL"...it's a language defined by Khronos. (Our compiler and IR really could use names...)

What about:

      /* Don't bother with cases which should have been taken care of
       * by the GLSL compiler's constant folding pass.
       */

Tiny nits aside, I like this. It makes sense to share code here...it's very straightforward, and ought to help us out in the future.

Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>

+      if (inst->src[i].negate || inst->src[i].abs)
+         continue;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      case BRW_OPCODE_MUL:
+      case BRW_OPCODE_ADD:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            /* Fit this constant in by commuting the operands.
+             * Exception: we can't do this for 32-bit integer MUL
+             * because it's asymmetric.
+             */
+            if (inst->opcode == BRW_OPCODE_MUL &&
+                (inst->src[1].type == BRW_REGISTER_TYPE_D ||
+                 inst->src[1].type == BRW_REGISTER_TYPE_UD))
+               break;
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_CMP:
+      case BRW_OPCODE_IF:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            uint32_t new_cmod;
+
+            new_cmod = brw_swap_cmod(inst->conditional_mod);
+            if (new_cmod != ~0u) {
+               /* Fit this constant in by swapping the operands and
+                * flipping the test
+                */
+               inst->src[0] = inst->src[1];
+               inst->src[1] = entry->src;
+               inst->conditional_mod = new_cmod;
+               progress = true;
+            }
+         }
+         break;
+
+      case BRW_OPCODE_SEL:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+
+            /* If this was predicated, flipping operands means
+             * we also need to flip the predicate.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+               inst->predicate_inverse =
+                  !inst->predicate_inverse;
+            }
+            progress = true;
+         }
+         break;
+
+      case SHADER_OPCODE_RCP:
+         /* The hardware doesn't do math on immediate values
+          * (because why are you doing that, seriously?), but
+          * the correct answer is to just constant fold it
+          * anyway.
+          */
+         assert(i == 0);
+         if (inst->src[0].imm.f != 0.0f) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = entry->src;
+            inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
+            progress = true;
+         }
+         break;
+
+      case FS_OPCODE_PULL_CONSTANT_LOAD:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
  /** @file brw_fs_copy_propagation.cpp
   *
   * Support for local copy propagation by walking the list of instructions
@@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
        foreach_list(entry_node, acp) {
         acp_entry *entry = (acp_entry *)entry_node;

+         if (try_constant_propagate(inst, entry))
+            progress = true;
+
         for (int i = 0; i < 3; i++) {
            if (try_copy_propagate(inst, i, entry))
               progress = true;
@@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
          ((inst->src[0].file == GRF &&
            (inst->src[0].reg != inst->dst.reg ||
             inst->src[0].reg_offset != inst->dst.reg_offset)) ||
-          inst->src[0].file == UNIFORM) &&
+           inst->src[0].file == UNIFORM ||
+           inst->src[0].file == IMM) &&
          inst->src[0].type == inst->dst.type &&
          !inst->saturate &&
          !inst->predicated &&


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to