On 08.06.2017 05:23, Dave Airlie wrote:
From: Dave Airlie <airl...@redhat.com>

This overhauls the copy prop and dead code passes to avoid
major CPU overhead in some corner cases trigged by the fp64 patches

v2: fix level not being updated correctly
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 +++++++++++++++++++++++++----
  1 file changed, 109 insertions(+), 14 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 97f60d3..48d48e8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int 
*last_writes)
   *
   * which allows for dead code elimination on TEMP[1]'s writes.
   */
+#define DEFAULT_LEVELS 8
+
+class per_level_info {
+
+   struct per_level_range {
+      int32_t min_temp_idx;
+      int32_t max_temp_idx;
+   } *lvls;
+
+   void *mem_ctx;
+   int num_alloced_levels;
+   int level;
+   int max_temps;
+public:
+
+   per_level_info(void *mem_ctx_in, int max_temps_in) {
+      num_alloced_levels = DEFAULT_LEVELS;
+      max_temps = max_temps_in;
+      mem_ctx = mem_ctx_in;
+      level = 0;
+      lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+                                                           NULL,
+                                                           sizeof(struct 
per_level_range),
+                                                           num_alloced_levels);
+      lvls[0].min_temp_idx = max_temps;

I believe you can just use INT_MAX here.


+      lvls[0].max_temp_idx = 0;
+   }
+
+   ~per_level_info() {
+      ralloc_free(lvls);
+   }
+
+   int get_level(void) {
+      return level;
+   }
+
+   void push_level(void) {
+      level++;
+      if (level >= num_alloced_levels) {
+         num_alloced_levels += 4;
+         lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
+                                                              (void *)lvls,
+                                                              sizeof(struct 
per_level_range),
+                                                              
num_alloced_levels);
+      }
+      lvls[level].min_temp_idx = max_temps;
+      lvls[level].max_temp_idx = 0;
+   }
+
+   void pop_level(void) {
+      if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
+         lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
+      if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
+         lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
+      level--;
+   }
+
+   void get_level_range(int32_t *min, int32_t *max)
+   {
+      *min = lvls[level].min_temp_idx;
+      *max = lvls[level].max_temp_idx;
+   }
+
+   void update_level_range(int32_t idx)
+   {
+      if (idx < lvls[level].min_temp_idx)
+         lvls[level].min_temp_idx = idx;
+      if ((idx + 1) > lvls[level].max_temp_idx)
+         lvls[level].max_temp_idx = idx + 1;
+   }
+};
+
  void
  glsl_to_tgsi_visitor::copy_propagate(void)
  {
@@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                                                    glsl_to_tgsi_instruction *,
                                                    this->next_temp * 4);
     int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
-   int level = 0;
+   class per_level_info lvl_info(mem_ctx, this->next_temp);

class is unnecessary here.


+   int min_lvl, max_lvl;
+   int level;

I'd feel more comfortable about the code if you replaced all occurrences of level by lvl_info.get_level(), to completely eliminate the chance of getting stale data. Same in eliminate_dead_code.


foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
        assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
           for (int i = 0; i < 4; i++) {
              int src_chan = GET_SWZ(inst->src[r].swizzle, i);
              glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
-
              if (!copy_chan) {
                 good = false;
                 break;
              }
- assert(acp_level[acp_base + src_chan] <= level);
+            assert(acp_level[acp_base + src_chan] <= lvl_info.get_level());
if (!first) {
                 first = copy_chan;
@@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
case TGSI_OPCODE_IF:
        case TGSI_OPCODE_UIF:
-         ++level;
+         lvl_info.push_level();
           break;
case TGSI_OPCODE_ENDIF:
@@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
           /* Clear all channels written inside the block from the ACP, but
            * leaving those that were not touched.
            */
-         for (int r = 0; r < this->next_temp; r++) {
+         lvl_info.get_level_range(&min_lvl, &max_lvl);
+         level = lvl_info.get_level();
+         for (int r = min_lvl; r < max_lvl; r++) {
              for (int c = 0; c < 4; c++) {
                 if (!acp[4 * r + c])
                    continue;
@@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                    acp[4 * r + c] = NULL;
              }
           }
-         if (inst->op == TGSI_OPCODE_ENDIF)
-            --level;
+         lvl_info.pop_level();
+
+         if (inst->op != TGSI_OPCODE_ENDIF)
+            lvl_info.push_level();
+
           break;
default:
@@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                 /* Any output might be written, so no copy propagation
                  * from outputs across this instruction.
                  */
-               for (int r = 0; r < this->next_temp; r++) {
+               lvl_info.get_level_range(&min_lvl, &max_lvl);
+               for (int r = min_lvl; r < max_lvl; r++) {
                    for (int c = 0; c < 4; c++) {
                       if (!acp[4 * r + c])
                          continue;
@@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                 }
/* Clear where it's used as src. */
-               for (int r = 0; r < this->next_temp; r++) {
+               lvl_info.get_level_range(&min_lvl, &max_lvl);
+               for (int r = min_lvl; r < max_lvl; r++) {
                    for (int c = 0; c < 4; c++) {
                       if (!acp[4 * r + c])
                          continue;
@@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
            !inst->src[0].reladdr2 &&
            !inst->src[0].negate &&
            !inst->src[0].abs) {
+         level = lvl_info.get_level();
           for (int i = 0; i < 4; i++) {
              if (inst->dst[0].writemask & (1 << i)) {
                 acp[4 * inst->dst[0].index + i] = inst;
                 acp_level[4 * inst->dst[0].index + i] = level;
              }
           }
+
+         lvl_info.update_level_range(inst->dst[0].index);
        }
     }
@@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                                                       glsl_to_tgsi_instruction 
*,
                                                       this->next_temp * 4);
     int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
-   int level = 0;
+   int level;
     int removed = 0;
+   int min_lvl, max_lvl;
+   class per_level_info lvl_info(mem_ctx, this->next_temp);

class is unnecessary.

Apart from the comments above, the patch is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
        assert(inst->dst[0].file != PROGRAM_TEMPORARY
@@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
           /* Promote the recorded level of all channels written inside the
            * preceding if or else block to the level above the if/else block.
            */
-         for (int r = 0; r < this->next_temp; r++) {
+         lvl_info.get_level_range(&min_lvl, &max_lvl);
+         level = lvl_info.get_level();
+         for (int r = min_lvl; r < max_lvl; r++) {
              for (int c = 0; c < 4; c++) {
                 if (!writes[4 * r + c])
                    continue;
@@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                    write_level[4 * r + c] = level-1;
              }
           }
-         if(inst->op == TGSI_OPCODE_ENDIF)
-            --level;
+
+         lvl_info.pop_level();
+
+         if(inst->op != TGSI_OPCODE_ENDIF) {
+            lvl_info.push_level();
+         }
           break;
case TGSI_OPCODE_IF:
        case TGSI_OPCODE_UIF:
-         ++level;
+         lvl_info.push_level();
+
           /* fallthrough to default case to mark the condition as read */
        default:
           /* Continuing the block, clear any channels from the write array that
@@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
        for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
           if (inst->dst[i].file == PROGRAM_TEMPORARY &&
               !inst->dst[i].reladdr) {
+            level = lvl_info.get_level();
+
              for (int c = 0; c < 4; c++) {
                 if (inst->dst[i].writemask & (1 << c)) {
                    if (writes[4 * inst->dst[i].index + c]) {
@@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                    }
                    writes[4 * inst->dst[i].index + c] = inst;
                    write_level[4 * inst->dst[i].index + c] = level;
+                  lvl_info.update_level_range(inst->dst[i].index);
                 }
              }
           }



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to