fs: Bake regs_written into the IR instead of recomputing it later.

Kenneth Graunke Sun, 31 Mar 2013 21:41:17 -0700

On 03/20/2013 05:36 PM, Eric Anholt wrote:

For sampler messages, it depends on the target gen, and on gen4
SIMD16-sampler-on-SIMD8-execution we were returning 4 instead of 8 like we
should.


NOTE: This is a candidate for the 9.1 branch.
---
  src/mesa/drivers/dri/i965/brw_fs.cpp               |   29 +++++++-------------
  src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
  src/mesa/drivers/dri/i965/brw_fs_cse.cpp           |    6 ++--
  .../drivers/dri/i965/brw_fs_live_variables.cpp     |    2 +-
  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |    8 +++---
  .../dri/i965/brw_fs_schedule_instructions.cpp      |    6 ++--
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |    7 +++--
  7 files changed, 27 insertions(+), 33 deletions(-)

Ugh...I'm not a huge fan of this, but I think it's better than the alternative (which is passing "intel" into random functions to handle the case you mentioned.)

The reason I'm concerned is that we sometimes change the opcode of instructions, and we'll need to make sure to update this too. But that's probably fine. For CSE, you emit new instructions, rather than editing it, so that works.

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f4aa9f7..c128175 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -60,6 +60,9 @@ fs_inst::init()
     this->src[0] = reg_undef;
     this->src[1] = reg_undef;
     this->src[2] = reg_undef;
+
+   /* This will be the case for almost all instructions. */
+   this->regs_written = 1;
  }

  fs_inst::fs_inst()
@@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg 
surf_index,
        fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
        inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
                                    vec4_result, surf_index, vec4_offset);
+      inst->regs_written = 4;
        instructions.push_tail(inst);

        vec4_result.reg_offset += const_offset & 3;
@@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst)
             offset == inst->offset);
  }

-int
-fs_inst::regs_written()
-{
-   if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
-      return 4;
-
-   /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
-    * but we don't currently use them...nor do we have an opcode for them.
-    */
-
-   return 1;
-}
-
  bool
  fs_inst::overwrites_reg(const fs_reg &reg)
  {
     return (reg.file == dst.file &&
             reg.reg == dst.reg &&
             reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written());
+           reg.reg_offset < dst.reg_offset + regs_written);
  }

  bool
@@ -1388,7 +1379,7 @@ fs_visitor::split_virtual_grfs()
        /* If there's a SEND message that requires contiguous destination
         * registers, no splitting is allowed.
         */
-      if (inst->regs_written() > 1) {
+      if (inst->regs_written > 1) {
         split_grf[inst->dst.reg] = false;
        }
     }
@@ -2109,7 +2100,7 @@ fs_visitor::compute_to_mrf()
              /* Things returning more than one register would need us to
               * understand coalescing out more than one MOV at a time.
               */
-            if (scan_inst->regs_written() > 1)
+            if (scan_inst->regs_written > 1)
                 break;

            /* SEND instructions can't have MRF as a destination. */
@@ -2326,7 +2317,7 @@ void
  fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
  {
     int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written() * reg_size;
+   int write_len = inst->regs_written * reg_size;
     int first_write_grf = inst->dst.reg;
     bool needs_dep[BRW_MAX_MRF];
     assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2366,7 +2357,7 @@ 
fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
         * dependency has more latency than a MOV.
         */
        if (scan_inst->dst.file == GRF) {
-         for (int i = 0; i < scan_inst->regs_written(); i++) {
+         for (int i = 0; i < scan_inst->regs_written; i++) {
              int reg = scan_inst->dst.reg + i * reg_size;

              if (reg >= first_write_grf &&
@@ -2405,7 +2396,7 @@ 
fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
  void
  fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
  {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int write_len = inst->regs_written * dispatch_width / 8;
     int first_write_grf = inst->dst.reg;
     bool needs_dep[BRW_MAX_MRF];
     assert(write_len < (int)sizeof(needs_dep) - 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 76130b1..0c5aad1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,7 +174,6 @@ public:
             fs_reg src0, fs_reg src1,fs_reg src2);

     bool equals(fs_inst *inst);
-   int regs_written();
     bool overwrites_reg(const fs_reg &reg);
     bool is_tex();
     bool is_math();
@@ -192,6 +191,7 @@ public:
     uint8_t flag_subreg;

     int mlen; /**< SEND message length */
+   int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
     int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
     uint32_t texture_offset; /**< Texture offset bitfield */
     int sampler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c89da36..01a64d2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             */
            bool no_existing_temp = entry->tmp.file == BAD_FILE;
            if (no_existing_temp) {
-               int written = entry->generator->regs_written();
+               int written = entry->generator->regs_written;

                 fs_reg orig_dst = entry->generator->dst;
                 fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
@@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
            }

            /* dest <- temp */
-            int written = inst->regs_written();
-            assert(written == entry->generator->regs_written());
+            int written = inst->regs_written;
+            assert(written == entry->generator->regs_written);
              assert(inst->dst.type == entry->tmp.type);
              fs_reg dst = inst->dst;
              fs_reg tmp = entry->tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 63af148..373aa2d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
          * variable, and thus qualify for being in def[].
          */
         if (inst->dst.file == GRF &&
-            inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
+            inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
             !inst->predicate &&
             !inst->force_uncompressed &&
             !inst->force_sechalf) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b8936dc..4ee7bbc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
        }

        if (inst->dst.file == GRF) {
-        spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
+        spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;

           if (inst->dst.smear >= 0) {
              no_spill[inst->dst.reg] = true;
@@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg)
          inst->dst.reg == spill_reg) {
           int subset_spill_offset = (spill_offset +
                                      REG_SIZE * inst->dst.reg_offset);
-         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
           inst->dst.reg_offset = 0;

         /* If our write is going to affect just part of the
@@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg)
          */
         if (inst->predicate || inst->force_uncompressed || 
inst->force_sechalf) {
              fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written(); chan++) {
+            for (int chan = 0; chan < inst->regs_written; chan++) {
                 emit_unspill(inst, unspill_reg,
                              subset_spill_offset + REG_SIZE * chan);
                 unspill_reg.reg_offset++;
@@ -644,7 +644,7 @@ fs_visitor::spill_reg(int spill_reg)
         spill_src.negate = false;
         spill_src.smear = -1;

-        for (int chan = 0; chan < inst->regs_written(); chan++) {
+        for (int chan = 0; chan < inst->regs_written; chan++) {
            fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
                                                       reg_null_f, spill_src);
            spill_src.reg_offset++;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index c125928..0d68e3d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -510,7 +510,7 @@ instruction_scheduler::calculate_deps()
        /* write-after-write deps. */
        if (inst->dst.file == GRF) {
           if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written * reg_width; r++) {
                 add_dep(last_grf_write[inst->dst.reg + r], n);
                 last_grf_write[inst->dst.reg + r] = n;
              }
@@ -617,7 +617,7 @@ instruction_scheduler::calculate_deps()
         */
        if (inst->dst.file == GRF) {
           if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+            for (int r = 0; r < inst->regs_written * reg_width; r++)
                 last_grf_write[inst->dst.reg + r] = n;
           } else {
              last_grf_write[inst->dst.reg] = n;
@@ -716,7 +716,7 @@ instruction_scheduler::schedule_instructions(fs_inst 
*next_block_header)
              schedule_node *n = (schedule_node *)node;

              chosen = n;
-            if (chosen->inst->regs_written() <= 1)
+            if (chosen->inst->regs_written <= 1)
                 break;
           }

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 48c6df3..19adfc9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -261,7 +261,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
      * src, generate a saturated MOV
      */
     fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify->regs_written() != 1) {
+   if (!modify || modify->regs_written != 1) {
        this->result = fs_reg(this, ir->type);
        fs_inst *inst = emit(MOV(this->result, src));
        inst->saturate = true;
@@ -746,7 +746,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
     /* If last_rhs_inst wrote a different number of components than our LHS,
      * we can't safely rewrite it.
      */
-   if (virtual_grf_sizes[dst.reg] != modify->regs_written())
+   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
        return false;

     /* Success!  Rewrite the instruction. */
@@ -948,6 +948,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = true;
+   inst->regs_written = simd16 ? 8 : 4;

     if (simd16) {
        for (int i = 0; i < 4; i++) {
@@ -1086,6 +1087,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = header_present;
+   inst->regs_written = 4;

     if (mlen > 11) {
        fail("Message length >11 disallowed by hardware\n");
@@ -1239,6 +1241,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = header_present;
+   inst->regs_written = 4;

     if (mlen > 11) {
        fail("Message length >11 disallowed by hardware\n");


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 10/13] i965/fs: Bake regs_written into the IR instead of recomputing it later.

Reply via email to