On gen 7, the MRF was removed and we gained the ability to do send
instructions directly from the GRF.  This commit enables that functionality
for FB writes.

v2: Make handling of components more sane.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |   4 +
 src/mesa/drivers/dri/i965/brw_fs.h           |   1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 167 +++++++++++++++++++++------
 src/mesa/drivers/dri/i965/brw_shader.cpp     |   1 +
 4 files changed, 136 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b43032b..143b590 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -514,6 +514,8 @@ fs_inst::is_send_from_grf() const
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == GRF;
+   case FS_OPCODE_FB_WRITE:
+      return src[0].file == GRF;
    default:
       if (is_tex())
          return src[0].file == GRF;
@@ -917,6 +919,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
 {
    if (is_tex() && arg == 0 && src[0].file == GRF) {
       return mlen;
+   } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
+      return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
       return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7500e8e..a91bf9f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -521,6 +521,7 @@ public:
                     fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
 
    void emit_color_write(fs_reg color, int index, int first_color_mrf);
+   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
    void emit_alpha_test();
    fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8e38315..e72fb62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3005,6 +3005,82 @@ fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf)
    }
 }
 
+int
+fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
+{
+   fs_inst *inst;
+
+   if (color.file == BAD_FILE) {
+      return 4 * (dispatch_width / 8);
+   }
+
+   uint8_t colors_enabled;
+   if (components == 0) {
+      /* We want to write one component to the alpha channel */
+      colors_enabled = 0x8;
+   } else {
+      /* Enable the first components-many channels */
+      colors_enabled = (1 << components) - 1;
+   }
+
+   if (dispatch_width == 8 || brw->gen >= 6) {
+      /* SIMD8 write looks like:
+       * m + 0: r0
+       * m + 1: r1
+       * m + 2: g0
+       * m + 3: g1
+       *
+       * gen6 SIMD16 DP write looks like:
+       * m + 0: r0
+       * m + 1: r1
+       * m + 2: g0
+       * m + 3: g1
+       * m + 4: b0
+       * m + 5: b1
+       * m + 6: a0
+       * m + 7: a1
+       */
+      int len = 0;
+      for (unsigned i = 0; i < 4; ++i) {
+         if (colors_enabled & (1 << i)) {
+            dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
+                              color.type, color.width);
+            inst = emit(MOV(dst[len], offset(color, i)));
+            inst->saturate = key->clamp_fragment_color;
+         } else if (color.width == 16) {
+            /* We need two BAD_FILE slots for a 16-wide color */
+            len++;
+         }
+         len++;
+      }
+      return len;
+   } else {
+      /* pre-gen6 SIMD16 single source DP write looks like:
+       * m + 0: r0
+       * m + 1: g0
+       * m + 2: b0
+       * m + 3: a0
+       * m + 4: r1
+       * m + 5: g1
+       * m + 6: b1
+       * m + 7: a1
+       */
+      for (unsigned i = 0; i < 4; ++i) {
+         if (colors_enabled & (1 << i)) {
+            dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+            inst = emit(MOV(dst[i], half(offset(color, i), 0)));
+            inst->saturate = key->clamp_fragment_color;
+
+            dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+            inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
+            inst->saturate = key->clamp_fragment_color;
+            inst->force_sechalf = true;
+         }
+      }
+      return 8;
+   }
+}
+
 static enum brw_conditional_mod
 cond_for_alpha_func(GLenum func)
 {
@@ -3063,12 +3139,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 {
    this->current_annotation = "FB write header";
    bool header_present = true;
+   int reg_size = dispatch_width / 8;
+
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
     */
-   int base_mrf = 1;
-   int nr = base_mrf;
-   int reg_width = dispatch_width / 8;
+   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
+   int length = 0;
 
    /* From the Sandy Bridge PRM, volume 4, page 198:
     *
@@ -3085,12 +3162,14 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
    }
 
    if (header_present)
-      /* m2, m3 header */
-      nr += 2;
+      /* Allocate 2 registers for a header */
+      length += 2;
 
    if (payload.aa_dest_stencil_reg) {
-      emit(MOV(fs_reg(MRF, nr++),
+      sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
+      emit(MOV(sources[length],
                fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
+      length++;
    }
 
    prog_data->uses_omask =
@@ -3098,9 +3177,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
    if (prog_data->uses_omask) {
       this->current_annotation = "FB write oMask";
       assert(this->sample_mask.file != BAD_FILE);
-      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
-      emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask);
-      nr += 1;
+      /* Hand over gl_SampleMask. Only lower 16 bits are relevant.  Since
+       * it's unsigned single words, one vgrf is always 16-wide.
+       */
+      sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
+                               BRW_REGISTER_TYPE_UW, 16);
+      emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
+      length++;
    }
 
    if (color0.file == BAD_FILE) {
@@ -3108,28 +3191,20 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
        */
-      emit_color_write(this->outputs[0], 3, nr);
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, this->outputs[0], 0);
    } else if (color1.file == BAD_FILE) {
       if (src0_alpha.file != BAD_FILE) {
-         fs_inst *inst;
-         inst = emit(MOV(fs_reg(MRF, nr, src0_alpha.type), src0_alpha));
+         sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
+                                  src0_alpha.type, src0_alpha.width);
+         fs_inst *inst = emit(MOV(sources[length], src0_alpha));
          inst->saturate = key->clamp_fragment_color;
-         nr += reg_width;
+         length++;
       }
 
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color0, i, nr);
-
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, color0, components);
    } else {
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color0, i, nr);
-      nr += 4 * reg_width;
-
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color1, i, nr);
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, color0, components);
+      length += setup_color_payload(sources + length, color1, components);
    }
 
    if (source_depth_to_render_target) {
@@ -3142,33 +3217,51 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
         no16("Missing support for simd16 depth writes on gen6\n");
       }
 
+      sources[length] = fs_reg(this, glsl_type::float_type);
       if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth.file != BAD_FILE);
-        emit(MOV(fs_reg(MRF, nr), this->frag_depth));
+        emit(MOV(sources[length], this->frag_depth));
       } else {
         /* Pass through the payload depth. */
-        emit(MOV(fs_reg(MRF, nr),
+        emit(MOV(sources[length],
                   fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
       }
-      nr += reg_width;
+      length++;
    }
 
    if (payload.dest_depth_reg) {
-      emit(MOV(fs_reg(MRF, nr),
+      sources[length] = fs_reg(this, glsl_type::float_type);
+      emit(MOV(sources[length],
                fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
-      nr += reg_width;
+      length++;
    }
 
-   fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
-   inst->base_mrf = base_mrf;
-   inst->mlen = nr - base_mrf;
-   inst->header_present = header_present;
+   fs_inst *load;
+   fs_inst *write;
+   if (brw->gen >= 7) {
+      /* Send from the GRF */
+      fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+      load = emit(LOAD_PAYLOAD(payload, sources, length));
+      payload.reg = virtual_grf_alloc(load->regs_written);
+      load->dst = payload;
+      write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
+      write->base_mrf = -1;
+   } else {
+      /* Send from the MRF */
+      load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+                               sources, length));
+      write = emit(FS_OPCODE_FB_WRITE);
+      write->base_mrf = 1;
+   }
+
+   write->mlen = load->regs_written;
+   write->header_present = header_present;
    if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) {
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->flag_subreg = 1;
+      write->predicate = BRW_PREDICATE_NORMAL;
+      write->flag_subreg = 1;
    }
-   return inst;
+   return write;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0a33063..55e2481 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -738,6 +738,7 @@ backend_instruction::has_side_effects() const
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case FS_OPCODE_FB_WRITE:
       return true;
    default:
       return false;
-- 
2.1.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to