On gen 7, the MRF was removed and we gained the ability to do send instructions directly from the GRF. This commit enables that functionality for FB writes.
v2: Make handling of components more sane. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 + src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 167 +++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + 4 files changed, 136 insertions(+), 37 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b43032b..143b590 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -514,6 +514,8 @@ fs_inst::is_send_from_grf() const return true; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return src[1].file == GRF; + case FS_OPCODE_FB_WRITE: + return src[0].file == GRF; default: if (is_tex()) return src[0].file == GRF; @@ -917,6 +919,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const { if (is_tex() && arg == 0 && src[0].file == GRF) { return mlen; + } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) { + return mlen; } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) { return mlen; } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7500e8e..a91bf9f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -521,6 +521,7 @@ public: fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one); void emit_color_write(fs_reg color, int index, int first_color_mrf); + int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components); void emit_alpha_test(); fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 8e38315..e72fb62 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3005,6 +3005,82 @@ fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf) } } 
+int +fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) +{ + fs_inst *inst; + + if (color.file == BAD_FILE) { + return 4 * (dispatch_width / 8); + } + + uint8_t colors_enabled; + if (components == 0) { + /* We want to write one component to the alpha channel */ + colors_enabled = 0x8; + } else { + /* Enable the first components-many channels */ + colors_enabled = (1 << components) - 1; + } + + if (dispatch_width == 8 || brw->gen >= 6) { + /* SIMD8 write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * + * gen6 SIMD16 DP write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * m + 4: b0 + * m + 5: b1 + * m + 6: a0 + * m + 7: a1 + */ + int len = 0; + for (unsigned i = 0; i < 4; ++i) { + if (colors_enabled & (1 << i)) { + dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8), + color.type, color.width); + inst = emit(MOV(dst[len], offset(color, i))); + inst->saturate = key->clamp_fragment_color; + } else if (color.width == 16) { + /* We need two BAD_FILE slots for a 16-wide color */ + len++; + } + len++; + } + return len; + } else { + /* pre-gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * m + 4: r1 + * m + 5: g1 + * m + 6: b1 + * m + 7: a1 + */ + for (unsigned i = 0; i < 4; ++i) { + if (colors_enabled & (1 << i)) { + dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + inst = emit(MOV(dst[i], half(offset(color, i), 0))); + inst->saturate = key->clamp_fragment_color; + + dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); + inst->saturate = key->clamp_fragment_color; + inst->force_sechalf = true; + } + } + return 8; + } +} + static enum brw_conditional_mod cond_for_alpha_func(GLenum func) { @@ -3063,12 +3139,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, { this->current_annotation = "FB write header"; bool header_present = true; + int 
reg_size = dispatch_width / 8; + /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. */ - int base_mrf = 1; - int nr = base_mrf; - int reg_width = dispatch_width / 8; + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15); + int length = 0; /* From the Sandy Bridge PRM, volume 4, page 198: * @@ -3085,12 +3162,14 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, } if (header_present) - /* m2, m3 header */ - nr += 2; + /* Allocate 2 registers for a header */ + length += 2; if (payload.aa_dest_stencil_reg) { - emit(MOV(fs_reg(MRF, nr++), + sources[length] = fs_reg(GRF, virtual_grf_alloc(1)); + emit(MOV(sources[length], fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); + length++; } prog_data->uses_omask = @@ -3098,9 +3177,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, if (prog_data->uses_omask) { this->current_annotation = "FB write oMask"; assert(this->sample_mask.file != BAD_FILE); - /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */ - emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask); - nr += 1; + /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since + * it's unsinged single words, one vgrf is always 16-wide. + */ + sources[length] = fs_reg(GRF, virtual_grf_alloc(1), + BRW_REGISTER_TYPE_UW, 16); + emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); + length++; } if (color0.file == BAD_FILE) { @@ -3108,28 +3191,20 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. 
*/ - emit_color_write(this->outputs[0], 3, nr); - nr += 4 * reg_width; + length += setup_color_payload(sources + length, this->outputs[0], 0); } else if (color1.file == BAD_FILE) { if (src0_alpha.file != BAD_FILE) { - fs_inst *inst; - inst = emit(MOV(fs_reg(MRF, nr, src0_alpha.type), src0_alpha)); + sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size), + src0_alpha.type, src0_alpha.width); + fs_inst *inst = emit(MOV(sources[length], src0_alpha)); inst->saturate = key->clamp_fragment_color; - nr += reg_width; + length++; } - for (unsigned i = 0; i < components; i++) - emit_color_write(color0, i, nr); - - nr += 4 * reg_width; + length += setup_color_payload(sources + length, color0, components); } else { - for (unsigned i = 0; i < components; i++) - emit_color_write(color0, i, nr); - nr += 4 * reg_width; - - for (unsigned i = 0; i < components; i++) - emit_color_write(color1, i, nr); - nr += 4 * reg_width; + length += setup_color_payload(sources + length, color0, components); + length += setup_color_payload(sources + length, color1, components); } if (source_depth_to_render_target) { @@ -3142,33 +3217,51 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, no16("Missing support for simd16 depth writes on gen6\n"); } + sources[length] = fs_reg(this, glsl_type::float_type); if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - emit(MOV(fs_reg(MRF, nr), this->frag_depth)); + emit(MOV(sources[length], this->frag_depth)); } else { /* Pass through the payload depth. 
*/ - emit(MOV(fs_reg(MRF, nr), + emit(MOV(sources[length], fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); } - nr += reg_width; + length++; } if (payload.dest_depth_reg) { - emit(MOV(fs_reg(MRF, nr), + sources[length] = fs_reg(this, glsl_type::float_type); + emit(MOV(sources[length], fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)))); - nr += reg_width; + length++; } - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = base_mrf; - inst->mlen = nr - base_mrf; - inst->header_present = header_present; + fs_inst *load; + fs_inst *write; + if (brw->gen >= 7) { + /* Send from the GRF */ + fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); + load = emit(LOAD_PAYLOAD(payload, sources, length)); + payload.reg = virtual_grf_alloc(load->regs_written); + load->dst = payload; + write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); + write->base_mrf = -1; + } else { + /* Send from the MRF */ + load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), + sources, length)); + write = emit(FS_OPCODE_FB_WRITE); + write->base_mrf = 1; + } + + write->mlen = load->regs_written; + write->header_present = header_present; if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) { - inst->predicate = BRW_PREDICATE_NORMAL; - inst->flag_subreg = 1; + write->predicate = BRW_PREDICATE_NORMAL; + write->flag_subreg = 1; } - return inst; + return write; } void diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0a33063..55e2481 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -738,6 +738,7 @@ backend_instruction::has_side_effects() const { switch (opcode) { case SHADER_OPCODE_UNTYPED_ATOMIC: + case FS_OPCODE_FB_WRITE: return true; default: return false; -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev