Previously, setup_color_payload was a big helper function that did a lot of gen-specific special casing for setting up the color sources of the LOAD_PAYLOAD instruction. Now that lower_load_payload is much more sane, most of that complexity isn't needed anymore. Instead, we can do a simple fixup pass for color clamps and then just stash sources directly in the LOAD_PAYLOAD. We can trust lower_load_payload to do the right thing with respect to COMPR4. --- src/mesa/drivers/dri/i965/brw_fs.h | 4 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 155 +++++++-------------------- 2 files changed, 43 insertions(+), 116 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 30cefe4..1d7de2e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -394,8 +394,8 @@ public: bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); - int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, - bool use_2nd_half); + void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, + unsigned exec_size, bool use_2nd_half); void emit_alpha_test(); fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, fs_reg src0_alpha, unsigned components, diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 772285e..80ca1b7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3558,105 +3558,30 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = NULL; } -int +void fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, - bool use_2nd_half) + unsigned exec_size, bool use_2nd_half) { brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_inst *inst; - if (color.file == BAD_FILE) { - return 4 * (dispatch_width / 8); - } - - uint8_t colors_enabled; - if (components == 0) { - /* We want to write one component to the alpha channel */ - colors_enabled = 0x8; - } else { - /* Enable the first components-many channels */ - colors_enabled = (1 << components) - 1; + if (key->clamp_fragment_color) { + fs_reg tmp = vgrf(glsl_type::vec4_type); + assert(color.type == BRW_REGISTER_TYPE_F); + for (unsigned i = 0; i < components; i++) { + inst = emit(MOV(offset(tmp, i), offset(color, i))); + inst->saturate = true; + } + color = tmp; } - if (dispatch_width == 8 || (devinfo->gen >= 6 && !do_dual_src)) { - /* SIMD8 write looks like: - * m + 0: r0 - * m + 1: r1 - * m + 2: g0 - * m + 3: g1 - * - * gen6 SIMD16 DP write looks like: - * m + 0: r0 - * m 
+ 1: r1 - * m + 2: g0 - * m + 3: g1 - * m + 4: b0 - * m + 5: b1 - * m + 6: a0 - * m + 7: a1 - */ - int len = 0; - for (unsigned i = 0; i < 4; ++i) { - if (colors_enabled & (1 << i)) { - dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8), - color.type, color.width); - inst = emit(MOV(dst[len], offset(color, i))); - inst->saturate = key->clamp_fragment_color; - } - len++; - } - return len; - } else if (devinfo->gen >= 6 && do_dual_src) { - /* SIMD16 dual source blending for gen6+. - * - * From the SNB PRM, volume 4, part 1, page 193: - * - * "The dual source render target messages only have SIMD8 forms due to - * maximum message length limitations. SIMD16 pixel shaders must send two - * of these messages to cover all of the pixels. Each message contains - * two colors (4 channels each) for each pixel in the message payload." - * - * So in SIMD16 dual source blending we will send 2 SIMD8 messages, - * each one will call this function twice (one for each color involved), - * so in each pass we only write 4 registers. Notice that the second - * SIMD8 message needs to read color data from the 2nd half of the color - * registers, so it needs to call this with use_2nd_half = true. - */ - for (unsigned i = 0; i < 4; ++i) { - if (colors_enabled & (1 << i)) { - dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); - inst = emit(MOV(dst[i], half(offset(color, i), - use_2nd_half ? 1 : 0))); - inst->saturate = key->clamp_fragment_color; - if (use_2nd_half) - inst->force_sechalf = true; - } - } - return 4; + if (exec_size < dispatch_width) { + unsigned half_idx = use_2nd_half ? 
1 : 0; + for (unsigned i = 0; i < components; i++) + dst[i] = half(offset(color, i), half_idx); } else { - /* pre-gen6 SIMD16 single source DP write looks like: - * m + 0: r0 - * m + 1: g0 - * m + 2: b0 - * m + 3: a0 - * m + 4: r1 - * m + 5: g1 - * m + 6: b1 - * m + 7: a1 - */ - for (unsigned i = 0; i < 4; ++i) { - if (colors_enabled & (1 << i)) { - dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); - inst = emit(MOV(dst[i], half(offset(color, i), 0))); - inst->saturate = key->clamp_fragment_color; - - dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type); - inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); - inst->saturate = key->clamp_fragment_color; - inst->force_sechalf = true; - } - } - return 8; + for (unsigned i = 0; i < components; i++) + dst[i] = offset(color, i); } } @@ -3725,7 +3650,6 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, this->current_annotation = "FB write header"; int header_size = 2, payload_header_size; - int reg_size = exec_size / 8; /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. @@ -3781,24 +3705,26 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. 
*/ - length += setup_color_payload(sources + length, this->outputs[0], 0, - false); + if (this->outputs[0].file != BAD_FILE) + setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3), + 1, exec_size, false); + length += 4; } else if (color1.file == BAD_FILE) { if (src0_alpha.file != BAD_FILE) { - sources[length] = fs_reg(GRF, alloc.allocate(reg_size), - src0_alpha.type, src0_alpha.width); - fs_inst *inst = emit(MOV(sources[length], src0_alpha)); - inst->saturate = key->clamp_fragment_color; + setup_color_payload(&sources[length], src0_alpha, 1, exec_size, false); length++; } - length += setup_color_payload(sources + length, color0, components, - false); + setup_color_payload(&sources[length], color0, components, + exec_size, use_2nd_half); + length += 4; } else { - length += setup_color_payload(sources + length, color0, components, - use_2nd_half); - length += setup_color_payload(sources + length, color1, components, - use_2nd_half); + setup_color_payload(&sources[length], color0, components, + exec_size, use_2nd_half); + length += 4; + setup_color_payload(&sources[length], color1, components, + exec_size, use_2nd_half); + length += 4; } if (source_depth_to_render_target) { @@ -3811,25 +3737,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, no16("Missing support for simd16 depth writes on gen6\n"); } - sources[length] = vgrf(glsl_type::float_type); if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { /* Hand over gl_FragDepth. */ assert(this->frag_depth.file != BAD_FILE); - emit(MOV(sources[length], this->frag_depth)); + sources[length] = this->frag_depth; } else { /* Pass through the payload depth. 
*/ - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); + sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); } length++; } - if (payload.dest_depth_reg) { - sources[length] = vgrf(glsl_type::float_type); - emit(MOV(sources[length], - fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)))); - length++; - } + if (payload.dest_depth_reg) + sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)); fs_inst *load; fs_inst *write; @@ -3845,6 +3765,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, /* Send from the MRF */ load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), sources, length, payload_header_size)); + + /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD + * will do this for us if we just give it a COMPR4 destination. + */ + if (brw->gen < 6 && exec_size == 16) + load->dst.reg |= BRW_MRF_COMPR4; + write = emit(FS_OPCODE_FB_WRITE); write->exec_size = exec_size; write->base_mrf = 1; -- 2.3.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev