On Wed, Jul 22, 2015 at 12:54 AM, Francisco Jerez <curroje...@riseup.net> wrote: > Jason Ekstrand <ja...@jlekstrand.net> writes: > >> On Thu, Jul 16, 2015 at 8:41 AM, Francisco Jerez <curroje...@riseup.net> >> wrote: >>> The only non-trivial thing it still has to do is figure out where to >>> take the src/dst depth values from and predicate the instruction if >>> discard is in use. The manual SIMD unrolling logic in the dual-source >>> case goes away because this is now handled transparently by the SIMD >>> lowering pass. >>> --- >>> src/mesa/drivers/dri/i965/brw_fs.h | 5 +- >>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 203 >>> +++------------------------ >>> 2 files changed, 20 insertions(+), 188 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >>> b/src/mesa/drivers/dri/i965/brw_fs.h >>> index 1ae79a9..64f89d4 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs.h >>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >>> @@ -290,13 +290,10 @@ public: >>> bool optimize_frontfacing_ternary(nir_alu_instr *instr, >>> const fs_reg &result); >>> >>> - void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, >>> - unsigned exec_size, bool use_2nd_half); >>> void emit_alpha_test(); >>> fs_inst *emit_single_fb_write(const brw::fs_builder &bld, >>> fs_reg color1, fs_reg color2, >>> - fs_reg src0_alpha, unsigned components, >>> - unsigned exec_size, bool use_2nd_half = >>> false); >>> + fs_reg src0_alpha, unsigned components); >>> void emit_fb_writes(); >>> void emit_urb_writes(); >>> void emit_cs_terminate(); >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>> index ba4b177..bcfeaa0 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>> @@ -1409,33 +1409,6 @@ fs_visitor::emit_interpolation_setup_gen6() >>> } >>> } >>> >>> -void >>> -fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned >>> components, >>> - unsigned exec_size, bool use_2nd_half) >>> -{ >>> - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; >>> - fs_inst *inst; >>> - >>> - if (key->clamp_fragment_color) { >>> - fs_reg tmp = vgrf(glsl_type::vec4_type); >>> - assert(color.type == BRW_REGISTER_TYPE_F); >>> - for (unsigned i = 0; i < components; i++) { >>> - inst = bld.MOV(offset(tmp, bld, i), offset(color, bld, i)); >>> - inst->saturate = true; >>> - } >>> - color = tmp; >>> - } >>> - >>> - if (exec_size < dispatch_width) { >>> - unsigned half_idx = use_2nd_half ? 1 : 0; >>> - for (unsigned i = 0; i < components; i++) >>> - dst[i] = half(offset(color, bld, i), half_idx); >>> - } else { >>> - for (unsigned i = 0; i < components; i++) >>> - dst[i] = offset(color, bld, i); >>> - } >>> -} >>> - >>> static enum brw_conditional_mod >>> cond_for_alpha_func(GLenum func) >>> { >>> @@ -1493,146 +1466,34 @@ fs_visitor::emit_alpha_test() >>> fs_inst * >>> fs_visitor::emit_single_fb_write(const fs_builder &bld, >>> fs_reg color0, fs_reg color1, >>> - fs_reg src0_alpha, unsigned components, >>> - unsigned exec_size, bool use_2nd_half) >>> + fs_reg src0_alpha, unsigned components) >>> { >>> assert(stage == MESA_SHADER_FRAGMENT); >>> brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; >>> - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; >>> - const fs_builder ubld = bld.group(exec_size, use_2nd_half); >>> - int header_size = 2, payload_header_size; >>> - >>> - /* We can potentially have a message length of up to 15, so we have to >>> set >>> - * base_mrf to either 0 or 1 in order to fit in m0..m15. >>> - */ >>> - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15); >>> - int length = 0; >>> - >>> - /* From the Sandy Bridge PRM, volume 4, page 198: >>> - * >>> - * "Dispatched Pixel Enables. One bit per pixel indicating >>> - * which pixels were originally enabled when the thread was >>> - * dispatched. This field is only required for the end-of- >>> - * thread message and on all dual-source messages." >>> - */ >>> - if (devinfo->gen >= 6 && >>> - (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) >>> && >>> - color1.file == BAD_FILE && >>> - key->nr_color_regions == 1) { >>> - header_size = 0; >>> - } >>> - >>> - if (header_size != 0) { >>> - assert(header_size == 2); >>> - /* Allocate 2 registers for a header */ >>> - length += 2; >>> - } >>> - >>> - if (payload.aa_dest_stencil_reg) { >>> - sources[length] = fs_reg(GRF, alloc.allocate(1)); >>> - bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") >>> - .MOV(sources[length], >>> - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); >>> - length++; >>> - } >>> - >>> - if (prog_data->uses_omask) { >>> - sources[length] = fs_reg(GRF, alloc.allocate(1), >>> - BRW_REGISTER_TYPE_UD); >>> - >>> - /* Hand over gl_SampleMask. Only the lower 16 bits of each channel >>> are >>> - * relevant. Since it's unsigned single words one vgrf is always >>> - * 16-wide, but only the lower or higher 8 channels will be used by >>> the >>> - * hardware when doing a SIMD8 write depending on whether we have >>> - * selected the subspans for the first or second half respectively. >>> - */ >>> - fs_reg sample_mask = this->sample_mask; >>> - assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == >>> 4); >>> - sample_mask.type = BRW_REGISTER_TYPE_UW; >>> - sample_mask.stride *= 2; >>> - >>> - ubld.annotate("FB write oMask") >>> - .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW), >>> - use_2nd_half), >>> - half(sample_mask, use_2nd_half)); >>> - length++; >>> - } >>> - >>> - payload_header_size = length; >>> - >>> - if (src0_alpha.file != BAD_FILE) { >>> - /* FIXME: This is being passed at the wrong location in the payload >>> and >>> - * doesn't work when gl_SampleMask and MRTs are used simultaneously. >>> - * It's supposed to be immediately before oMask but there seems to >>> be no >>> - * reasonable way to pass them in the correct order because >>> LOAD_PAYLOAD >>> - * requires header sources to form a contiguous segment at the >>> beginning >>> - * of the message and src0_alpha has per-channel semantics. >>> - */ >>> - setup_color_payload(&sources[length], src0_alpha, 1, exec_size, >>> false); >>> - length++; >>> - } >>> - >>> - setup_color_payload(&sources[length], color0, components, >>> - exec_size, use_2nd_half); >>> - length += 4; >>> - >>> - if (color1.file != BAD_FILE) { >>> - setup_color_payload(&sources[length], color1, components, >>> - exec_size, use_2nd_half); >>> - length += 4; >>> - } >>> - >>> - if (source_depth_to_render_target) { >>> - if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { >>> - /* Hand over gl_FragDepth. */ >>> - assert(this->frag_depth.file != BAD_FILE); >>> - if (exec_size < dispatch_width) { >>> - sources[length] = half(this->frag_depth, use_2nd_half); >>> - } else { >>> - sources[length] = this->frag_depth; >>> - } >>> - } else { >>> - /* Pass through the payload depth. */ >>> - sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, >>> 0)); >>> - } >>> - length++; >>> - } >>> >>> - if (payload.dest_depth_reg) >>> - sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)); >>> + /* Hand over gl_FragDepth or the payload depth. */ >>> + const fs_reg src_depth = >>> + (!source_depth_to_render_target ? fs_reg() : >>> + prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH) ? >>> frag_depth : >>> + fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))); >> >> I think I'd rather this be an if-ladder. There's enough stuff going >> on in the double-ternary that it's hard to read. With that fixed, >> this series is >> > > In that case I'll no longer be able to mark src_depth as const and there > will be some interleaving of declarations and control flow statements -- > What code smell do you prefer?
Yes, I understand the desire to mark it const and not mix declarations with control-flow. However, I don't think that's so bad if you declare it and then immediately have some control-flow dedicated to setting it. The nested ternaries are really hard to read. --Jason >> Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> >> >> On to texturing... >> > > Thanks. > >>> >>> - fs_inst *load; >>> - fs_inst *write; >>> - if (devinfo->gen >= 7) { >>> - /* Send from the GRF */ >>> - fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); >>> - load = ubld.LOAD_PAYLOAD(payload, sources, length, >>> payload_header_size); >>> - payload.reg = alloc.allocate(load->regs_written); >>> - load->dst = payload; >>> - write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload); >>> - write->base_mrf = -1; >>> - } else { >>> - /* Send from the MRF */ >>> - load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), >>> - sources, length, payload_header_size); >>> + const fs_reg dst_depth = (payload.dest_depth_reg ? >>> + fs_reg(brw_vec8_grf(payload.dest_depth_reg, >>> 0)) : >>> + fs_reg()); >>> >>> - /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD >>> - * will do this for us if we just give it a COMPR4 destination. >>> - */ >>> - if (devinfo->gen < 6 && exec_size == 16) >>> - load->dst.reg |= BRW_MRF_COMPR4; >>> + const fs_reg sources[] = { >>> + color0, color1, src0_alpha, src_depth, dst_depth, sample_mask, >>> + fs_reg(components) >>> + }; >>> >>> - write = ubld.emit(FS_OPCODE_FB_WRITE); >>> - write->exec_size = exec_size; >>> - write->base_mrf = 1; >>> - } >>> + fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), >>> + sources, ARRAY_SIZE(sources)); >>> >>> - write->mlen = load->regs_written; >>> - write->header_size = header_size; >>> if (prog_data->uses_kill) { >>> write->predicate = BRW_PREDICATE_NORMAL; >>> write->flag_subreg = 1; >>> } >>> + >>> return write; >>> } >>> >>> @@ -1659,33 +1520,9 @@ fs_visitor::emit_fb_writes() >>> const fs_builder abld = bld.annotate("FB dual-source write"); >>> >>> inst = emit_single_fb_write(abld, this->outputs[0], >>> - this->dual_src_output, reg_undef, 4, 8); >>> + this->dual_src_output, reg_undef, 4); >>> inst->target = 0; >>> >>> - /* SIMD16 dual source blending requires to send two SIMD8 dual source >>> - * messages, where each message contains color data for 8 pixels. >>> Color >>> - * data for the first group of pixels is stored in the "lower" half >>> of >>> - * the color registers, so in SIMD16, the previous message did: >>> - * m + 0: r0 >>> - * m + 1: g0 >>> - * m + 2: b0 >>> - * m + 3: a0 >>> - * >>> - * Here goes the second message, which packs color data for the >>> - * remaining 8 pixels. Color data for these pixels is stored in the >>> - * "upper" half of the color registers, so we need to do: >>> - * m + 0: r1 >>> - * m + 1: g1 >>> - * m + 2: b1 >>> - * m + 3: a1 >>> - */ >>> - if (dispatch_width == 16) { >>> - inst = emit_single_fb_write(abld, this->outputs[0], >>> - this->dual_src_output, reg_undef, 4, >>> 8, >>> - true); >>> - inst->target = 0; >>> - } >>> - >>> prog_data->dual_src_blend = true; >>> } else { >>> for (int target = 0; target < key->nr_color_regions; target++) { >>> @@ -1702,8 +1539,7 @@ fs_visitor::emit_fb_writes() >>> >>> inst = emit_single_fb_write(abld, this->outputs[target], >>> reg_undef, >>> src0_alpha, >>> - this->output_components[target], >>> - dispatch_width); >>> + this->output_components[target]); >>> inst->target = target; >>> } >>> } >>> @@ -1721,8 +1557,7 @@ fs_visitor::emit_fb_writes() >>> const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); >>> bld.LOAD_PAYLOAD(tmp, srcs, 4, 0); >>> >>> - inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4, >>> - dispatch_width); >>> + inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4); >>> inst->target = 0; >>> } >>> >>> -- >>> 2.4.3 >>> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev