On 04/18/2014 11:56 AM, Matt Turner wrote: > --- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 135 > +++++++++++++++------------ > 1 file changed, 73 insertions(+), 62 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 2aa3acd..91bbe0a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -1257,8 +1257,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > int reg_width = dispatch_width / 8; > bool header_present = false; > > - fs_reg payload = fs_reg(this, glsl_type::float_type); > - fs_reg next = payload; > + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); > + for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { > + sources[i] = fs_reg(this, glsl_type::float_type); > + } > + int length = 0; > > if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= > 16) { > /* For general texture offsets (no txf workaround), we need a header to > @@ -1272,12 +1275,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > * need to offset the Sampler State Pointer in the header. > */ > header_present = true; > - next.reg_offset++; > + sources[length] = reg_undef; > + length++; > }
So...if you don't take the header_present = true path...then it looks like the next thing (say, shadow_c) will land in sources[0], rather than leaving sources[0] as BAD_FILE and putting the first part of the payload in sources[1]. Is sources[0] special and reserved for the header or isn't it? > > if (ir->shadow_comparitor) { > - emit(MOV(next, shadow_c)); > - next.reg_offset++; > + emit(MOV(sources[length], shadow_c)); I'm confused by the fact that these MOVs are still here. I would have expected: sources[length] = sample_c; When we visit expression trees, we generate results into various registers. The point of these MOVs is to put them into a large-VGRF we can SEND from, at the right ref_offset. Now, you have a new set of MAX_SAMPLER_MESSAGE_SIZE registers, and copy from the original registers to these new temporaries, then LOAD_PAYLOAD does a second copy from those into the new ones. It seems like we could just use the original registers and do a single copy. I'm probably missing something here, but I can't think of what. > + length++; > } > > bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant(); > @@ -1289,12 +1293,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > case ir_lod: > break; > case ir_txb: > - emit(MOV(next, lod)); > - next.reg_offset++; > + emit(MOV(sources[length], lod)); > + length++; > break; > case ir_txl: > - emit(MOV(next, lod)); > - next.reg_offset++; > + emit(MOV(sources[length], lod)); > + length++; > break; > case ir_txd: { > no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); > @@ -1303,21 +1307,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, > dPdy.z > */ > for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { > - emit(MOV(next, coordinate)); > + emit(MOV(sources[length], coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > > /* For cube map array, the coordinate is (u,v,r,ai) but there are > * only derivatives for (u, v, r). > */ > if (i < ir->lod_info.grad.dPdx->type->vector_elements) { > - emit(MOV(next, lod)); > + emit(MOV(sources[length], lod)); > lod.reg_offset++; > - next.reg_offset++; > + length++; > > - emit(MOV(next, lod2)); > + emit(MOV(sources[length], lod2)); > lod2.reg_offset++; > - next.reg_offset++; > + length++; > } > } > > @@ -1325,45 +1329,45 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > break; > } > case ir_txs: > - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod)); > - next.reg_offset++; > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); > + length++; > break; > case ir_query_levels: > - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u))); > - next.reg_offset++; > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); > + length++; > break; > case ir_txf: > /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. > */ > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod)); > - next.reg_offset++; > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); > + length++; > > for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > } > > coordinate_done = true; > break; > case ir_txf_ms: > - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index)); > - next.reg_offset++; > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index)); > + length++; > > /* data from the multisample control surface */ > - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), mcs)); > - next.reg_offset++; > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); > + length++; > > /* there is no offsetting for this message; just copy in the integer > * texture coordinates > */ > for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > } > > coordinate_done = true; > @@ -1378,21 +1382,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > fs_reg offset_value = this->result; > > for (int i = 0; i < 2; i++) { /* u, v */ > - emit(MOV(next, coordinate)); > + emit(MOV(sources[length], coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > } > > for (int i = 0; i < 2; i++) { /* offu, offv */ > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value)); > + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), > offset_value)); > offset_value.reg_offset++; > - next.reg_offset++; > + length++; > } > > if (ir->coordinate->type->vector_elements == 3) { /* r if present */ > - emit(MOV(next, coordinate)); > + emit(MOV(sources[length], coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > } > > coordinate_done = true; > @@ -1403,40 +1407,44 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg > dst, fs_reg coordinate, > /* Set up the coordinate (except for cases where it was done above) */ > if (ir->coordinate && !coordinate_done) { > for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { > - emit(MOV(next, coordinate)); > + emit(MOV(sources[length], coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > + length++; > } > } > > + fs_reg src_payload = fs_reg(this, glsl_type::float_type); > + virtual_grf_sizes[src_payload.reg] = length; > + emit(SHADER_OPCODE_LOAD_PAYLOAD, src_payload, sources, length); > + I think I would prefer to see this called "payload", "msg_payload" or simply "message". There are enough things called src/sources/... Perhaps also use the virtual_grf_alloc paradigm, as you do in emit_mcs_fetch? i.e. fs_reg msg_payload(GRF, virtual_grf_alloc(length), BRW_REGISTER_TYPE_F); > /* Generate the SEND */ > - fs_inst *inst = NULL; > + enum opcode opcode; > switch (ir->op) { > - case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break; > - case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break; > - case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; > - case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break; > - case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break; > - case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break; > - case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; > - case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; > - case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break; > + case ir_tex: opcode = SHADER_OPCODE_TEX; break; > + case ir_txb: opcode = FS_OPCODE_TXB; break; > + case ir_txl: opcode = SHADER_OPCODE_TXL; break; > + case ir_txd: opcode = SHADER_OPCODE_TXD; break; > + case ir_txf: opcode = SHADER_OPCODE_TXF; break; > + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; > + case ir_txs: opcode = SHADER_OPCODE_TXS; break; > + case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; > + case ir_lod: opcode = SHADER_OPCODE_LOD; break; > case ir_tg4: > if (has_nonconstant_offset) > - inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload); > + opcode = SHADER_OPCODE_TG4_OFFSET; > else > - inst = emit(SHADER_OPCODE_TG4, dst, payload); > + opcode = SHADER_OPCODE_TG4; > break; > } > + fs_inst *inst = emit(opcode, dst, src_payload); > inst->base_mrf = -1; > if (reg_width == 2) > - inst->mlen = next.reg_offset * reg_width - header_present; > + inst->mlen = length * reg_width - header_present; > else > - inst->mlen = next.reg_offset * reg_width; > + inst->mlen = length * reg_width; > inst->header_present = header_present; > inst->regs_written = 4; > > - virtual_grf_sizes[payload.reg] = next.reg_offset; > if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { > fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) > " disallowed by hardware\n"); > @@ -1551,21 +1559,24 @@ fs_reg > fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, int sampler) > { > int reg_width = dispatch_width / 8; > - fs_reg payload = fs_reg(this, glsl_type::float_type); > + int length = ir->coordinate->type->vector_elements; > + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length), > + BRW_REGISTER_TYPE_F); FWIW, in C++, you don't need to do fs_reg name = fs_reg(...); you can just do: fs_reg name(...); Hopefully the compiler is smart enough not to construct an fs_reg with the default constructor, destruct it, and assign over it using the assignment operator...but I don't know that it is. It's also just a bit shorter. > fs_reg dest = fs_reg(this, glsl_type::uvec4_type); > - fs_reg next = payload; > + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length); > > - /* parameters are: u, v, r, lod; missing parameters are treated as zero */ > - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { > - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); > + /* parameters are: u, v, r; missing parameters are treated as zero */ > + for (int i = 0; i < length; i++) { > + sources[i] = fs_reg(this, glsl_type::float_type); > + emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); > coordinate.reg_offset++; > - next.reg_offset++; > } > > + emit(SHADER_OPCODE_LOAD_PAYLOAD, payload, sources, length); > + > fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload); > - virtual_grf_sizes[payload.reg] = next.reg_offset; > inst->base_mrf = -1; > - inst->mlen = next.reg_offset * reg_width; > + inst->mlen = length * reg_width; > inst->header_present = false; > inst->regs_written = 4 * reg_width; /* we only care about one reg of > response, > * but the sampler always writes 4/8 >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev