On Wed, Apr 23, 2014 at 11:19 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > On 04/18/2014 11:56 AM, Matt Turner wrote: >> --- >> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 135 >> +++++++++++++++------------ >> 1 file changed, 73 insertions(+), 62 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> index 2aa3acd..91bbe0a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> @@ -1257,8 +1257,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> int reg_width = dispatch_width / 8; >> bool header_present = false; >> >> - fs_reg payload = fs_reg(this, glsl_type::float_type); >> - fs_reg next = payload; >> + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, >> MAX_SAMPLER_MESSAGE_SIZE); >> + for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { >> + sources[i] = fs_reg(this, glsl_type::float_type); >> + } >> + int length = 0; >> >> if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= >> 16) { >> /* For general texture offsets (no txf workaround), we need a header >> to >> @@ -1272,12 +1275,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> * need to offset the Sampler State Pointer in the header. >> */ >> header_present = true; >> - next.reg_offset++; >> + sources[length] = reg_undef; >> + length++; >> } > > So...if you don't take the header_present = true path...then it looks > like the next thing (say, shadow_c) will land in sources[0], rather than > leaving sources[0] as BAD_FILE and putting the first part of the payload > in sources[1]. > > Is sources[0] special and reserved for the header or isn't it?
It's the header if there is a header, and it's a regular source if there isn't a header. We can't make it always a header, because if the header doesn't exist we'll end up asking the register allocator for a set of registers one too large. >> >> if (ir->shadow_comparitor) { >> - emit(MOV(next, shadow_c)); >> - next.reg_offset++; >> + emit(MOV(sources[length], shadow_c)); > > I'm confused by the fact that these MOVs are still here. I would have > expected: > > sources[length] = sample_c; > > When we visit expression trees, we generate results into various > registers. The point of these MOVs is to put them into a large-VGRF we > can SEND from, at the right ref_offset. > > Now, you have a new set of MAX_SAMPLER_MESSAGE_SIZE registers, and copy > from the original registers to these new temporaries, then LOAD_PAYLOAD > does a second copy from those into the new ones. It seems like we could > just use the original registers and do a single copy. > > I'm probably missing something here, but I can't think of what. I seem to remember needing to do this for some reason. I'll probably have to try without the MOVs and see what breaks to remember. >> + length++; >> } >> >> bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant(); >> @@ -1289,12 +1293,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> case ir_lod: >> break; >> case ir_txb: >> - emit(MOV(next, lod)); >> - next.reg_offset++; >> + emit(MOV(sources[length], lod)); >> + length++; >> break; >> case ir_txl: >> - emit(MOV(next, lod)); >> - next.reg_offset++; >> + emit(MOV(sources[length], lod)); >> + length++; >> break; >> case ir_txd: { >> no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); >> @@ -1303,21 +1307,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, >> dPdy.z >> */ >> for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { >> - emit(MOV(next, coordinate)); >> + emit(MOV(sources[length], coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> >> /* For cube map array, the coordinate is (u,v,r,ai) but there are >> * only derivatives for (u, v, r). >> */ >> if (i < ir->lod_info.grad.dPdx->type->vector_elements) { >> - emit(MOV(next, lod)); >> + emit(MOV(sources[length], lod)); >> lod.reg_offset++; >> - next.reg_offset++; >> + length++; >> >> - emit(MOV(next, lod2)); >> + emit(MOV(sources[length], lod2)); >> lod2.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> } >> >> @@ -1325,45 +1329,45 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> break; >> } >> case ir_txs: >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod)); >> - next.reg_offset++; >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod)); >> + length++; >> break; >> case ir_query_levels: >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u))); >> - next.reg_offset++; >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u))); >> + length++; >> break; >> case ir_txf: >> /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. >> */ >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod)); >> - next.reg_offset++; >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); >> + length++; >> >> for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> >> coordinate_done = true; >> break; >> case ir_txf_ms: >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index)); >> - next.reg_offset++; >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), >> sample_index)); >> + length++; >> >> /* data from the multisample control surface */ >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), mcs)); >> - next.reg_offset++; >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs)); >> + length++; >> >> /* there is no offsetting for this message; just copy in the integer >> * texture coordinates >> */ >> for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), >> coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> >> coordinate_done = true; >> @@ -1378,21 +1382,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> fs_reg offset_value = this->result; >> >> for (int i = 0; i < 2; i++) { /* u, v */ >> - emit(MOV(next, coordinate)); >> + emit(MOV(sources[length], coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> >> for (int i = 0; i < 2; i++) { /* offu, offv */ >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value)); >> + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), >> offset_value)); >> offset_value.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> >> if (ir->coordinate->type->vector_elements == 3) { /* r if present >> */ >> - emit(MOV(next, coordinate)); >> + emit(MOV(sources[length], coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> >> coordinate_done = true; >> @@ -1403,40 +1407,44 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg >> dst, fs_reg coordinate, >> /* Set up the coordinate (except for cases where it was done above) */ >> if (ir->coordinate && !coordinate_done) { >> for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { >> - emit(MOV(next, coordinate)); >> + emit(MOV(sources[length], coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> + length++; >> } >> } >> >> + fs_reg src_payload = fs_reg(this, glsl_type::float_type); >> + virtual_grf_sizes[src_payload.reg] = length; >> + emit(SHADER_OPCODE_LOAD_PAYLOAD, src_payload, sources, length); >> + > > I think I would prefer to see this called "payload", "msg_payload" or > simply "message". There are enough things called src/sources/... Right. I don't know, I thought "load the sources into the source payload" read pretty well. All of the other names suggest that they're already a contiguous set of registers. > > Perhaps also use the virtual_grf_alloc paradigm, as you do in > emit_mcs_fetch? i.e. > > fs_reg msg_payload(GRF, virtual_grf_alloc(length), BRW_REGISTER_TYPE_F); Yep, will do. >> /* Generate the SEND */ >> - fs_inst *inst = NULL; >> + enum opcode opcode; >> switch (ir->op) { >> - case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break; >> - case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break; >> - case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; >> - case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break; >> - case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break; >> - case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break; >> - case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; >> - case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); >> break; >> - case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break; >> + case ir_tex: opcode = SHADER_OPCODE_TEX; break; >> + case ir_txb: opcode = FS_OPCODE_TXB; break; >> + case ir_txl: opcode = SHADER_OPCODE_TXL; break; >> + case ir_txd: opcode = SHADER_OPCODE_TXD; break; >> + case ir_txf: opcode = SHADER_OPCODE_TXF; break; >> + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; >> + case ir_txs: opcode = SHADER_OPCODE_TXS; break; >> + case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; >> + case ir_lod: opcode = SHADER_OPCODE_LOD; break; >> case ir_tg4: >> if (has_nonconstant_offset) >> - inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload); >> + opcode = SHADER_OPCODE_TG4_OFFSET; >> else >> - inst = emit(SHADER_OPCODE_TG4, dst, payload); >> + opcode = SHADER_OPCODE_TG4; >> break; >> } >> + fs_inst *inst = emit(opcode, dst, src_payload); >> inst->base_mrf = -1; >> if (reg_width == 2) >> - inst->mlen = next.reg_offset * reg_width - header_present; >> + inst->mlen = length * reg_width - header_present; >> else >> - inst->mlen = next.reg_offset * reg_width; >> + inst->mlen = length * reg_width; >> inst->header_present = header_present; >> inst->regs_written = 4; >> >> - virtual_grf_sizes[payload.reg] = next.reg_offset; >> if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { >> fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) >> " disallowed by hardware\n"); >> @@ -1551,21 +1559,24 @@ fs_reg >> fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, int sampler) >> { >> int reg_width = dispatch_width / 8; >> - fs_reg payload = fs_reg(this, glsl_type::float_type); >> + int length = ir->coordinate->type->vector_elements; >> + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length), >> + BRW_REGISTER_TYPE_F); > > FWIW, in C++, you don't need to do > > fs_reg name = fs_reg(...); > > you can just do: > > fs_reg name(...); > > Hopefully the compiler is smart enough not to construct an fs_reg with > the default constructor, destruct it, and assign over it using the > assignment operator...but I don't know that it is. > > It's also just a bit shorter. Yep, will do. > >> fs_reg dest = fs_reg(this, glsl_type::uvec4_type); >> - fs_reg next = payload; >> + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length); >> >> - /* parameters are: u, v, r, lod; missing parameters are treated as zero >> */ >> - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { >> - emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate)); >> + /* parameters are: u, v, r; missing parameters are treated as zero */ >> + for (int i = 0; i < length; i++) { >> + sources[i] = fs_reg(this, glsl_type::float_type); >> + emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate)); >> coordinate.reg_offset++; >> - next.reg_offset++; >> } >> >> + emit(SHADER_OPCODE_LOAD_PAYLOAD, payload, sources, length); >> + >> fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload); >> - virtual_grf_sizes[payload.reg] = next.reg_offset; >> inst->base_mrf = -1; >> - inst->mlen = next.reg_offset * reg_width; >> + inst->mlen = length * reg_width; >> inst->header_present = false; >> inst->regs_written = 4 * reg_width; /* we only care about one reg of >> response, >> * but the sampler always writes 4/8 >> > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev