Iago Toral Quiroga <ito...@igalia.com> writes: > This is a problem when we have IR like this: > > (array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i > (swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) ) > > where we are indexing an array with the result of an expression that > accesses the same array. > > In this scenario, temps will be moved to scratch space and we will need > to add scratch reads/writes for all accesses to temps, however, the > current implementation does not consider the case where a reladdr pointer > (obtained by indexing into temps trough a expression) points to a register > that is also stored in scratch space (as in this case, where the expression > used to index temps access temps[2]), and thus, requires a scratch read > before it is accessed. > > v2 (Francisco Jerez): > - Handle also recursive reladdr addressing. > - Do not memcpy dst_reg into src_reg when rewriting reladdr. > > v3 (Francisco Jerez): > - Reduce complexity by moving recursive reladdr scratch access handling > to a separate recursive function. > - Do not skip demoting reladdr index registers to scratch space if the > top level GRF has already been visited. > > v4 (Francisco Jerez) > - Remove redundant checks. > - Simplify code by making emit_resolve_reladdr return a register with > the original src data except for reg, reg_offset and reladdr. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508 > --- > src/mesa/drivers/dri/i965/brw_vec4.h | 2 + > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 100 > ++++++++++++++++++------- > 2 files changed, 76 insertions(+), 26 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 33297ae..6ec00d5 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -367,6 +367,8 @@ public: > dst_reg dst, > src_reg orig_src, > int base_offset); > + src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, > + vec4_instruction *inst, src_reg src); > > bool try_emit_mad(ir_expression *ir); > bool try_emit_b2f_of_compare(ir_expression *ir); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 26a3b9f..ca1a995 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -3352,6 +3352,39 @@ vec4_visitor::emit_scratch_write(bblock_t *block, > vec4_instruction *inst, > } > > /** > + * Checks if \p src and/or \p src.reladdr require a scratch read, and if so, > + * adds the scratch read(s) before \p inst. The function also checks for > + * recursive reladdr scratch accesses, issuing the corresponding scratch > + * loads and rewriting reladdr references accordingly. > + * > + * \return \p src if it did not require a scratch load, otherwise, the > + * register holding the result of the scratch load that the caller should > + * use to rewrite src. > + */ > +src_reg > +vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block, > + vec4_instruction *inst, src_reg src) > +{ > + /* Resolve recursive reladdr scratch access by calling ourselves > + * with src.reladdr > + */ > + if (src.reladdr) > + *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, > + *src.reladdr); > + > + /* Now handle scratch access on src */ > + if (src.file == GRF && scratch_loc[src.reg] != -1) { > + dst_reg temp = dst_reg(this, glsl_type::vec4_type); > + emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]); > + src.reg = temp.reg; > + src.reg_offset = temp.reg_offset; > + src.reladdr = NULL; > + } > + > + return src; > +} > + > +/** > * We can't generally support array access in GRF space, because a > * single instruction's destination can only span 2 contiguous > * registers. So, we send all GRF arrays that get variable index > @@ -3368,20 +3401,31 @@ vec4_visitor::move_grf_array_access_to_scratch() > * scratch. > */ > foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > - if (inst->dst.file == GRF && inst->dst.reladdr && > - scratch_loc[inst->dst.reg] == -1) { > - scratch_loc[inst->dst.reg] = c->last_scratch; > - c->last_scratch += this->alloc.sizes[inst->dst.reg]; > + if (inst->dst.file == GRF && inst->dst.reladdr) { > + if (scratch_loc[inst->dst.reg] == -1) { > + scratch_loc[inst->dst.reg] = c->last_scratch; > + c->last_scratch += this->alloc.sizes[inst->dst.reg]; > + } > + > + for (src_reg *iter = inst->dst.reladdr; > + iter->reladdr; > + iter = iter->reladdr) { > + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { > + scratch_loc[iter->reg] = c->last_scratch; > + c->last_scratch += this->alloc.sizes[iter->reg]; > + } > + } > } > > for (int i = 0 ; i < 3; i++) { > - src_reg *src = &inst->src[i]; > - > - if (src->file == GRF && src->reladdr && > - scratch_loc[src->reg] == -1) { > - scratch_loc[src->reg] = c->last_scratch; > - c->last_scratch += this->alloc.sizes[src->reg]; > - } > + for (src_reg *iter = &inst->src[i]; > + iter->reladdr; > + iter = iter->reladdr) { > + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { > + scratch_loc[iter->reg] = c->last_scratch; > + c->last_scratch += this->alloc.sizes[iter->reg]; > + } > + } > } > } > > @@ -3395,23 +3439,27 @@ vec4_visitor::move_grf_array_access_to_scratch() > base_ir = inst->ir; > current_annotation = inst->annotation; > > - if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { > - emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); > - } > - > - for (int i = 0 ; i < 3; i++) { > - if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) > - continue; > - > - dst_reg temp = dst_reg(this, glsl_type::vec4_type); > + /* First handle scratch access on the dst. Notice we have to handle > + * the case where the dst's reladdr also points to scratch space. > + */ > + if (inst->dst.reladdr) > + *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, > + *inst->dst.reladdr); > > - emit_scratch_read(block, inst, temp, inst->src[i], > - scratch_loc[inst->src[i].reg]); > + /* Now that we have handled any (possibly recursive) reladdr scratch > + * accesses for dst we can safely do the scratch write for dst itself > + */ > + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) > + emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); > > - inst->src[i].file = temp.file; > - inst->src[i].reg = temp.reg; > - inst->src[i].reg_offset = temp.reg_offset; > - inst->src[i].reladdr = NULL; > + /* Now handle scratch access on any src. In this case, since > inst->src[i] > + * already is a src_reg, we can just call emit_resolve_reladdr with > + * inst->src[i] and it will take care of handling scratch loads for > + * both src and src.reladdr (recursively). > + */ > + for (int i = 0 ; i < 3; i++) { > + inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst, > + inst->src[i]); > } > } > } > -- > 1.9.1
Looks great, thanks, Reviewed-by: Francisco Jerez <curroje...@riseup.net>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev