This is a problem when we have IR like this: (array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i (swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) )
where we are indexing an array with the result of an expression that accesses the same array. In this scenario, temps will be moved to scratch space and we will need to add scratch reads/writes for all accesses to temps, however, the current implementation does not consider the case where a reladdr pointer (obtained by indexing into temps trough a expression) points to a register that is also stored in scratch space (as in this case, where the expression used to index temps access temps[2]), and thus, requires a scratch read before it is accessed. v2 (Francisco Jerez): - Handle also recursive reladdr addressing. - Do not memcpy dst_reg into src_reg when rewriting reladdr. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508 --- A couple of notes for reviewers: 1. The implementation resolves recursive reladdr scratch accesses in one go so we don't have to do multiple passes over the complete set of instructions. This is more performant than doing something similar to what we do in move_uniform_array_access_to_pull_constants. 2. Once we start handling recursive reladdr we are rewriting reladdr accesses to point to the destination registers of the scratch loads. This means that alloc.count increases and we can have a reladdr pointing to a register location beyond the original alloc.count, so we should take this into account when indexing scratch_loc[] with reladdr registers. I tested this for recursive reladdr accesses in both src and dst, including indexing different arrays: a[b[a[b[0]]]], etc and seems to work well. No piglit regressions on IvyBridge. As a side note, I also noticed that opt_array_splitting.cpp does not handle these situations well and hits an assertion in some cases where it wrongly assumes that an array only has constant indexing. This problem is happening in master and is unrelated to this patch so I'll upload a bug report for that. I mention this just in case someone wants to test the patch and hits the problem. In my tests I had to disable that optimization pass for the more complex cases. src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 138 +++++++++++++++++++++---- 1 file changed, 117 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5bf9e1b..c776c7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3391,7 +3391,8 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, void vec4_visitor::move_grf_array_access_to_scratch() { - int scratch_loc[this->alloc.count]; + int alloc_count = alloc.count; + int scratch_loc[alloc_count]; memset(scratch_loc, -1, sizeof(scratch_loc)); /* First, calculate the set of virtual GRFs that need to be punted @@ -3400,19 +3401,29 @@ vec4_visitor::move_grf_array_access_to_scratch() */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { if (inst->dst.file == GRF && inst->dst.reladdr && - scratch_loc[inst->dst.reg] == -1) { - scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[inst->dst.reg]; + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[inst->dst.reg]; + + src_reg *iter = inst->dst.reladdr; + while (iter->reladdr) { + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + iter = iter->reladdr; + } } for (int i = 0 ; i < 3; i++) { - src_reg *src = &inst->src[i]; - - if (src->file == GRF && src->reladdr && - scratch_loc[src->reg] == -1) { - scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[src->reg]; - } + src_reg *iter = &inst->src[i]; + while (iter->reladdr) { + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + iter = iter->reladdr; + } } } @@ -3422,27 +3433,112 @@ vec4_visitor::move_grf_array_access_to_scratch() * we're processing. */ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + bool nested_reladdr; + /* Set up the annotation tracking for new generated instructions. */ base_ir = inst->ir; current_annotation = inst->annotation; + /* First handle scratch access on the dst */ if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { - emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); + /* If our reladdr points to scratch space, then we have to load + * it too before we use it to load dst from scratch space. + * Notice that scratch reladdr access can be recursive! + */ + dst_reg *dst = &inst->dst; + src_reg dst_as_src = src_reg(*dst); + if (dst->reladdr && + dst->reladdr->file == GRF && + dst->reladdr->reg < alloc_count && + scratch_loc[dst->reladdr->reg] != -1) { + do { + nested_reladdr = false; + + /* Find the last recursive reladdr scratch access */ + src_reg *iter = &dst_as_src; + while (iter->reladdr->reladdr && + iter->reladdr->reladdr->file == GRF && + iter->reladdr->reladdr->reg < alloc_count && + scratch_loc[iter->reladdr->reladdr->reg] != -1) { + iter = iter->reladdr; + nested_reladdr = true; + } + + /* Load this non-recursive reladdr scratch access */ + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + emit_scratch_read(block, inst, temp, *iter->reladdr, + scratch_loc[iter->reladdr->reg]); + if (nested_reladdr) { + iter->reladdr->file = temp.file; + iter->reladdr->reg = temp.reg; + iter->reladdr->reg_offset = temp.reg_offset; + iter->reladdr->reladdr = NULL; + } else { + dst->reladdr->file = temp.file; + dst->reladdr->reg = temp.reg; + dst->reladdr->reg_offset = temp.reg_offset; + dst->reladdr->reladdr = NULL; + } + } while (nested_reladdr); + } + + /* Now that we have handled any (possibly recursive) reladdr scratch + * accesses for dst we can safely do the scratch write for dst itself + */ + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); + } } + /* Now handle scratch access on any src */ for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) - continue; + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + /* If our reladdr points to scratch space, then we have to load + * it too before we use it to loading src[i] from scratch space. + * Notice that scratch reladdr access can be recursive! + */ + src_reg *src = &inst->src[i]; + if (src->reladdr && + src->reladdr->file == GRF && + src->reladdr->reg < alloc_count && + scratch_loc[src->reladdr->reg] != -1) { + do { + src_reg *iter = src; + nested_reladdr = false; + + /* Find the last recursive reladdr scratch access */ + while (iter->reladdr->reladdr && + iter->reladdr->reladdr->file == GRF && + iter->reladdr->reladdr->reg < alloc_count && + scratch_loc[iter->reladdr->reladdr->reg] != -1) { + iter = iter->reladdr; + nested_reladdr = true; + } - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + /* Load this non-recursive reladdr scratch access */ + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + emit_scratch_read(block, inst, temp, *iter->reladdr, + scratch_loc[iter->reladdr->reg]); - emit_scratch_read(block, inst, temp, inst->src[i], - scratch_loc[inst->src[i].reg]); + iter->reladdr->file = temp.file; + iter->reladdr->reg = temp.reg; + iter->reladdr->reg_offset = temp.reg_offset; + iter->reladdr->reladdr = NULL; + } while (nested_reladdr); + } + + /* Now that we have handled any (possibly recursive) reladdr scratch + * accesses for src[i] we can safely do the scratch for src[i] itself + */ + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + emit_scratch_read(block, inst, temp, *src, scratch_loc[src->reg]); - inst->src[i].file = temp.file; - inst->src[i].reg = temp.reg; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; + src->file = temp.file; + src->reg = temp.reg; + src->reg_offset = temp.reg_offset; + src->reladdr = NULL; } } } -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev