When 64-bit registers are (un)spilled, we need to execute data shuffling code before writing to memory or after reading from it. If 64-bit data is operated on via 32-bit instructions, (un)spills for the register produced by those 32-bit instructions will not do data shuffling at all (because we only see a normal 32-bit instruction seemingly operating on 32-bit data). This means that subsequent reads of that register using DF access will unshuffle data read from memory that was never adequately shuffled when it was written.
Fixing this would require identifying which 32-bit instructions write 64-bit data, emitting spill instructions only when the full 64-bit data has been written (by multiple 32-bit instructions writing to different offsets of the same register), and always emitting 64-bit unspills whenever 64-bit data is read, even when the instruction uses a 32-bit type to read it. --- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 7aff2d8..79951e2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -374,9 +374,13 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) { float loop_scale = 1.0; + unsigned *reg_type_size = (unsigned *) + ralloc_size(NULL, this->alloc.count * sizeof(unsigned)); + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2; + reg_type_size[i] = 0; } /* Calculate costs for spilling nodes. 
Call it a cost of 1 per @@ -406,6 +410,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8) no_spill[inst->src[i].nr] = true; } + + /* We can't spill registers that mix 32-bit and 64-bit access (that + * contain 64-bit data that is operated on via 32-bit instructions) + */ + unsigned type_size = type_sz(inst->src[i].type); + if (reg_type_size[inst->src[i].nr] == 0) + reg_type_size[inst->src[i].nr] = type_size; + else if (reg_type_size[inst->src[i].nr] != type_size) + no_spill[inst->src[i].nr] = true; } } @@ -422,6 +435,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) */ if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8) no_spill[inst->dst.nr] = true; + + /* We can't spill registers that mix 32-bit and 64-bit access (that + * contain 64-bit data that is operated on via 32-bit instructions) + */ + unsigned type_size = type_sz(inst->dst.type); + if (reg_type_size[inst->dst.nr] == 0) + reg_type_size[inst->dst.nr] = type_size; + else if (reg_type_size[inst->dst.nr] != type_size) + no_spill[inst->dst.nr] = true; } switch (inst->opcode) { @@ -448,6 +470,8 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) break; } } + + ralloc_free(reg_type_size); } int -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev