From: Dave Airlie <airl...@redhat.com> r600, rv610 and rv630 all have a bug in their GPR indexing and in how the hw inserts accesses to PV (the previous-vector result register).
If the base index for the src is the same as the dst gpr in a previous group, then it will use PV instead of using the indexed gpr correctly. The workaround is to insert a NOP when you detect this. v2: add second part of fix detecting DST rel writes followed by same src base index reads. Fixes ~200 piglit regressions on rv635 since SB was introduced. Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/gallium/drivers/r600/sb/sb_bc.h | 2 + src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 61 ++++++++++++++++++++++---- src/gallium/drivers/r600/sb/sb_context.cpp | 2 + src/gallium/drivers/r600/sb/sb_ir.h | 2 + src/gallium/drivers/r600/sb/sb_pass.h | 5 ++- 5 files changed, 62 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index d03da98..6d3dc4d 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -616,6 +616,8 @@ public: unsigned num_slots; bool uses_mova_gpr; + bool r6xx_gpr_index_workaround; + bool stack_workaround_8xx; bool stack_workaround_9xx; diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 3f362c4..f3e34aa 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -38,6 +38,18 @@ namespace r600_sb { +void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) { + + alu_group_node *g = sh.create_alu_group(); + alu_node *a = sh.create_alu(); + + a->bc.set_op(ALU_OP0_NOP); + a->bc.last = 1; + + g->push_back(a); + b4->insert_before(g); +} + int bc_finalizer::run() { run_on(sh.root); @@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) { } void bc_finalizer::run_on(container_node* c) { - + node *prev_node = NULL; for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->is_alu_group()) { - finalize_alu_group(static_cast<alu_group_node*>(n)); + 
finalize_alu_group(static_cast<alu_group_node*>(n), prev_node); } else { if (n->is_alu_clause()) { cf_node *c = static_cast<cf_node*>(n); @@ -251,19 +263,28 @@ void bc_finalizer::run_on(container_node* c) { if (n->is_container()) run_on(static_cast<container_node*>(n)); } + prev_node = n; } } -void bc_finalizer::finalize_alu_group(alu_group_node* g) { +void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) { alu_node *last = NULL; + alu_group_node *prev_g = NULL; + bool add_nop = false; + if (prev_node && prev_node->is_alu_group()) { + prev_g = static_cast<alu_group_node*>(prev_node); + } + for (int i = 0; i < 5; i++) { + g->dst_slot_regs[i] = -1; + g->dst_slot_rel[i] = false; + } for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { alu_node *n = static_cast<alu_node*>(*I); unsigned slot = n->bc.slot; - value *d = n->dst.empty() ? NULL : n->dst[0]; - + bool local_nop; if (d && d->is_special_reg()) { assert(n->bc.op_ptr->flags & AF_MOVA); d = NULL; @@ -286,6 +307,9 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) { n->bc.dst_rel = 0; } + g->dst_slot_regs[slot] = n->bc.dst_gpr; + g->dst_slot_rel[slot] = n->bc.dst_rel; + n->bc.write_mask = d != NULL; n->bc.last = 0; @@ -299,17 +323,24 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) { update_ngpr(n->bc.dst_gpr); - finalize_alu_src(g, n); + local_nop = finalize_alu_src(g, n, prev_g); + if (local_nop) + add_nop = true; last = n; } + if (add_nop) { + if (sh.get_ctx().r6xx_gpr_index_workaround) { + insert_rv6xx_load_ar_workaround(g); + } + } last->bc.last = 1; } -void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { +bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) { vvec &sv = a->src; - + bool add_nop = false; FBC_DUMP( sblog << "finalize_alu_src: "; dump::dump_op(a); @@ -336,6 +367,12 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { if (!v->rel->is_const()) { src.rel = 1; 
update_ngpr(v->array->gpr.sel() + v->array->array_size -1); + if (prev && !add_nop) { + for (int i = 0; i < 5; i++) { + if (prev->dst_slot_regs[i] == src.sel) + add_nop = true; + } + } } else src.rel = 0; @@ -393,11 +430,19 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { assert(!"unknown value kind"); break; } + if (prev && !add_nop) { + for (int i = 0; i < 5; i++) { + if (prev->dst_slot_rel[i]) + if (prev->dst_slot_regs[i] == src.sel) + add_nop = true; + } + } } while (si < 3) { a->bc.src[si++].sel = 0; } + return add_nop; } void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start) diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp index 8e11428..5dba85b 100644 --- a/src/gallium/drivers/r600/sb/sb_context.cpp +++ b/src/gallium/drivers/r600/sb/sb_context.cpp @@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) { uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670; + r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880; + switch (chip) { case HW_CHIP_RV610: case HW_CHIP_RS780: diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h index 711c2eb..38bccfd 100644 --- a/src/gallium/drivers/r600/sb/sb_ir.h +++ b/src/gallium/drivers/r600/sb/sb_ir.h @@ -960,6 +960,8 @@ public: return F - literals.begin(); } + int dst_slot_regs[5]; + bool dst_slot_rel[5]; friend class shader; }; diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h index 812d14a..0346df1 100644 --- a/src/gallium/drivers/r600/sb/sb_pass.h +++ b/src/gallium/drivers/r600/sb/sb_pass.h @@ -695,8 +695,9 @@ public: void run_on(container_node *c); - void finalize_alu_group(alu_group_node *g); - void finalize_alu_src(alu_group_node *g, alu_node *a); + void insert_rv6xx_load_ar_workaround(alu_group_node *b4); + void finalize_alu_group(alu_group_node 
*g, node *prev_node); + bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node); void emit_set_grad(fetch_node* f); void finalize_fetch(fetch_node *f); -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev