From: Dave Airlie <airl...@redhat.com>

r600, rv610 and rv630 all have a bug in their GPR indexing
and in how the hw inserts accesses to PV.

If the base index for an indexed src is the same as a dst gpr
written in the previous group, then the hw will use PV instead
of reading the indexed gpr correctly.

The workaround is to insert a NOP group between the two groups
when this is detected.

v2: add the second part of the fix, detecting DST rel writes followed
by reads of the same src base index.

Fixes ~200 piglit regressions on rv635 since SB was introduced.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/gallium/drivers/r600/sb/sb_bc.h            |  2 +
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 61 ++++++++++++++++++++++----
 src/gallium/drivers/r600/sb/sb_context.cpp     |  2 +
 src/gallium/drivers/r600/sb/sb_ir.h            |  2 +
 src/gallium/drivers/r600/sb/sb_pass.h          |  5 ++-
 5 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index d03da98..6d3dc4d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -616,6 +616,8 @@ public:
        unsigned num_slots;
        bool uses_mova_gpr;
 
+       bool r6xx_gpr_index_workaround;
+
        bool stack_workaround_8xx;
        bool stack_workaround_9xx;
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 3f362c4..f3e34aa 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -38,6 +38,18 @@
 
 namespace r600_sb {
 
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+       // Workaround helper: place a new ALU group holding a single NOP in front of b4.
+       alu_group_node *g = sh.create_alu_group();
+       alu_node *a = sh.create_alu();
+
+       a->bc.set_op(ALU_OP0_NOP);
+       a->bc.last = 1; // the NOP is the only instruction pushed into g, so flag it as last
+
+       g->push_back(a);
+       b4->insert_before(g); // the NOP group goes immediately before the group passed in
+}
+
 int bc_finalizer::run() {
 
        run_on(sh.root);
@@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) {
 }
 
 void bc_finalizer::run_on(container_node* c) {
-
+       node *prev_node = NULL;
        for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                node *n = *I;
 
                if (n->is_alu_group()) {
-                       finalize_alu_group(static_cast<alu_group_node*>(n));
+                       finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
                } else {
                        if (n->is_alu_clause()) {
                                cf_node *c = static_cast<cf_node*>(n);
@@ -251,19 +263,28 @@ void bc_finalizer::run_on(container_node* c) {
                        if (n->is_container())
                                run_on(static_cast<container_node*>(n));
                }
+               prev_node = n;
        }
 }
 
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
 
        alu_node *last = NULL;
+       alu_group_node *prev_g = NULL;
+       bool add_nop = false;
+       if (prev_node && prev_node->is_alu_group()) {
+               prev_g = static_cast<alu_group_node*>(prev_node);
+       }
+       for (int i = 0; i < 5; i++) {
+               g->dst_slot_regs[i] = -1;
+               g->dst_slot_rel[i] = false;
+       }
 
        for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
                alu_node *n = static_cast<alu_node*>(*I);
                unsigned slot = n->bc.slot;
-
                value *d = n->dst.empty() ? NULL : n->dst[0];
-
+               bool local_nop;
                if (d && d->is_special_reg()) {
                        assert(n->bc.op_ptr->flags & AF_MOVA);
                        d = NULL;
@@ -286,6 +307,9 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
                        n->bc.dst_rel = 0;
                }
 
+               g->dst_slot_regs[slot] = n->bc.dst_gpr;
+               g->dst_slot_rel[slot] = n->bc.dst_rel;
+
                n->bc.write_mask = d != NULL;
                n->bc.last = 0;
 
@@ -299,17 +323,24 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
 
                update_ngpr(n->bc.dst_gpr);
 
-               finalize_alu_src(g, n);
+               local_nop = finalize_alu_src(g, n, prev_g);
+               if (local_nop)
+                       add_nop = true;
 
                last = n;
        }
 
+       if (add_nop) {
+               if (sh.get_ctx().r6xx_gpr_index_workaround) {
+                       insert_rv6xx_load_ar_workaround(g);
+               }
+       }
        last->bc.last = 1;
 }
 
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
        vvec &sv = a->src;
-
+       bool add_nop = false;
        FBC_DUMP(
                sblog << "finalize_alu_src: ";
                dump::dump_op(a);
@@ -336,6 +367,12 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        if (!v->rel->is_const()) {
                                src.rel = 1;
                                update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+                               if (prev && !add_nop) {
+                                       for (int i = 0; i < 5; i++) {
+                                               if (prev->dst_slot_regs[i] == src.sel)
+                                                       add_nop = true;
+                                       }
+                               }
                        } else
                                src.rel = 0;
 
@@ -393,11 +430,19 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        assert(!"unknown value kind");
                        break;
                }
+               if (prev && !add_nop) {
+                       for (int i = 0; i < 5; i++) {
+                               if (prev->dst_slot_rel[i])
+                                       if (prev->dst_slot_regs[i] == src.sel)
+                                               add_nop = true;
+                       }
+               }
        }
 
        while (si < 3) {
                a->bc.src[si++].sel = 0;
        }
+       return add_nop;
 }
 
 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp
index 8e11428..5dba85b 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
 
        uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
 
+       r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880;
+
        switch (chip) {
        case HW_CHIP_RV610:
        case HW_CHIP_RS780:
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index 711c2eb..38bccfd 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -960,6 +960,8 @@ public:
                return F - literals.begin();
        }
 
+       int dst_slot_regs[5];
+       bool dst_slot_rel[5];
        friend class shader;
 };
 
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index 812d14a..0346df1 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -695,8 +695,9 @@ public:
 
        void run_on(container_node *c);
 
-       void finalize_alu_group(alu_group_node *g);
-       void finalize_alu_src(alu_group_node *g, alu_node *a);
+       void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
+       void finalize_alu_group(alu_group_node *g, node *prev_node);
+       bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
 
        void emit_set_grad(fetch_node* f);
        void finalize_fetch(fetch_node *f);
-- 
2.1.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to