Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com>
---
This patch depends on prior patch:
  r600g/sb: Support gs5 sampler indexing

Two items that could be improved on in some future patch:
Clauses using UBO indexing still lock the cache line for a
constant used to load the index register, which causes some
instruction groups to be broken up as SB thinks they are
using too many constant read ports.

The MOVA_INT/SET_CF_IDX[01] ops can often be emitted directly into
the preceding clause rather than always creating a new one.

 src/gallium/drivers/r600/r600_shader.c         |   6 --
 src/gallium/drivers/r600/r600_shader.h         |   2 -
 src/gallium/drivers/r600/sb/sb_bc.h            |   4 +-
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |   6 +-
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   |  20 ++++-
 src/gallium/drivers/r600/sb/sb_expr.cpp        |   3 +-
 src/gallium/drivers/r600/sb/sb_ir.h            |   7 ++
 src/gallium/drivers/r600/sb/sb_sched.cpp       | 108 ++++++++++++++++++++++---
 src/gallium/drivers/r600/sb/sb_sched.h         |   4 +
 src/gallium/drivers/r600/sb/sb_shader.cpp      |   4 +-
 src/gallium/drivers/r600/sb/sb_shader.h        |   2 +-
 11 files changed, 139 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 24c3d43..8efe902 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -166,8 +166,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
     if (rctx->b.chip_class <= R700) {
            use_sb &= (shader->shader.processor_type != 
TGSI_PROCESSOR_GEOMETRY);
     }
-       /* disable SB for shaders using ubo array indexing as it doesn't handle 
those currently */
-       use_sb &= !shader->shader.uses_ubo_indexing;
        /* disable SB for shaders using doubles */
        use_sb &= !shader->shader.uses_doubles;
 
@@ -1250,9 +1248,6 @@ static int tgsi_split_constant(struct r600_shader_ctx 
*ctx)
                        continue;
                }
 
-               if (ctx->src[i].kc_rel)
-                       ctx->shader->uses_ubo_indexing = true;
-
                if (ctx->src[i].rel) {
                        int chan = inst->Src[i].Indirect.Swizzle;
                        int treg = r600_get_temp(ctx);
@@ -1936,7 +1931,6 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        ctx.gs_next_vertex = 0;
        ctx.gs_stream_output_info = &so;
 
-       shader->uses_ubo_indexing = false;
        ctx.face_gpr = -1;
        ctx.fixed_pt_position_gpr = -1;
        ctx.fragcoord_input = -1;
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index 8ba32ae..c240e71 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -75,8 +75,6 @@ struct r600_shader {
        boolean                 has_txq_cube_array_z_comp;
        boolean                 uses_tex_buffers;
        boolean                 gs_prim_id_input;
-       /* Temporarily workaround SB not handling ubo indexing */
-       boolean                 uses_ubo_indexing;
 
        /* Size in bytes of a data item in the ring(s) (single vertex data).
           Stages with only one ring items 123 will be set to 0. */
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h 
b/src/gallium/drivers/r600/sb/sb_bc.h
index 126750d..9c2a917 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -478,7 +478,9 @@ struct bc_cf {
 
        bool is_alu_extended() {
                assert(op_ptr->flags & CF_ALU);
-               return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
+               return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE 
||
+                       kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode 
!= KC_INDEX_NONE ||
+                       kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode 
!= KC_INDEX_NONE;
        }
 
 };
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 522ff9d..17fe2a5 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -514,7 +514,7 @@ void bc_finalizer::copy_fetch_src(fetch_node &dst, 
fetch_node &src, unsigned arg
 
 void bc_finalizer::emit_set_grad(fetch_node* f) {
 
-       assert(f->src.size() == 12);
+       assert(f->src.size() == 12 || f->src.size() == 13);
        unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H 
};
 
        unsigned arg_start = 0;
@@ -809,8 +809,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 }
 
 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
-       unsigned sel = v->select.sel();
-       unsigned bank = sel >> 12;
+       unsigned sel = v->select.kcache_sel();
+       unsigned bank = v->select.kcache_bank();
        unsigned chan = v->select.chan();
        static const unsigned kc_base[] = {128, 160, 256, 288};
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index eb43670..01aeeaf 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -338,6 +338,7 @@ void bc_parser::save_set_cf_index(value *val, unsigned idx)
 value *bc_parser::get_cf_index_value(unsigned idx)
 {
        assert(idx <= 1);
+       assert(cf_index_value[idx]);
        return cf_index_value[idx];
 }
 void bc_parser::save_mova(alu_node *mova)
@@ -361,6 +362,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, 
alu_group_node *g) {
        for (node_iterator I = g->begin(), E = g->end();
                        I != E; ++I) {
                n = static_cast<alu_node*>(*I);
+               bool ubo_indexing[2] = {};
 
                if (!sh->assign_slot(n, slots[cgroup])) {
                        assert(!"alu slot assignment failed");
@@ -470,7 +472,12 @@ int bc_parser::prepare_alu_group(cf_node* cf, 
alu_group_node *g) {
 
                                bc_kcache &kc = cf->bc.kc[kc_set];
                                kc_addr = (kc.addr << 4) + (sel & 0x1F);
-                               n->src[s] = sh->get_kcache_value(kc.bank, 
kc_addr, src.chan);
+                               n->src[s] = sh->get_kcache_value(kc.bank, 
kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);
+
+                               if (kc.index_mode != KC_INDEX_NONE) {
+                                       assert(kc.index_mode != KC_LOCK_LOOP);
+                                       ubo_indexing[kc.index_mode - 
KC_INDEX_0] = true;
+                               }
                        } else if (src.sel < MAX_GPR) {
                                value *v = sh->get_gpr_value(true, src.sel, 
src.chan, src.rel);
 
@@ -507,6 +514,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, 
alu_group_node *g) {
                                }
                        }
                }
+
+               // add UBO index values if any as dependencies
+               if (ubo_indexing[0]) {
+                       n->src.push_back(get_cf_index_value(0));
+               }
+               if (ubo_indexing[1]) {
+                       n->src.push_back(get_cf_index_value(1));
+               }
        }
 
        // pack multislot instructions into alu_packed_node
@@ -650,6 +665,9 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
                        if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
                                
n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == 
V_SQ_CF_INDEX_1));
                        }
+                       if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
+                               
n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == 
V_SQ_CF_INDEX_1));
+                       }
                }
        }
 
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp 
b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 9c2274e..556a05d 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -403,7 +403,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
                if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
                                n.bc.op == ALU_OP1_MOVA_GPR_INT)
                                && n.bc.clamp == 0 && n.bc.omod == 0
-                               && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 
0) {
+                               && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 
&&
+                               n.src.size() == 1 /* RIM/SIM can be appended as 
additional values */) {
                        assign_source(n.dst[0], v0);
                        return true;
                }
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h 
b/src/gallium/drivers/r600/sb/sb_ir.h
index 560a4a9..c612e6c 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -62,6 +62,13 @@ struct sel_chan
 
        static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
        static unsigned chan(unsigned idx) { return (idx-1) & 3; }
+
+       sel_chan(unsigned bank, unsigned index,
+                        unsigned chan, alu_kcache_index_mode index_mode)
+               : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 
28), chan).id) {}
+       unsigned kcache_index_mode() const { return sel() >> 28; }
+       unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
+       unsigned kcache_bank() const { return kcache_sel() >> 12; }
 };
 
 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp 
b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 601445f..5113b75 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -843,7 +843,7 @@ static alu_node *create_set_idx(shader &sh, unsigned 
ar_idx) {
        a->dst.resize(1); // Dummy needed for recolor
 
        PSC_DUMP(
-               sblog << "created IDX load: "
+               sblog << "created IDX load: ";
                dump::dump_op(a);
                sblog << "\n";
        );
@@ -909,15 +909,21 @@ void post_scheduler::process_fetch(container_node *c) {
                        sblog << "  ";
                );
 
-               if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
+               // TODO: If same values used can avoid reloading index register
+               if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
+                       f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
+                       unsigned index_mode = f->bc.sampler_index_mode != 
V_SQ_CF_INDEX_NONE ?
+                               f->bc.sampler_index_mode : 
f->bc.resource_index_mode;
+
                        // Currently require prior opt passes to use one TEX 
per indexed op
                        assert(f->parent->count() == 1);
 
                        value *v = f->src.back(); // Last src is index offset
+                       assert(v);
 
                        cur_bb->push_front(c);
 
-                       load_index_register(v, f->bc.sampler_index_mode);
+                       load_index_register(v, index_mode);
                        f->src.pop_back(); // Don't need index value any more
 
                        return;
@@ -959,6 +965,7 @@ void post_scheduler::process_alu(container_node *c) {
 
                if (uc) {
                        n->remove();
+
                        pending.push_back(n);
                        PSC_DUMP( sblog << "pending\n"; );
                } else {
@@ -1101,6 +1108,18 @@ void post_scheduler::init_globals(val_set &s, bool 
prealloc) {
        }
 }
 
+void post_scheduler::emit_index_registers() {
+       for (unsigned i = 0; i < 2; i++) {
+               if (alu.current_idx[i]) {
+                       regmap = prev_regmap;
+                       alu.discard_current_group();
+
+                       load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
+                       alu.current_idx[i] = NULL;
+               }
+       }
+}
+
 void post_scheduler::emit_clause() {
 
        if (alu.current_ar) {
@@ -1109,7 +1128,11 @@ void post_scheduler::emit_clause() {
                alu.emit_group();
        }
 
-       alu.emit_clause(cur_bb);
+       if (!alu.is_empty()) {
+               alu.emit_clause(cur_bb);
+       }
+
+       emit_index_registers();
 }
 
 void post_scheduler::schedule_alu(container_node *c) {
@@ -1121,6 +1144,14 @@ void post_scheduler::schedule_alu(container_node *c) {
                prev_regmap = regmap;
 
                if (!prepare_alu_group()) {
+                       if (alu.current_idx[0] || alu.current_idx[1]) {
+                               regmap = prev_regmap;
+                               emit_clause();
+                               init_globals(live, false);
+
+                               continue;
+                       }
+
                        if (alu.current_ar) {
                                emit_load_ar();
                                continue;
@@ -1132,6 +1163,7 @@ void post_scheduler::schedule_alu(container_node *c) {
                        regmap = prev_regmap;
                        emit_clause();
                        init_globals(live, false);
+
                        continue;
                }
 
@@ -1391,6 +1423,42 @@ bool post_scheduler::map_src_val(value *v) {
 }
 
 bool post_scheduler::map_src_vec(vvec &vv, bool src) {
+       if (src) {
+               // Handle possible UBO indexing
+               bool ubo_indexing[2] = { false, false };
+               for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
+                       value *v = *I;
+                       if (!v)
+                               continue;
+
+                       if (v->is_kcache()) {
+                               unsigned index_mode = 
v->select.kcache_index_mode();
+                               if (index_mode == KC_INDEX_0 || index_mode == 
KC_INDEX_1) {
+                                       ubo_indexing[index_mode - KC_INDEX_0] = 
true;
+                               }
+                       }
+               }
+
+               // idx values stored at end of src vec, see 
bc_parser::prepare_alu_group
+               for (unsigned i = 2; i != 0; i--) {
+                       if (ubo_indexing[i-1]) {
+                               // TODO: skip adding value to kcache 
reservation somehow, causes
+                               // unnecessary group breaks and cache line locks
+                               value *v = vv.back();
+                               if (alu.current_idx[i-1] && 
alu.current_idx[i-1] != v) {
+                                       PSC_DUMP(
+                                               sblog << "IDX" << i-1 << " 
already set to " <<
+                                               *alu.current_idx[i-1] << ", 
trying to set " << *v << "\n";
+                                       );
+                                       return false;
+                               }
+
+                               alu.current_idx[i-1] = v;
+                               PSC_DUMP(sblog << "IDX" << i-1 << " set to " << 
*v << "\n";);
+                       }
+               }
+       }
+
        for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
                value *v = *I;
                if (!v)
@@ -1456,6 +1524,10 @@ void post_scheduler::dump_regmap() {
                sblog << "    current_AR: " << *alu.current_ar << "\n";
        if (alu.current_pr)
                sblog << "    current_PR: " << *alu.current_pr << "\n";
+       if (alu.current_idx[0])
+               sblog << "    current IDX0: " << *alu.current_idx[0] << "\n";
+       if (alu.current_idx[1])
+               sblog << "    current IDX1: " << *alu.current_idx[1] << "\n";
 }
 
 void post_scheduler::recolor_locals() {
@@ -1545,6 +1617,13 @@ unsigned post_scheduler::try_add_instruction(node *n) {
 
        unsigned avail_slots = rt.avail_slots();
 
+       // Cannot schedule in same clause as instructions using this index value
+       if (!n->dst.empty() && n->dst[0] &&
+               (n->dst[0] == alu.current_idx[0] || n->dst[0] == 
alu.current_idx[1])) {
+               PSC_DUMP(sblog << "   CF_IDX source: " << *n->dst[0] << "\n";);
+               return 0;
+       }
+
        if (n->is_alu_packed()) {
                alu_packed_node *p = static_cast<alu_packed_node*>(n);
                unsigned slots = p->get_slot_mask();
@@ -1874,7 +1953,7 @@ alu_clause_tracker::alu_clause_tracker(shader &sh)
          grp0(sh), grp1(sh),
          group(), clause(),
          push_exec_mask(),
-         current_ar(), current_pr() {}
+         current_ar(), current_pr(), current_idx() {}
 
 void alu_clause_tracker::emit_group() {
 
@@ -1931,6 +2010,8 @@ bool alu_clause_tracker::check_clause_limits() {
 
        // reserving slots to load AR and PR values
        unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
+       // ...and index registers
+       reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
 
        if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
                return false;
@@ -1996,13 +2077,15 @@ unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
        unsigned cnt = 0;
 
        for (unsigned i = 0; i < sel_count; ++i) {
-               unsigned line = rp[i];
+               unsigned line = rp[i] & 0x1fffffffu;
+               unsigned index_mode = rp[i] >> 29;
 
                if (!line)
                        return cnt;
 
                --line;
                line = (sel_count == 2) ? line >> 5 : line >> 6;
+               line |= index_mode << 29;
 
                if (lines.insert(line).second)
                        ++cnt;
@@ -2017,14 +2100,18 @@ bool alu_kcache_tracker::update_kc() {
        memcpy(old_kc, kc, sizeof(kc));
 
        for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; 
++I) {
-               unsigned line = *I;
+               unsigned index_mode = *I >> 29;
+               unsigned line = *I & 0x1fffffffu;
                unsigned bank = line >> 8;
 
+               assert(index_mode <= KC_INDEX_INVALID);
                line &= 0xFF;
 
-               if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
-                       ++kc[c-1].mode;
-               else {
+               if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
+                       kc[c-1].index_mode == index_mode)
+               {
+                       kc[c-1].mode = KC_LOCK_2;
+               } else {
                        if (c == max_kcs) {
                                memcpy(kc, old_kc, sizeof(kc));
                                return false;
@@ -2034,6 +2121,7 @@ bool alu_kcache_tracker::update_kc() {
 
                        kc[c].bank = bank;
                        kc[c].addr = line;
+                       kc[c].index_mode = index_mode;
                        ++c;
                }
        }
diff --git a/src/gallium/drivers/r600/sb/sb_sched.h 
b/src/gallium/drivers/r600/sb/sb_sched.h
index 2ca7146..05b428c 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.h
+++ b/src/gallium/drivers/r600/sb/sb_sched.h
@@ -66,6 +66,7 @@ public:
 class literal_tracker {
        literal lt[4];
        unsigned uc[4];
+
 public:
        literal_tracker() : lt(), uc() {}
 
@@ -219,6 +220,8 @@ public:
        // bottom-up)
        value *current_ar;
        value *current_pr;
+       // current values of CF_IDX registers that need preloading
+       value *current_idx[2];
 
        alu_clause_tracker(shader &sh);
 
@@ -256,6 +259,7 @@ class post_scheduler : public pass {
 
        val_set cleared_interf;
 
+       void emit_index_registers();
 public:
 
        post_scheduler(shader &sh) : pass(sh),
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp 
b/src/gallium/drivers/r600/sb/sb_shader.cpp
index f996c07..87e28e9 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -188,9 +188,9 @@ value* shader::create_temp_value() {
        return get_value(VLK_TEMP, id, 0);
 }
 
-value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
+value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, 
alu_kcache_index_mode index_mode) {
        return get_ro_value(kcache_values, VLK_KCACHE,
-                       sel_chan((bank << 12) | index, chan));
+                       sel_chan(bank, index, chan, index_mode));
 }
 
 void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h 
b/src/gallium/drivers/r600/sb/sb_shader.h
index 7955bba..70bea89 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -323,7 +323,7 @@ public:
 
 
        value* get_special_ro_value(unsigned sel);
-       value* get_kcache_value(unsigned bank, unsigned index, unsigned chan);
+       value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, 
alu_kcache_index_mode index_mode);
 
        value* get_value_version(value* v, unsigned ver);
 
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to