Reviewed-by: Edward O'Callaghan <eocallag...@alterapraxis.com> -- Edward O'Callaghan edward.ocallag...@koparo.com
On Tue, Sep 22, 2015, at 12:21 AM, Glenn Kennard wrote: > Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com> > --- > Just UBO support left before gs5 can be enabled. > Could improve how the two index registers are set/used to reduce > the number of clauses, but as is it's about as good as what the blob > emits. > > src/gallium/drivers/r600/r600_shader.c | 12 ++- > src/gallium/drivers/r600/r600_shader.h | 4 +- > src/gallium/drivers/r600/sb/sb_bc.h | 10 ++- > src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 17 +++- > src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 50 +++++++++++- > src/gallium/drivers/r600/sb/sb_gcm.cpp | 11 ++- > src/gallium/drivers/r600/sb/sb_sched.cpp | 118 > +++++++++++++++++++++++++-- > src/gallium/drivers/r600/sb/sb_sched.h | 5 +- > 8 files changed, 201 insertions(+), 26 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 1d90582..24c3d43 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -166,8 +166,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, > if (rctx->b.chip_class <= R700) { > use_sb &= (shader->shader.processor_type != > TGSI_PROCESSOR_GEOMETRY); > } > - /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array > indexing) as it doesn't handle those currently */ > - use_sb &= !shader->shader.uses_index_registers; > + /* disable SB for shaders using ubo array indexing as it doesn't > handle those currently */ > + use_sb &= !shader->shader.uses_ubo_indexing; > /* disable SB for shaders using doubles */ > use_sb &= !shader->shader.uses_doubles; > > @@ -1251,7 +1251,7 @@ static int tgsi_split_constant(struct > r600_shader_ctx *ctx) > } > > if (ctx->src[i].kc_rel) > - ctx->shader->uses_index_registers = true; > + ctx->shader->uses_ubo_indexing = true; > > if (ctx->src[i].rel) { > int chan = inst->Src[i].Indirect.Swizzle; > @@ -1912,7 +1912,7 @@ static int r600_shader_from_tgsi(struct > r600_context *rctx, 
> > shader->uses_doubles = ctx.info.uses_doubles; > > - indirect_gprs = ctx.info.indirect_files & ~(1 << > TGSI_FILE_CONSTANT); > + indirect_gprs = ctx.info.indirect_files & ~((1 << > TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); > tgsi_parse_init(&ctx.parse, tokens); > ctx.type = ctx.info.processor; > shader->processor_type = ctx.type; > @@ -1936,7 +1936,7 @@ static int r600_shader_from_tgsi(struct > r600_context *rctx, > ctx.gs_next_vertex = 0; > ctx.gs_stream_output_info = &so; > > - shader->uses_index_registers = false; > + shader->uses_ubo_indexing = false; > ctx.face_gpr = -1; > ctx.fixed_pt_position_gpr = -1; > ctx.fragcoord_input = -1; > @@ -5703,8 +5703,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > sampler_src_reg = 3; > > sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 > : 0; // CF_INDEX_1 : CF_INDEX_NONE > - if (sampler_index_mode) > - ctx->shader->uses_index_registers = true; > > src_gpr = tgsi_tex_get_src_gpr(ctx, 0); > > diff --git a/src/gallium/drivers/r600/r600_shader.h > b/src/gallium/drivers/r600/r600_shader.h > index 48de9cd..8ba32ae 100644 > --- a/src/gallium/drivers/r600/r600_shader.h > +++ b/src/gallium/drivers/r600/r600_shader.h > @@ -75,8 +75,8 @@ struct r600_shader { > boolean has_txq_cube_array_z_comp; > boolean uses_tex_buffers; > boolean gs_prim_id_input; > - /* Temporarily workaround SB not handling CF_INDEX_[01] index > registers */ > - boolean uses_index_registers; > + /* Temporarily workaround SB not handling ubo indexing */ > + boolean uses_ubo_indexing; > > /* Size in bytes of a data item in the ring(s) (single vertex data). > Stages with only one ring items 123 will be set to 0. 
*/ > diff --git a/src/gallium/drivers/r600/sb/sb_bc.h > b/src/gallium/drivers/r600/sb/sb_bc.h > index ab988f8..126750d 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc.h > +++ b/src/gallium/drivers/r600/sb/sb_bc.h > @@ -48,6 +48,7 @@ class fetch_node; > class alu_group_node; > class region_node; > class shader; > +class value; > > class sb_ostream { > public: > @@ -818,13 +819,16 @@ class bc_parser { > > bool gpr_reladdr; > > + // Note: currently relies on input emitting SET_CF in same basic > block as uses > + value *cf_index_value[2]; > + alu_node *mova; > public: > > bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : > ctx(sctx), dec(), bc(bc), pshader(pshader), > dw(), bc_ndw(), max_cf(), > sh(), error(), slots(), cgroup(), > - cf_map(), loop_stack(), gpr_reladdr() { } > + cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), > mova() { } > > int decode(); > int prepare(); > @@ -852,6 +856,10 @@ private: > int prepare_loop(cf_node *c); > int prepare_if(cf_node *c); > > + void save_set_cf_index(value *val, unsigned idx); > + value *get_cf_index_value(unsigned idx); > + void save_mova(alu_node *mova); > + alu_node *get_mova(); > }; > > > diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp > b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp > index 0fc73c4..3c70ea7 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp > +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp > @@ -27,6 +27,7 @@ > #include "sb_bc.h" > #include "sb_shader.h" > #include "sb_pass.h" > +#include "eg_sq.h" // V_SQ_CF_INDEX_0/1 > > namespace r600_sb { > > @@ -354,6 +355,14 @@ void bc_dump::dump(alu_node& n) { > s << " " << vec_bs[n.bc.bank_swizzle]; > } > > + if (ctx.is_cayman()) { > + if (n.bc.op == ALU_OP1_MOVA_INT) { > + static const char *mova_str[] = { " AR_X", " PC", > " CF_IDX0", " CF_IDX1", > + " Unknown MOVA_INT dest" }; > + s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // > CM_V_SQ_MOVA_DST_AR_* > + } > + } > + > sblog << s.str() << "\n"; > } > > @@ -450,9 
+459,9 @@ void bc_dump::dump(fetch_node& n) { > if (n.bc.fetch_whole_quad) > s << " FWQ"; > if (ctx.is_egcm() && n.bc.resource_index_mode) > - s << " RIM:SQ_CF_INDEX_" << > n.bc.resource_index_mode; > + s << " RIM:SQ_CF_INDEX_" << > (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); > if (ctx.is_egcm() && n.bc.sampler_index_mode) > - s << " SID:SQ_CF_INDEX_" << > n.bc.sampler_index_mode; > + s << " SID:SQ_CF_INDEX_" << > (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); > > s << " UCF:" << n.bc.use_const_fields > << " FMT(DTA:" << n.bc.data_format > @@ -470,9 +479,9 @@ void bc_dump::dump(fetch_node& n) { > if (n.bc.offset[k]) > s << " O" << chans[k] << ":" << n.bc.offset[k]; > if (ctx.is_egcm() && n.bc.resource_index_mode) > - s << " RIM:SQ_CF_INDEX_" << > n.bc.resource_index_mode; > + s << " RIM:SQ_CF_INDEX_" << > (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); > if (ctx.is_egcm() && n.bc.sampler_index_mode) > - s << " SID:SQ_CF_INDEX_" << > n.bc.sampler_index_mode; > + s << " SID:SQ_CF_INDEX_" << > (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); > } > > sblog << s.str() << "\n"; > diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > index 19bd078..eb43670 100644 > --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp > @@ -34,6 +34,7 @@ > > #include "r600_pipe.h" > #include "r600_shader.h" > +#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1 > > #include <stack> > > @@ -121,7 +122,7 @@ int bc_parser::parse_decls() { > return 0; > } > > - if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { > + if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << > TGSI_FILE_SAMPLER))) { > > assert(pshader->num_arrays); > > @@ -328,6 +329,28 @@ int bc_parser::prepare_alu_clause(cf_node* cf) { > return 0; > } > > +void bc_parser::save_set_cf_index(value *val, unsigned idx) > +{ > + assert(idx <= 1); > + assert(val); > + cf_index_value[idx] = val; > +} > +value 
*bc_parser::get_cf_index_value(unsigned idx) > +{ > + assert(idx <= 1); > + return cf_index_value[idx]; > +} > +void bc_parser::save_mova(alu_node *mova) > +{ > + assert(mova); > + this->mova = mova; > +} > +alu_node *bc_parser::get_mova() > +{ > + assert(mova); > + return mova; > +} > + > int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { > > alu_node *n; > @@ -375,9 +398,24 @@ int bc_parser::prepare_alu_group(cf_node* cf, > alu_group_node *g) { > n->dst.resize(1); > } > > - if (flags & AF_MOVA) { > - > - n->dst[0] = sh->get_special_value(SV_AR_INDEX); > + if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == > ALU_OP0_SET_CF_IDX1) { > + // Move CF_IDX value into tex instruction > operands, scheduler will later re-emit setting of CF_IDX > + // DCE will kill this op > + save_set_cf_index(get_mova()->src[0], n->bc.op == > ALU_OP0_SET_CF_IDX1); > + } else if (flags & AF_MOVA) { > + > + if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || > + n->bc.dst_gpr == > CM_V_SQ_MOVA_DST_CF_IDX1) && > + ctx.is_cayman()) > + { > + // Move CF_IDX value into tex instruction > operands, scheduler will later re-emit setting of CF_IDX > + save_set_cf_index(n->src[0], > n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); > + n->dst.resize(0); > + } > + else { > + n->dst[0] = > sh->get_special_value(SV_AR_INDEX); > + save_mova(n); > + } > > n->flags |= NF_DONT_HOIST; > > @@ -608,6 +646,10 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) { > > n->bc.src_sel[s], false); > } > > + // Scheduler will emit the appropriate > instructions to set CF_IDX0/1 > + if (n->bc.sampler_index_mode != > V_SQ_CF_INDEX_NONE) { > + > n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == > V_SQ_CF_INDEX_1)); > + } > } > } > > diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp > b/src/gallium/drivers/r600/sb/sb_gcm.cpp > index bccb671..236b2ea 100644 > --- a/src/gallium/drivers/r600/sb/sb_gcm.cpp > +++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp > @@ -37,6 +37,7 @@ > #include "sb_bc.h" > #include 
"sb_shader.h" > #include "sb_pass.h" > +#include "eg_sq.h" // V_SQ_CF_INDEX_NONE > > namespace r600_sb { > > @@ -406,6 +407,14 @@ void gcm::bu_sched_bb(bb_node* bb) { > ncnt = 3; > } > > + bool sampler_indexing = false; > + if (n->is_fetch_inst() && > + static_cast<fetch_node > *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) > + { > + sampler_indexing = true; // Give > sampler indexed ops get their own clause > + ncnt = sh.get_ctx().is_cayman() ? > 2 : 3; // MOVA + SET_CF_IDX0/1 > + } > + > if ((sq == SQ_TEX || sq == SQ_VTX) && > ((last_count >= ctx.max_fetch/2 > && > check_alu_ready_count(24)) || > @@ -418,7 +427,7 @@ void gcm::bu_sched_bb(bb_node* bb) { > bu_ready[sq].pop_front(); > > if (sq != SQ_CF) { > - if (!clause) { > + if (!clause || sampler_indexing) > { > clause = sh.create_clause(sq == > SQ_ALU ? > NST_ALU_CLAUSE : > sq == > SQ_TEX ? NST_TEX_CLAUSE : > diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp > b/src/gallium/drivers/r600/sb/sb_sched.cpp > index c98b8ff..601445f 100644 > --- a/src/gallium/drivers/r600/sb/sb_sched.cpp > +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp > @@ -36,6 +36,7 @@ > #include "sb_shader.h" > #include "sb_pass.h" > #include "sb_sched.h" > +#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1 > > namespace r600_sb { > > @@ -781,7 +782,14 @@ void post_scheduler::schedule_bb(bb_node* bb) { > sblog << "\n"; > ); > > - if (n->subtype == NST_ALU_CLAUSE) { > + // May require emitting ALU ops to load index registers > + if (n->is_fetch_clause()) { > + n->remove(); > + process_fetch(static_cast<container_node *>(n)); > + continue; > + } > + > + if (n->is_alu_clause()) { > n->remove(); > process_alu(static_cast<container_node*>(n)); > continue; > @@ -823,6 +831,102 @@ void post_scheduler::init_regmap() { > } > } > > +static alu_node *create_set_idx(shader &sh, unsigned ar_idx) { > + alu_node *a = sh.create_alu(); > + > + assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1); > + if (ar_idx == V_SQ_CF_INDEX_0) > + 
a->bc.set_op(ALU_OP0_SET_CF_IDX0); > + else > + a->bc.set_op(ALU_OP0_SET_CF_IDX1); > + a->bc.slot = SLOT_X; > + a->dst.resize(1); // Dummy needed for recolor > + > + PSC_DUMP( > + sblog << "created IDX load: " > + dump::dump_op(a); > + sblog << "\n"; > + ); > + > + return a; > +} > + > +void post_scheduler::load_index_register(value *v, unsigned ar_idx) > +{ > + alu.reset(); > + > + if (!sh.get_ctx().is_cayman()) { > + // Evergreen has to first load address register, then use > CF_SET_IDX0/1 > + alu_group_tracker &rt = alu.grp(); > + alu_node *set_idx = create_set_idx(sh, ar_idx); > + if (!rt.try_reserve(set_idx)) { > + sblog << "can't emit SET_CF_IDX"; > + dump::dump_op(set_idx); > + sblog << "\n"; > + } > + process_group(); > + > + if (!alu.check_clause_limits()) { > + // Can't happen since clause only contains > MOVA/CF_SET_IDX0/1 > + } > + alu.emit_group(); > + } > + > + alu_group_tracker &rt = alu.grp(); > + alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? > SEL_Z : SEL_Y); > + > + if (!rt.try_reserve(a)) { > + sblog << "can't emit AR load : "; > + dump::dump_op(a); > + sblog << "\n"; > + } > + > + process_group(); > + > + if (!alu.check_clause_limits()) { > + // Can't happen since clause only contains > MOVA/CF_SET_IDX0/1 > + } > + > + alu.emit_group(); > + alu.emit_clause(cur_bb); > +} > + > +void post_scheduler::process_fetch(container_node *c) { > + if (c->empty()) > + return; > + > + for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = > N) { > + N = I; > + ++N; > + > + node *n = *I; > + > + fetch_node *f = static_cast<fetch_node*>(n); > + > + PSC_DUMP( > + sblog << "process_tex "; > + dump::dump_op(n); > + sblog << " "; > + ); > + > + if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) { > + // Currently require prior opt passes to use one > TEX per indexed op > + assert(f->parent->count() == 1); > + > + value *v = f->src.back(); // Last src is index > offset > + > + cur_bb->push_front(c); > + > + load_index_register(v, 
f->bc.sampler_index_mode); > + f->src.pop_back(); // Don't need index value any > more > + > + return; > + } > + } > + > + cur_bb->push_front(c); > +} > + > void post_scheduler::process_alu(container_node *c) { > > if (c->empty()) > @@ -1180,7 +1284,7 @@ void post_scheduler::emit_load_ar() { > alu.discard_current_group(); > > alu_group_tracker &rt = alu.grp(); > - alu_node *a = alu.create_ar_load(); > + alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X); > > if (!rt.try_reserve(a)) { > sblog << "can't emit AR load : "; > @@ -1936,11 +2040,9 @@ bool alu_kcache_tracker::update_kc() { > return true; > } > > -alu_node* alu_clause_tracker::create_ar_load() { > +alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select > ar_channel) { > alu_node *a = sh.create_alu(); > > - // FIXME use MOVA_GPR on R6xx > - > if (sh.get_ctx().uses_mova_gpr) { > a->bc.set_op(ALU_OP1_MOVA_GPR_INT); > a->bc.slot = SLOT_TRANS; > @@ -1948,9 +2050,13 @@ alu_node* alu_clause_tracker::create_ar_load() { > a->bc.set_op(ALU_OP1_MOVA_INT); > a->bc.slot = SLOT_X; > } > + a->bc.dst_chan = ar_channel; > + if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) { > + a->bc.dst_gpr = ar_channel == SEL_Y ? 
> CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1; > + } > > a->dst.resize(1); > - a->src.push_back(current_ar); > + a->src.push_back(v); > > PSC_DUMP( > sblog << "created AR load: "; > diff --git a/src/gallium/drivers/r600/sb/sb_sched.h > b/src/gallium/drivers/r600/sb/sb_sched.h > index 87c4586..2ca7146 100644 > --- a/src/gallium/drivers/r600/sb/sb_sched.h > +++ b/src/gallium/drivers/r600/sb/sb_sched.h > @@ -235,7 +235,7 @@ public: > void new_group(); > bool is_empty(); > > - alu_node* create_ar_load(); > + alu_node* create_ar_load(value *v, chan_select ar_channel); > > void discard_current_group(); > > @@ -266,6 +266,9 @@ public: > void run_on(container_node *n); > void schedule_bb(bb_node *bb); > > + void load_index_register(value *v, unsigned idx); > + void process_fetch(container_node *c); > + > void process_alu(container_node *c); > void schedule_alu(container_node *c); > bool prepare_alu_group(); > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev