On Tue, Oct 15, 2013 at 7:14 AM, Eric Anholt <e...@anholt.net> wrote: > Previously, the best thing we had was to schedule the things unblocked by > the current instruction, on the hope that it would be consuming two values > at the end of their live intervals while only producing one new value. > Sometimes that wasn't the case. > > Now, when an instruction is the first user of a GRF we schedule (i.e. it > will probably be the virtual_grf_def[] instruction after computing live > intervals again), penalize it by how many regs it would take up. When an > instruction is the last user of a GRF we have to schedule (when it will > probably be the virtual_grf_end[] instruction), give it a boost by how > many regs it would free. texture2D() takes up 4 regs and at best frees 2 regs, so its net score is negative and it will always be scheduled last. When there are more than ~60 texture2D() calls (which can happen because of loop unrolling), the message payloads could take up all available registers.
I wonder if it would help to also take into consideration how long an instruction has been in the available queue. After a couple of texture2D()s are scheduled, the instructions that use their results may become available and free those registers up. > > The new functions are made virtual (only 1 of 2 really needs to be > virtual) because I expect we'll soon lift the pre-regalloc scheduling > heuristic over to the vec4 backend. > > shader-db: > total instructions in shared programs: 1512756 -> 1511604 (-0.08%) > instructions in affected programs: 10292 -> 9140 (-11.19%) > GAINED: 121 > LOST: 38 > > Improves tropics performance at my current settings by 4.50602% +/- > 2.60694% (n=5). No difference on Lightsmark (n=5). No difference on > GLB2.7 (n=11). > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70445 > --- > .../drivers/dri/i965/brw_schedule_instructions.cpp | 125 > ++++++++++++++++++--- > 1 file changed, 111 insertions(+), 14 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > index b24c38c..7cb0265 100644 > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > @@ -353,6 +353,13 @@ public: > this->instructions_to_schedule = 0; > this->post_reg_alloc = post_reg_alloc; > this->time = 0; > + if (!post_reg_alloc) { > + this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count); > + this->grf_active = rzalloc_array(mem_ctx, bool, grf_count); > + } else { > + this->remaining_grf_uses = NULL; > + this->grf_active = NULL; > + } > } > > ~instruction_scheduler() > @@ -377,6 +384,9 @@ public: > */ > virtual int issue_time(backend_instruction *inst) = 0; > > + virtual void mod_remaining_grf_uses(backend_instruction *inst, int mod) = > 0; > + virtual int get_grf_pressure_benefit(backend_instruction *inst) = 0; > + > void schedule_instructions(backend_instruction *next_block_header); > > void *mem_ctx; > @@ -387,6 
+397,17 @@ public: > int time; > exec_list instructions; > backend_visitor *bv; > + > + /** Number of instructions left to schedule that reference each vgrf. */ > + int *remaining_grf_uses; > + > + /** > + * Tracks whether each VGRF has had an instruction scheduled that uses it. > + * > + * This is used to estimate whether scheduling a new instruction will > + * increase register pressure. > + */ > + bool *grf_active; > }; > > class fs_instruction_scheduler : public instruction_scheduler > @@ -398,6 +419,9 @@ public: > schedule_node *choose_instruction_to_schedule(); > int issue_time(backend_instruction *inst); > fs_visitor *v; > + > + void mod_remaining_grf_uses(backend_instruction *inst, int mod); > + int get_grf_pressure_benefit(backend_instruction *inst); > }; > > fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, > @@ -408,6 +432,57 @@ > fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, > { > } > > +void > +fs_instruction_scheduler::mod_remaining_grf_uses(backend_instruction *be, > + int mod) > +{ > + fs_inst *inst = (fs_inst *)be; > + > + if (!remaining_grf_uses) > + return; > + > + if (inst->dst.file == GRF) { > + remaining_grf_uses[inst->dst.reg] += mod; > + if (mod < 0 && !grf_active[inst->dst.reg]) > + grf_active[inst->dst.reg] = true; > + } > + > + for (int i = 0; i < 3; i++) { > + if (inst->src[i].file != GRF) > + continue; > + > + remaining_grf_uses[inst->src[i].reg] += mod; > + if (mod < 0 && !grf_active[inst->src[i].reg]) > + grf_active[inst->src[i].reg] = true; > + } > +} > + > +int > +fs_instruction_scheduler::get_grf_pressure_benefit(backend_instruction *be) > +{ > + fs_inst *inst = (fs_inst *)be; > + int benefit = 0; > + > + if (inst->dst.file == GRF) { > + if (remaining_grf_uses[inst->dst.reg] == 1) > + benefit += v->virtual_grf_sizes[inst->dst.reg]; > + if (!grf_active[inst->dst.reg]) > + benefit -= v->virtual_grf_sizes[inst->dst.reg]; > + } > + > + for (int i = 0; i < 3; i++) { > + if (inst->src[i].file != GRF) > 
+ continue; > + > + if (remaining_grf_uses[inst->src[i].reg] == 1) > + benefit += v->virtual_grf_sizes[inst->src[i].reg]; > + if (!grf_active[inst->src[i].reg]) > + benefit -= v->virtual_grf_sizes[inst->src[i].reg]; > + } > + > + return benefit; > +} > + > class vec4_instruction_scheduler : public instruction_scheduler > { > public: > @@ -416,6 +491,9 @@ public: > schedule_node *choose_instruction_to_schedule(); > int issue_time(backend_instruction *inst); > vec4_visitor *v; > + > + void mod_remaining_grf_uses(backend_instruction *inst, int mod); > + int get_grf_pressure_benefit(backend_instruction *inst); > }; > > vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, > @@ -426,6 +504,18 @@ > vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, > } > > void > +vec4_instruction_scheduler::mod_remaining_grf_uses(backend_instruction *be, > + int mod) > +{ > +} > + > +int > +vec4_instruction_scheduler::get_grf_pressure_benefit(backend_instruction *be) > +{ > + return 0; > +} > + > +void > instruction_scheduler::add_inst(backend_instruction *inst) > { > schedule_node *n = new(mem_ctx) schedule_node(inst, bv->brw); > @@ -946,23 +1036,18 @@ > fs_instruction_scheduler::choose_instruction_to_schedule() > } > } > } else { > + int chosen_score = -1000000; /* Any instruction is better than nothing > */ > + > /* Before register allocation, we don't care about the latencies of > * instructions. All we care about is reducing live intervals of > * variables so that we can avoid register spilling, or get 16-wide > * shaders which naturally do a better job of hiding instruction > * latency. > * > - * To do so, schedule our instructions in a roughly LIFO/depth-first > - * order: when new instructions become available as a result of > - * scheduling something, choose those first so that our result > - * hopefully is consumed quickly. > - * > - * The exception is messages that generate more than one result > - * register (AKA texturing). 
In those cases, the LIFO search would > - * normally tend to choose them quickly (because scheduling the > - * previous message not only unblocked the children using its result, > - * but also the MRF setup for the next sampler message, which in turn > - * unblocks the next sampler message). > + * If this instruction would be the last use of any GRFs, we bump up > its > + * score since it means it should be reducing register pressure. If > + * it's the first use of a GRF, reduce its score since it means it > + * should be increasing register pressure. > */ > for (schedule_node *node = (schedule_node *)instructions.get_tail(); > node != instructions.get_head()->prev; > @@ -970,9 +1055,12 @@ > fs_instruction_scheduler::choose_instruction_to_schedule() > schedule_node *n = (schedule_node *)node; > fs_inst *inst = (fs_inst *)n->inst; > > - chosen = n; > - if (inst->regs_written <= 1) > - break; > + int this_score = get_grf_pressure_benefit(inst); > + > + if (this_score > chosen_score) { > + chosen = n; > + chosen_score = this_score; > + } > } > } > > @@ -1036,6 +1124,7 @@ > instruction_scheduler::schedule_instructions(backend_instruction > *next_block_hea > chosen->remove(); > next_block_header->insert_before(chosen->inst); > instructions_to_schedule--; > + mod_remaining_grf_uses(chosen->inst, -1); > > /* Update the clock for how soon an instruction could start after the > * chosen one. > @@ -1105,6 +1194,14 @@ instruction_scheduler::run(exec_list *all_instructions) > bv->dump_instructions(); > } > > + /* Populate the remaining GRF uses array to improve the pre-regalloc > + * scheduling. > + */ > + if (remaining_grf_uses) { > + foreach_list(node, &instructions) > + mod_remaining_grf_uses((backend_instruction *)node, 1); > + } > + > while (!next_block_header->is_tail_sentinel()) { > /* Add things to be scheduled until we get to a new BB. 
*/ > while (!next_block_header->is_tail_sentinel()) { > -- > 1.8.4.rc3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev -- o...@lunarg.com _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev