Making madness (e9daead7) legit? :) Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com>
On Tue, Nov 19, 2013 at 11:34 PM, Eric Anholt <e...@anholt.net> wrote: > I recently made us try two different things that tried to reduce register > pressure so that we would be more likely to allocate successfully. But > now that we have the logic for trying two, we can make the first thing we > try be the normal, not-prioritizing-register-pressure heuristic. > > This means one less scheduling pass in the common case of that heuristic > not producing spills, plus the best schedule we know how to produce, if > that one happens to succeed. This is important, because our register > allocation produces a lot of possibly avoidable dependencies for the > post-register-allocation schedule, despite ra_set_allocate_round_robin(). > > GLB2.7: 1.04127% +/- 0.732461% fps improvement (n=31) > nexuiz: No difference (n=5) > lightsmark: 0.838512% +/- 0.300147% fps improvement (n=86) > minecraft apitrace: No difference (n=15) > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 62 > ++++++++++++++-------- > .../drivers/dri/i965/brw_schedule_instructions.cpp | 4 +- > src/mesa/drivers/dri/i965/brw_shader.h | 1 + > 3 files changed, 44 insertions(+), 23 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 8b3f8df..dbe7503 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -3190,6 +3190,7 @@ fs_visitor::run() > { > sanity_param_count = fp->Base.Parameters->NumParameters; > uint32_t orig_nr_params = c->prog_data.nr_params; > + bool allocated_without_spills; > > assign_binding_table_offsets(); > > @@ -3273,27 +3274,45 @@ fs_visitor::run() > assign_curb_setup(); > assign_urb_setup(); > > - schedule_instructions(SCHEDULE_PRE_NON_LIFO); > + static enum instruction_scheduler_mode pre_modes[] = { > + SCHEDULE_PRE, > + SCHEDULE_PRE_NON_LIFO, > + SCHEDULE_PRE_LIFO, > + }; > > - if (0) > - assign_regs_trivial(); > - else { > - if (!assign_regs(false)) { > - /* Try a non-spilling register allocation again with a different > - * scheduling heuristic. > - */ > - schedule_instructions(SCHEDULE_PRE_LIFO); > - if (!assign_regs(false)) { > - if (dispatch_width == 16) { > - fail("Failure to register allocate. Reduce number of " > - "live scalar values to avoid this."); > - } else { > - while (!assign_regs(true)) { > - if (failed) > - break; > - } > - } > - } > + /* Try each scheduling heuristic to see if it can successfully register > + * allocate without spilling. They should be ordered by decreasing > + * performance but increasing likelihood of allocating. > + */ > + for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) { > + schedule_instructions(pre_modes[i]); > + > + if (0) { > + assign_regs_trivial(); > + allocated_without_spills = true; > + } else { > + allocated_without_spills = assign_regs(false); > + } > + if (allocated_without_spills) > + break; > + } > + > + if (!allocated_without_spills) { > + /* We assume that any spilling is worse than just dropping back to > + * SIMD8. There's probably actually some intermediate point where > + * SIMD16 with a couple of spills is still better. > + */ > + if (dispatch_width == 16) { > + fail("Failure to register allocate. Reduce number of " > + "live scalar values to avoid this."); > + } > + > + /* Since we're out of heuristics, just go spill registers until we > + * get an allocation. > + */ > + while (!assign_regs(true)) { > + if (failed) > + break; > } > } > } > @@ -3308,7 +3327,8 @@ fs_visitor::run() > if (failed) > return false; > > - schedule_instructions(SCHEDULE_POST); > + if (!allocated_without_spills) > + schedule_instructions(SCHEDULE_POST); > > if (dispatch_width == 8) { > c->prog_data.reg_blocks = brw_register_blocks(grf_used); > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > index a4fae0d..a40a46a 100644 > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > @@ -1140,10 +1140,10 @@ > fs_instruction_scheduler::choose_instruction_to_schedule() > { > schedule_node *chosen = NULL; > > - if (post_reg_alloc) { > + if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) { > int chosen_time = 0; > > - /* Of the instructions closest ready to execute or the closest to > + /* Of the instructions ready to execute or the closest to > * being ready, choose the oldest one. > */ > foreach_list(node, &instructions) { > diff --git a/src/mesa/drivers/dri/i965/brw_shader.h > b/src/mesa/drivers/dri/i965/brw_shader.h > index aba24c5..34779ff 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.h > +++ b/src/mesa/drivers/dri/i965/brw_shader.h > @@ -60,6 +60,7 @@ public: > }; > > enum instruction_scheduler_mode { > + SCHEDULE_PRE, > SCHEDULE_PRE_NON_LIFO, > SCHEDULE_PRE_LIFO, > SCHEDULE_POST, > -- > 1.8.4.rc3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev