On Thu, Nov 13, 2014 at 7:28 PM, Kristian Høgsberg <k...@bitplanet.net> wrote: > This will be reused for the scalar VS pass. > > Signed-off-by: Kristian Høgsberg <k...@bitplanet.net> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 132 > +++++++++++++++++++---------------- > src/mesa/drivers/dri/i965/brw_fs.h | 1 + > 2 files changed, 71 insertions(+), 62 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index cb73b9f..4dce0a2 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -3538,11 +3538,79 @@ fs_visitor::optimize() > lower_uniform_pull_constant_loads(); > } > > +void > +fs_visitor::allocate_registers() > +{ > + bool allocated_without_spills; > + > + static enum instruction_scheduler_mode pre_modes[] = { > + SCHEDULE_PRE, > + SCHEDULE_PRE_NON_LIFO, > + SCHEDULE_PRE_LIFO, > + }; > + > + /* Try each scheduling heuristic to see if it can successfully register > + * allocate without spilling. They should be ordered by decreasing > + * performance but increasing likelihood of allocating. > + */ > + for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) { > + schedule_instructions(pre_modes[i]); > + > + if (0) { > + assign_regs_trivial(); > + allocated_without_spills = true; > + } else { > + allocated_without_spills = assign_regs(false); > + } > + if (allocated_without_spills) > + break; > + } > + > + if (!allocated_without_spills) { > + /* We assume that any spilling is worse than just dropping back to > + * SIMD8. There's probably actually some intermediate point where > + * SIMD16 with a couple of spills is still better. > + */ > + if (dispatch_width == 16) { > + fail("Failure to register allocate. Reduce number of " > + "live scalar values to avoid this."); > + } else { > + perf_debug("Fragment shader triggered register spilling. " > + "Try reducing the number of live scalar values to " > + "improve performance.\n");
Hmm, this perf_debug warning will be pretty confusing once we start hitting this path for vertex shaders as well, since the message hard-codes "Fragment shader"... > + } > + > + /* Since we're out of heuristics, just go spill registers until we > + * get an allocation. > + */ > + while (!assign_regs(true)) { > + if (failed) > + break; > + } > + } > + > + assert(force_uncompressed_stack == 0); > + > + /* This must come after all optimization and register allocation, since > + * it inserts dead code that happens to have side effects, and it does > + * so based on the actual physical registers in use. > + */ > + insert_gen4_send_dependency_workarounds(); > + > + if (failed) > + return; > + > + if (!allocated_without_spills) > + schedule_instructions(SCHEDULE_POST); > + > + if (last_scratch > 0) > + prog_data->total_scratch = brw_get_scratch_size(last_scratch); > +} > + > bool > fs_visitor::run() > { > sanity_param_count = prog->Parameters->NumParameters; > - bool allocated_without_spills; > > assign_binding_table_offsets(); > > @@ -3555,7 +3623,6 @@ fs_visitor::run() > emit_dummy_fs(); > } else if (brw->use_rep_send && dispatch_width == 16) { > emit_repclear_shader(); > - allocated_without_spills = true; > } else { > if (INTEL_DEBUG & DEBUG_SHADER_TIME) > emit_shader_time_begin(); > @@ -3610,68 +3677,9 @@ fs_visitor::run() > assign_curb_setup(); > assign_urb_setup(); > > - static enum instruction_scheduler_mode pre_modes[] = { > - SCHEDULE_PRE, > - SCHEDULE_PRE_NON_LIFO, > - SCHEDULE_PRE_LIFO, > - }; > - > - /* Try each scheduling heuristic to see if it can successfully register > - * allocate without spilling. They should be ordered by decreasing > - * performance but increasing likelihood of allocating. 
> - */ > - for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) { > - schedule_instructions(pre_modes[i]); > - > - if (0) { > - assign_regs_trivial(); > - allocated_without_spills = true; > - } else { > - allocated_without_spills = assign_regs(false); > - } > - if (allocated_without_spills) > - break; > - } > - > - if (!allocated_without_spills) { > - /* We assume that any spilling is worse than just dropping back to > - * SIMD8. There's probably actually some intermediate point where > - * SIMD16 with a couple of spills is still better. > - */ > - if (dispatch_width == 16) { > - fail("Failure to register allocate. Reduce number of " > - "live scalar values to avoid this."); > - } else { > - perf_debug("Fragment shader triggered register spilling. " > - "Try reducing the number of live scalar values to " > - "improve performance.\n"); > - } > - > - /* Since we're out of heuristics, just go spill registers until we > - * get an allocation. > - */ > - while (!assign_regs(true)) { > - if (failed) > - break; > - } > - } > - > - assert(force_uncompressed_stack == 0); > - > - /* This must come after all optimization and register allocation, since > - * it inserts dead code that happens to have side effects, and it does > - * so based on the actual physical registers in use. 
> - */ > - insert_gen4_send_dependency_workarounds(); > - > + allocate_registers(); > if (failed) > return false; > - > - if (!allocated_without_spills) > - schedule_instructions(SCHEDULE_POST); > - > - if (last_scratch > 0) > - prog_data->total_scratch = brw_get_scratch_size(last_scratch); > } > > if (stage == MESA_SHADER_FRAGMENT) { > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 31c3001..bb6f767 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -407,6 +407,7 @@ public: > > bool run(); > void optimize(); > + void allocate_registers(); > void assign_binding_table_offsets(); > void setup_payload_gen4(); > void setup_payload_gen6(); > -- > 2.1.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev