Instead of just running a couple of the possible optimisations in one single iteration, it now runs the whole loop again after lowering the load payloads. According to shader-db this gives:
total instructions in shared programs: 6493365 -> 6493289 (-0.00%) instructions in affected programs: 1696 -> 1620 (-4.48%) total loops in shared programs: 2237 -> 2237 (0.00%) helped: 20 HURT: 0 Most of the shaders just benefit from running the register coalesce pass multiple times. However the following two additionally benefit from an extra pass of opt_saturate_propagation which causes a bunch of further optimisations: steam-metro-last-light-1719.shader_test steam-metro-last-light-836.shader_test The optimisations that get run after lowering the load payloads are: 04-14-register_coalesce 04-17-compact_virtual_grfs 05-12-opt_saturate_propagation 06-02-opt_algebraic 06-04-opt_copy_propagate 06-07-dead_code_eliminate 06-12-opt_saturate_propagation 06-14-register_coalesce 06-17-compact_virtual_grfs 07-18-opt_combine_constants In the first shader this drops four instructions. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 94 +++++++++++++++++++----------------- src/mesa/drivers/dri/i965/brw_fs.h | 3 ++ 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 01a7c99..8d1db23 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4754,33 +4754,6 @@ fs_visitor::calculate_register_pressure() } } -void -fs_visitor::optimize() -{ - /* Start by validating the shader we currently have. */ - validate(); - - /* bld is the common builder object pointing at the end of the program we - * used to translate it into i965 IR. For the optimization and lowering - * passes coming next, any code added after the end of the program without - * having explicitly called fs_builder::at() clearly points at a mistake. - * Ideally optimization passes wouldn't be part of the visitor so they - * wouldn't have access to bld at all, but they do, so just in case some - * pass forgets to ask for a location explicitly set it to NULL here to - * make it trip. The dispatch width is initialized to a bogus value to - * make sure that optimizations set the execution controls explicitly to - * match the code they are manipulating instead of relying on the defaults. - */ - bld = fs_builder(this, 64); - - assign_constant_locations(); - demote_pull_constants(); - - validate(); - - split_virtual_grfs(); - validate(); - #define OPT(pass, args...) ({ \ pass_num++; \ bool this_progress = pass(args); \ @@ -4799,20 +4772,10 @@ fs_visitor::optimize() this_progress; \ }) - if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { - char filename[64]; - snprintf(filename, 64, "%s%d-%s-00-start", - stage_abbrev, dispatch_width, nir->info.name); - - backend_shader::dump_instructions(filename); - } - - bool progress = false; - int iteration = 0; - int pass_num = 0; - - OPT(lower_simd_width); - OPT(lower_logical_sends); +void +fs_visitor::optimize_loop(int &iteration, int &pass_num) +{ + bool progress; do { progress = false; @@ -4839,6 +4802,51 @@ fs_visitor::optimize() OPT(compact_virtual_grfs); } while (progress); +} + +void +fs_visitor::optimize() +{ + /* Start by validating the shader we currently have. */ + validate(); + + /* bld is the common builder object pointing at the end of the program we + * used to translate it into i965 IR. For the optimization and lowering + * passes coming next, any code added after the end of the program without + * having explicitly called fs_builder::at() clearly points at a mistake. + * Ideally optimization passes wouldn't be part of the visitor so they + * wouldn't have access to bld at all, but they do, so just in case some + * pass forgets to ask for a location explicitly set it to NULL here to + * make it trip. The dispatch width is initialized to a bogus value to + * make sure that optimizations set the execution controls explicitly to + * match the code they are manipulating instead of relying on the defaults. + */ + bld = fs_builder(this, 64); + + assign_constant_locations(); + demote_pull_constants(); + + validate(); + + split_virtual_grfs(); + validate(); + + if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { + char filename[64]; + snprintf(filename, 64, "%s%d-%s-00-start", + stage_abbrev, dispatch_width, nir->info.name); + + backend_shader::dump_instructions(filename); + } + + bool progress = false; + int iteration = 0; + int pass_num = 0; + + OPT(lower_simd_width); + OPT(lower_logical_sends); + + optimize_loop(iteration, pass_num); pass_num = 0; @@ -4846,9 +4854,7 @@ fs_visitor::optimize() if (OPT(lower_load_payload)) { split_virtual_grfs(); - OPT(register_coalesce); - OPT(compute_to_mrf); - OPT(dead_code_eliminate); + optimize_loop(iteration, pass_num); } OPT(opt_combine_constants); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index e8b511f..c12e9e8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -386,6 +386,9 @@ public: unsigned promoted_constants; brw::fs_builder bld; + +private: + void optimize_loop(int &iteration, int &pass_num); }; /** -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev