Re: [Mesa-dev] [PATCH v2 13/16] i965: Move fs_visitor ra pass to new fs_visitor::allocate_registers()

Connor Abbott Thu, 13 Nov 2014 18:39:02 -0800

On Thu, Nov 13, 2014 at 7:28 PM, Kristian Høgsberg <k...@bitplanet.net> wrote:
> This will be reused for the scalar VS pass.
>
> Signed-off-by: Kristian Høgsberg <k...@bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 132 +++++++++++++++++++----------------
>  src/mesa/drivers/dri/i965/brw_fs.h   |   1 +
>  2 files changed, 71 insertions(+), 62 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index cb73b9f..4dce0a2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3538,11 +3538,79 @@ fs_visitor::optimize()
>     lower_uniform_pull_constant_loads();
>  }
>
> +void
> +fs_visitor::allocate_registers()
> +{
> +   bool allocated_without_spills;
> +
> +   static enum instruction_scheduler_mode pre_modes[] = {
> +      SCHEDULE_PRE,
> +      SCHEDULE_PRE_NON_LIFO,
> +      SCHEDULE_PRE_LIFO,
> +   };
> +
> +   /* Try each scheduling heuristic to see if it can successfully register
> +    * allocate without spilling.  They should be ordered by decreasing
> +    * performance but increasing likelihood of allocating.
> +    */
> +   for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
> +      schedule_instructions(pre_modes[i]);
> +
> +      if (0) {
> +         assign_regs_trivial();
> +         allocated_without_spills = true;
> +      } else {
> +         allocated_without_spills = assign_regs(false);
> +      }
> +      if (allocated_without_spills)
> +         break;
> +   }
> +
> +   if (!allocated_without_spills) {
> +      /* We assume that any spilling is worse than just dropping back to
> +       * SIMD8.  There's probably actually some intermediate point where
> +       * SIMD16 with a couple of spills is still better.
> +       */
> +      if (dispatch_width == 16) {
> +         fail("Failure to register allocate.  Reduce number of "
> +              "live scalar values to avoid this.");
> +      } else {
> +         perf_debug("Fragment shader triggered register spilling.  "
> +                    "Try reducing the number of live scalar values to "
> +                    "improve performance.\n");


Hmm, this warning will be pretty confusing once we start hitting this
path for vertex shaders as well, since the message hard-codes
"Fragment shader"...

> +      }
> +
> +      /* Since we're out of heuristics, just go spill registers until we
> +       * get an allocation.
> +       */
> +      while (!assign_regs(true)) {
> +         if (failed)
> +            break;
> +      }
> +   }
> +
> +   assert(force_uncompressed_stack == 0);
> +
> +   /* This must come after all optimization and register allocation, since
> +    * it inserts dead code that happens to have side effects, and it does
> +    * so based on the actual physical registers in use.
> +    */
> +   insert_gen4_send_dependency_workarounds();
> +
> +   if (failed)
> +      return;
> +
> +   if (!allocated_without_spills)
> +      schedule_instructions(SCHEDULE_POST);
> +
> +   if (last_scratch > 0)
> +      prog_data->total_scratch = brw_get_scratch_size(last_scratch);
> +}
> +
>  bool
>  fs_visitor::run()
>  {
>     sanity_param_count = prog->Parameters->NumParameters;
> -   bool allocated_without_spills;
>
>     assign_binding_table_offsets();
>
> @@ -3555,7 +3623,6 @@ fs_visitor::run()
>        emit_dummy_fs();
>     } else if (brw->use_rep_send && dispatch_width == 16) {
>        emit_repclear_shader();
> -      allocated_without_spills = true;
>     } else {
>        if (INTEL_DEBUG & DEBUG_SHADER_TIME)
>           emit_shader_time_begin();
> @@ -3610,68 +3677,9 @@ fs_visitor::run()
>        assign_curb_setup();
>        assign_urb_setup();
>
> -      static enum instruction_scheduler_mode pre_modes[] = {
> -         SCHEDULE_PRE,
> -         SCHEDULE_PRE_NON_LIFO,
> -         SCHEDULE_PRE_LIFO,
> -      };
> -
> -      /* Try each scheduling heuristic to see if it can successfully register
> -       * allocate without spilling.  They should be ordered by decreasing
> -       * performance but increasing likelihood of allocating.
> -       */
> -      for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
> -         schedule_instructions(pre_modes[i]);
> -
> -         if (0) {
> -            assign_regs_trivial();
> -            allocated_without_spills = true;
> -         } else {
> -            allocated_without_spills = assign_regs(false);
> -         }
> -         if (allocated_without_spills)
> -            break;
> -      }
> -
> -      if (!allocated_without_spills) {
> -         /* We assume that any spilling is worse than just dropping back to
> -          * SIMD8.  There's probably actually some intermediate point where
> -          * SIMD16 with a couple of spills is still better.
> -          */
> -         if (dispatch_width == 16) {
> -            fail("Failure to register allocate.  Reduce number of "
> -                 "live scalar values to avoid this.");
> -         } else {
> -            perf_debug("Fragment shader triggered register spilling.  "
> -                       "Try reducing the number of live scalar values to "
> -                       "improve performance.\n");
> -         }
> -
> -         /* Since we're out of heuristics, just go spill registers until we
> -          * get an allocation.
> -          */
> -         while (!assign_regs(true)) {
> -            if (failed)
> -               break;
> -         }
> -      }
> -
> -      assert(force_uncompressed_stack == 0);
> -
> -      /* This must come after all optimization and register allocation, since
> -       * it inserts dead code that happens to have side effects, and it does
> -       * so based on the actual physical registers in use.
> -       */
> -      insert_gen4_send_dependency_workarounds();
> -
> +      allocate_registers();
>        if (failed)
>           return false;
> -
> -      if (!allocated_without_spills)
> -         schedule_instructions(SCHEDULE_POST);
> -
> -      if (last_scratch > 0)
> -         prog_data->total_scratch = brw_get_scratch_size(last_scratch);
>     }
>
>     if (stage == MESA_SHADER_FRAGMENT) {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 31c3001..bb6f767 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -407,6 +407,7 @@ public:
>
>     bool run();
>     void optimize();
> +   void allocate_registers();
>     void assign_binding_table_offsets();
>     void setup_payload_gen4();
>     void setup_payload_gen6();
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 13/16] i965: Move fs_visitor ra pass to new fs_visitor::allocate_registers()

Reply via email to