On Thu, 2014-12-18 at 10:04 +0900, Kaz Kojima wrote:
> This patch is discussed in PR55212
>
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55212#c77
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55212#c82
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55212#c83
>
> and is meant to improve code quality with LRA.
> SH HI/QImode load/store instructions with displacement addressing take r0
> as the target/source register, except on SH2A.  Because of this and the
> combine pass, unsigned char/short memory accesses cause anomalous register
> spills in some cases with LRA, as can be seen in #c77 above.  The patch
> splits these load/store insns into two move insns via r0 when generating
> mov[qh]i.  It works as a kind of early reload.
> It will make some code worse, but it wins on average by CSiBE numbers.
> Interestingly, this reduces the total text size of the CSiBE test by
> ~0.04% at -O2 even for trunk, i.e. with the old reload, though currently
> the split is enabled only for LRA.
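To make the failure mode concrete, here is a hypothetical test case of my
own (not one of the cases from the PR) in which every access uses
displacement addressing and therefore, on anything but SH2A, can only go
through r0:

/* Hypothetical example, not taken from PR 55212.
   Each p[i] below is a QImode load with a small nonzero displacement.
   The only non-SH2A instruction for such a load is mov.b @(disp,Rm),R0,
   so all three loads are tied to r0 unless the move is first split into
   a load to r0 plus a reg-reg copy, as the patch does.  */
unsigned char
sum_bytes (unsigned char *p)
{
  return p[1] + p[2] + p[3];
}

With the split, each access becomes a mov.b @(disp,Rm),R0 followed by a
register-to-register copy into the pseudo, which, per the comment in the
patch, is what keeps the r0 live range short.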
Yes, that's true.  Although for non-LRA it's probably better to use
post-combine splits to do that (see my comment c#91 in PR 55212).
Anyway...

> --
> 	* config/sh/sh.c (prepare_move_operands): Split HI/QImode load/store
> 	to two move insns via r0.
>
> diff --git a/config/sh/sh.c b/config/sh/sh.c
> index db9226e..1bf27db 100644
> --- a/config/sh/sh.c
> +++ b/config/sh/sh.c
> @@ -1778,6 +1778,38 @@ prepare_move_operands (rtx operands[], machine_mode mode)
>            && GET_CODE (XEXP (operands[0], 0)) == PLUS
>            && REG_P (XEXP (XEXP (operands[0], 0), 1)))
>          operands[1] = copy_to_mode_reg (mode, operands[1]);
> +
> +      /* When the displacement addressing is used, RA will assign r0 to
> +         the pseudo register operand for the QI/HImode load/store.
> +         This tends to make a long live range for R0 and might cause
> +         anomalous register spills in some case with LRA.  See PR
> +         target/55212.
> +         We split possible load/store to two move insns via r0 so as to
> +         shorten R0 live range.  It will make some codes worse but will
> +         win on avarage for LRA.  */
> +      else if (sh_lra_p ()
> +               && TARGET_SH1 && ! TARGET_SH2A
> +               && (mode == QImode || mode == HImode)
> +               && ((REG_P (operands[0]) && MEM_P (operands[1]))
> +                   || (REG_P (operands[1]) && MEM_P (operands[0]))))
> +        {
> +          bool load_p = REG_P (operands[0]);
> +          rtx reg = operands[load_p ? 0 : 1];
> +          rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
> +
> +          if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
> +              && GET_CODE (adr) == PLUS
> +              && REG_P (XEXP (adr, 0))
> +              && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
> +              && CONST_INT_P (XEXP (adr, 1))
> +              && INTVAL (XEXP (adr, 1)) != 0
> +              && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
> +            {
> +              rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
> +              emit_move_insn (r0_rtx, operands[1]);
> +              operands[1] = r0_rtx;
> +            }
> +        }
>      }
>
>    if (mode == Pmode || mode == ptr_mode)

I think it's better to use already existing functions from the SH code to
make it a bit easier to read.  E.g.:

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 218988)
+++ gcc/config/sh/sh.c	(working copy)
@@ -1793,17 +1793,14 @@
                && ((REG_P (operands[0]) && MEM_P (operands[1]))
                    || (REG_P (operands[1]) && MEM_P (operands[0]))))
          {
-           bool load_p = REG_P (operands[0]);
+           bool load_p = arith_reg_dest (operands[0], GET_MODE (operands[0]));
            rtx reg = operands[load_p ? 0 : 1];
-           rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
+           rtx mem = operands[load_p ? 1 : 0];
 
-           if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
-               && GET_CODE (adr) == PLUS
-               && REG_P (XEXP (adr, 0))
-               && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
-               && CONST_INT_P (XEXP (adr, 1))
-               && INTVAL (XEXP (adr, 1)) != 0
-               && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
+           if (displacement_mem_operand (mem, mode)
+               && sh_disp_addr_displacement (mem) > 0
+               && REGNO (reg) != R0_REG
+               && !refers_to_regno_p (R0_REG, R0_REG + 1, mem, NULL))
              {
                rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
                emit_move_insn (r0_rtx, operands[1]);