Ping :).

On Tue, 2023-12-12 at 14:47 +0800, Xi Ruoyao wrote:
> The problem with peephole2 is it uses a naive sliding-window algorithm
> and misses many cases.  For example:
> 
>     float a[10000];
>     float t() { return a[0] + a[8000]; }
> 
> is compiled to:
> 
>     la.local    $r13,a
>     la.local    $r12,a+32768
>     fld.s       $f1,$r13,0
>     fld.s       $f0,$r12,-768
>     fadd.s      $f0,$f1,$f0
> 
> by trunk.  But as we've explained in r14-4851, the following would be
> better with -mexplicit-relocs=auto:
> 
>     pcalau12i   $r13,%pc_hi20(a)
>     pcalau12i   $r12,%pc_hi20(a+32000)
>     fld.s       $f1,$r13,%pc_lo12(a)
>     fld.s       $f0,$r12,%pc_lo12(a+32000)
>     fadd.s      $f0,$f1,$f0
> 
> However the sliding-window algorithm just won't detect the pcalau12i/fld
> pair to be optimized.  Using a define_insn_and_split in the combine pass
> will work around the issue.
> 
> gcc/ChangeLog:
> 
>       * config/loongarch/loongarch.md:
>       (simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>): New
>       define_insn_and_split.
>       (simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>): Likewise.
>       (simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>): Likewise.
>       (simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>):
>       Likewise.
>       (simple_store<ST_ANY:mode><P:mode>): Likewise.
>       (simple_store_off<ST_ANY:mode><P:mode>): Likewise.
>       (define_peephole2): Remove la.local/[f]ld peepholes.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
>       New test.
> ---
> 
> Bootstrapped & regtested on loongarch64-linux-gnu.  Ok for trunk?
> 
>  gcc/config/loongarch/loongarch.md             | 165 +++++++++---------
>  ...explicit-relocs-auto-single-load-store-2.c |  11 ++
>  2 files changed, 98 insertions(+), 78 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
> 
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 7b26d15aa4e..4009de408fb 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -4033,101 +4033,110 @@ (define_insn "loongarch_crcc_w_<size>_w"
>  ;;
>  ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
>  ;; 3 instructions).
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
> -     (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0]) \
> -       || REGNO (operands[0]) == REGNO (operands[2]))"
> -  [(set (match_dup 2)
> -     (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
> +(define_insn_and_split "simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>"
> +  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
> +     (mem:LD_AT_LEAST_32_BIT
> +       (match_operand:P 1 "symbolic_pcrel_operand" "")))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (match_dup 0)
> +     (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))]
>    {
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    operands[2] = gen_reg_rtx (Pmode);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
>    })
>  
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
> -     (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
> -                             (match_operand 3 "const_int_operand"))))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0]) \
> -       || REGNO (operands[0]) == REGNO (operands[2]))"
> -  [(set (match_dup 2)
> -     (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
> +(define_insn_and_split "simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>"
> +  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
> +     (mem:LD_AT_LEAST_32_BIT
> +       (plus (match_operand:P 1 "symbolic_pcrel_operand" "")
> +             (match_operand 2 "const_int_operand" ""))))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (match_dup 0)
> +     (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))]
>    {
> -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    HOST_WIDE_INT offset = INTVAL (operands[2]);
> +    operands[2] = gen_reg_rtx (Pmode);
> +    operands[1] = plus_constant (Pmode, operands[1], offset);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
>    })
>  
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (match_operand:GPR 2 "register_operand")
> -     (any_extend:GPR (mem:SUBDI (match_dup 0))))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0]) \
> -       || REGNO (operands[0]) == REGNO (operands[2]))"
> -  [(set (match_dup 2)
> -     (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
> -                                          (match_dup 1)))))]
> +(define_insn_and_split "simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +     (any_extend:GPR
> +       (mem:SUBDI (match_operand:P 1 "symbolic_pcrel_operand" ""))))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (match_dup 0)
> +     (any_extend:GPR
> +       (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))]
>    {
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    operands[2] = gen_reg_rtx (Pmode);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
>    })
>  
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (match_operand:GPR 2 "register_operand")
> +(define_insn_and_split
> +  "simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +     (any_extend:GPR
> +       (mem:SUBDI
> +         (plus (match_operand:P 1 "symbolic_pcrel_operand" "")
> +               (match_operand 2 "const_int_operand" "")))))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (match_dup 0)
>       (any_extend:GPR
> -       (mem:SUBDI (plus (match_dup 0)
> -                        (match_operand 3 "const_int_operand")))))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0]) \
> -       || REGNO (operands[0]) == REGNO (operands[2]))"
> -  [(set (match_dup 2)
> -     (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
> -                                          (match_dup 1)))))]
> +       (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))]
>    {
> -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    HOST_WIDE_INT offset = INTVAL (operands[2]);
> +    operands[2] = gen_reg_rtx (Pmode);
> +    operands[1] = plus_constant (Pmode, operands[1], offset);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
>    })
>  
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (mem:ST_ANY (match_dup 0))
> -     (match_operand:ST_ANY 2 "register_operand"))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0])) \
> -   && REGNO (operands[0]) != REGNO (operands[2])"
> -  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
> +(define_insn_and_split "simple_store<ST_ANY:mode><P:mode>"
> +  [(set (mem:ST_ANY (match_operand:P 0 "symbolic_pcrel_operand"))
> +     (match_operand:ST_ANY 1 "register_operand" "r,f"))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (mem:ST_ANY (lo_sum:P (match_dup 2) (match_dup 0))) (match_dup 1))]
>    {
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    operands[2] = gen_reg_rtx (Pmode);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[0]));
>    })
>  
> -(define_peephole2
> -  [(set (match_operand:P 0 "register_operand")
> -     (match_operand:P 1 "symbolic_pcrel_operand"))
> -   (set (mem:ST_ANY (plus (match_dup 0)
> -                       (match_operand 3 "const_int_operand")))
> -     (match_operand:ST_ANY 2 "register_operand"))]
> -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
> -   && (peep2_reg_dead_p (2, operands[0])) \
> -   && REGNO (operands[0]) != REGNO (operands[2])"
> -  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
> +(define_insn_and_split "simple_store_off<ST_ANY:mode><P:mode>"
> +  [(set (mem:ST_ANY
> +       (plus (match_operand:P 0 "symbolic_pcrel_operand" "")
> +             (match_operand 1 "const_int_operand" "")))
> +     (match_operand:ST_ANY 2 "register_operand" "r,f"))]
> +  "loongarch_pre_reload_split () \
> +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
> +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
> +  "#"
> +  ""
> +  [(set (mem:ST_ANY (lo_sum:P (match_dup 1) (match_dup 0))) (match_dup 2))]
>    {
> -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
> -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
> +    HOST_WIDE_INT offset = INTVAL (operands[1]);
> +    operands[1] = gen_reg_rtx (Pmode);
> +    operands[0] = plus_constant (Pmode, operands[0], offset);
> +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[1], operands[0]));
>    })
>  
>  ;; Synchronization instructions.
> diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
> new file mode 100644
> index 00000000000..42cb966d1e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
> +
> +float a[8001];
> +float
> +t (void)
> +{
> +  return a[0] + a[8000];
> +}
> +
> +/* { dg-final { scan-assembler-not "la.local" } } */

-- 
Xi Ruoyao <xry...@xry111.site>
School of Aerospace Science and Technology, Xidian University

Reply via email to