Ping :). On Tue, 2023-12-12 at 14:47 +0800, Xi Ruoyao wrote: > The problem with peephole2 is it uses a naive sliding-window algorithm > and misses many cases. For example: > > float a[10000]; > float t() { return a[0] + a[8000]; } > > is compiled to: > > la.local $r13,a > la.local $r12,a+32768 > fld.s $f1,$r13,0 > fld.s $f0,$r12,-768 > fadd.s $f0,$f1,$f0 > > by trunk. But as we've explained in r14-4851, the following would be > better with -mexplicit-relocs=auto: > > pcalau12i $r13,%pc_hi20(a) > pcalau12i $r12,%pc_hi20(a+32000) > fld.s $f1,$r13,%pc_lo12(a) > fld.s $f0,$r12,%pc_lo12(a+32000) > fadd.s $f0,$f1,$f0 > > However the sliding-window algorithm just won't detect the pcalau12i/fld > pair to be optimized. Using a define_insn_and_split in the combine pass will > work around the issue. > > gcc/ChangeLog: > > * config/loongarch/loongarch.md: > (simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>): New > define_insn_and_split. > (simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>): Likewise. > (simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>): Likewise. > (simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>): > Likewise. > (simple_store<ST_ANY:mode><P:mode>): Likewise. > (simple_store_off<ST_ANY:mode><P:mode>): Likewise. > (define_peephole2): Remove la.local/[f]ld peepholes. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c: > New test. > --- > > Bootstrapped & regtested on loongarch64-linux-gnu. Ok for trunk? 
> > gcc/config/loongarch/loongarch.md | 165 +++++++++--------- > ...explicit-relocs-auto-single-load-store-2.c | 11 ++ > 2 files changed, 98 insertions(+), 78 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c > > diff --git a/gcc/config/loongarch/loongarch.md > b/gcc/config/loongarch/loongarch.md > index 7b26d15aa4e..4009de408fb 100644 > --- a/gcc/config/loongarch/loongarch.md > +++ b/gcc/config/loongarch/loongarch.md > @@ -4033,101 +4033,110 @@ (define_insn "loongarch_crcc_w_<size>_w" > ;; > ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with > ;; 3 instructions). > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") > - (mem:LD_AT_LEAST_32_BIT (match_dup 0)))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0]) \ > - || REGNO (operands[0]) == REGNO (operands[2]))" > - [(set (match_dup 2) > - (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] > +(define_insn_and_split "simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>" > + [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") > + (mem:LD_AT_LEAST_32_BIT > + (match_operand:P 1 "symbolic_pcrel_operand" "")))] > + "loongarch_pre_reload_split () \ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (match_dup 0) > + (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))] > { > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + operands[2] = gen_reg_rtx (Pmode); > + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); > }) > > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set 
(match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") > - (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) > - (match_operand 3 "const_int_operand"))))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0]) \ > - || REGNO (operands[0]) == REGNO (operands[2]))" > - [(set (match_dup 2) > - (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] > +(define_insn_and_split "simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>" > + [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") > + (mem:LD_AT_LEAST_32_BIT > + (plus (match_operand:P 1 "symbolic_pcrel_operand" "") > + (match_operand 2 "const_int_operand" ""))))] > + "loongarch_pre_reload_split () \ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (match_dup 0) > + (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))] > { > - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + HOST_WIDE_INT offset = INTVAL (operands[2]); > + operands[2] = gen_reg_rtx (Pmode); > + operands[1] = plus_constant (Pmode, operands[1], offset); > + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); > }) > > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set (match_operand:GPR 2 "register_operand") > - (any_extend:GPR (mem:SUBDI (match_dup 0))))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0]) \ > - || REGNO (operands[0]) == REGNO (operands[2]))" > - [(set (match_dup 2) > - (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) > - (match_dup 1)))))] > +(define_insn_and_split "simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>" > + [(set 
(match_operand:GPR 0 "register_operand" "=r") > + (any_extend:GPR > + (mem:SUBDI (match_operand:P 1 "symbolic_pcrel_operand" ""))))] > + "loongarch_pre_reload_split () \ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (match_dup 0) > + (any_extend:GPR > + (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))] > { > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + operands[2] = gen_reg_rtx (Pmode); > + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); > }) > > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set (match_operand:GPR 2 "register_operand") > +(define_insn_and_split > + "simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>" > + [(set (match_operand:GPR 0 "register_operand" "=r") > + (any_extend:GPR > + (mem:SUBDI > + (plus (match_operand:P 1 "symbolic_pcrel_operand" "") > + (match_operand 2 "const_int_operand" "")))))] > + "loongarch_pre_reload_split () \ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (match_dup 0) > (any_extend:GPR > - (mem:SUBDI (plus (match_dup 0) > - (match_operand 3 "const_int_operand")))))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0]) \ > - || REGNO (operands[0]) == REGNO (operands[2]))" > - [(set (match_dup 2) > - (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) > - (match_dup 1)))))] > + (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))] > { > - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + HOST_WIDE_INT offset = INTVAL (operands[2]); > + operands[2] = gen_reg_rtx (Pmode); > + operands[1] = plus_constant (Pmode, operands[1], offset); > + 
emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1])); > }) > > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set (mem:ST_ANY (match_dup 0)) > - (match_operand:ST_ANY 2 "register_operand"))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0])) \ > - && REGNO (operands[0]) != REGNO (operands[2])" > - [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] > +(define_insn_and_split "simple_store<ST_ANY:mode><P:mode>" > + [(set (mem:ST_ANY (match_operand:P 0 "symbolic_pcrel_operand")) > + (match_operand:ST_ANY 1 "register_operand" "r,f"))] > + "loongarch_pre_reload_split () \ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (mem:ST_ANY (lo_sum:P (match_dup 2) (match_dup 0))) (match_dup 1))] > { > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + operands[2] = gen_reg_rtx (Pmode); > + emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[0])); > }) > > -(define_peephole2 > - [(set (match_operand:P 0 "register_operand") > - (match_operand:P 1 "symbolic_pcrel_operand")) > - (set (mem:ST_ANY (plus (match_dup 0) > - (match_operand 3 "const_int_operand"))) > - (match_operand:ST_ANY 2 "register_operand"))] > - "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > - && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ > - && (peep2_reg_dead_p (2, operands[0])) \ > - && REGNO (operands[0]) != REGNO (operands[2])" > - [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] > +(define_insn_and_split "simple_store_off<ST_ANY:mode><P:mode>" > + [(set (mem:ST_ANY > + (plus (match_operand:P 0 "symbolic_pcrel_operand" "") > + (match_operand 1 "const_int_operand" ""))) > + (match_operand:ST_ANY 2 "register_operand" "r,f"))] > + "loongarch_pre_reload_split () 
\ > + && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ > + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" > + "#" > + "" > + [(set (mem:ST_ANY (lo_sum:P (match_dup 1) (match_dup 0))) (match_dup 2))] > { > - operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); > - emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1])); > + HOST_WIDE_INT offset = INTVAL (operands[1]); > + operands[1] = gen_reg_rtx (Pmode); > + operands[0] = plus_constant (Pmode, operands[0], offset); > + emit_insn (gen_pcalau12i_gr<P:mode> (operands[1], operands[0])); > }) > > ;; Synchronization instructions. > diff --git > a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c > > b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c > new file mode 100644 > index 00000000000..42cb966d1e0 > --- /dev/null > +++ > b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" > } */ > + > +float a[8001]; > +float > +t (void) > +{ > + return a[0] + a[8000]; > +} > + > +/* { dg-final { scan-assembler-not "la.local" } } */
-- Xi Ruoyao <xry...@xry111.site> School of Aerospace Science and Technology, Xidian University