On Tue, Feb 23, 2021 at 4:48 AM acsawdey--- via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > From: Aaron Sawdey <acsaw...@linux.ibm.com> > > This patch implements a RTL pass that looks for pc-relative loads of the > address of an external variable using the PCREL_GOT relocation and a > single load or store that uses that external address. > > Produced by a cast of thousands: > * Michael Meissner > * Peter Bergner > * Bill Schmidt > * Alan Modra > * Segher Boessenkool > * Aaron Sawdey > > This incorporates the changes requested in Segher's review. A few things I > did not change were the insn-at-a-time scan that could be done with DF, and > I did not change to using statistics.[ch] for the counters struct. I did try > to improve the naming, and rewrote a number of comments to make them > consistent > with the code, and generally tried to make things more readable. > > OK for trunk if bootstrap/regtest passes?
stage1 please? > Thanks! > Aaron > > gcc/ChangeLog: > > * config.gcc: Add pcrel-opt.o. > * config/rs6000/pcrel-opt.c: New file. > * config/rs6000/pcrel-opt.md: New file. > * config/rs6000/predicates.md: Add d_form_memory predicate. > * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT. > * config/rs6000/rs6000-passes.def: Add pass_pcrel_opt. > * config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(), > pcrel_opt_valid_mem_p(), output_pcrel_opt_reloc(), > and make_pass_pcrel_opt(). > * config/rs6000/rs6000.c (reg_to_non_prefixed): Make global. > (rs6000_option_override_internal): Add pcrel-opt. > (rs6000_delegitimize_address): Support pcrel-opt. > (rs6000_opt_masks): Add pcrel-opt. > (pcrel_opt_valid_mem_p): New function. > (reg_to_non_prefixed): Make global. > (rs6000_asm_output_opcode): Reset next_insn_prefixed_p. > (output_pcrel_opt_reloc): New function. > * config/rs6000/rs6000.md (loads_extern_addr): New attr. > (pcrel_extern_addr): Set loads_extern_addr. > Add include for pcrel-opt.md. > * config/rs6000/rs6000.opt: Add -mpcrel-opt. > * config/rs6000/t-rs6000: Add rules for pcrel-opt.c and > pcrel-opt.md. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pcrel-opt-inc-di.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-df.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-di.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-hi.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-qi.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-sf.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-si.c: New test. > * gcc.target/powerpc/pcrel-opt-ld-vector.c: New test. > * gcc.target/powerpc/pcrel-opt-st-df.c: New test. > * gcc.target/powerpc/pcrel-opt-st-di.c: New test. > * gcc.target/powerpc/pcrel-opt-st-hi.c: New test. > * gcc.target/powerpc/pcrel-opt-st-qi.c: New test. > * gcc.target/powerpc/pcrel-opt-st-sf.c: New test. > * gcc.target/powerpc/pcrel-opt-st-si.c: New test. > * gcc.target/powerpc/pcrel-opt-st-vector.c: New test. 
> --- > gcc/config.gcc | 8 +- > gcc/config/rs6000/pcrel-opt.md | 399 ++++++++ > gcc/config/rs6000/predicates.md | 21 + > gcc/config/rs6000/rs6000-cpus.def | 2 + > gcc/config/rs6000/rs6000-passes.def | 8 + > gcc/config/rs6000/rs6000-pcrel-opt.c | 924 ++++++++++++++++++ > gcc/config/rs6000/rs6000-protos.h | 4 + > gcc/config/rs6000/rs6000.c | 111 ++- > gcc/config/rs6000/rs6000.md | 8 +- > gcc/config/rs6000/rs6000.opt | 4 + > gcc/config/rs6000/t-rs6000 | 7 +- > .../gcc.target/powerpc/pcrel-opt-inc-di.c | 17 + > .../gcc.target/powerpc/pcrel-opt-ld-df.c | 36 + > .../gcc.target/powerpc/pcrel-opt-ld-di.c | 42 + > .../gcc.target/powerpc/pcrel-opt-ld-hi.c | 42 + > .../gcc.target/powerpc/pcrel-opt-ld-qi.c | 42 + > .../gcc.target/powerpc/pcrel-opt-ld-sf.c | 42 + > .../gcc.target/powerpc/pcrel-opt-ld-si.c | 41 + > .../gcc.target/powerpc/pcrel-opt-ld-vector.c | 36 + > .../gcc.target/powerpc/pcrel-opt-st-df.c | 36 + > .../gcc.target/powerpc/pcrel-opt-st-di.c | 36 + > .../gcc.target/powerpc/pcrel-opt-st-hi.c | 42 + > .../gcc.target/powerpc/pcrel-opt-st-qi.c | 42 + > .../gcc.target/powerpc/pcrel-opt-st-sf.c | 36 + > .../gcc.target/powerpc/pcrel-opt-st-si.c | 41 + > .../gcc.target/powerpc/pcrel-opt-st-vector.c | 36 + > 26 files changed, 2054 insertions(+), 9 deletions(-) > create mode 100644 gcc/config/rs6000/pcrel-opt.md > create mode 100644 gcc/config/rs6000/rs6000-pcrel-opt.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c > create mode 100644 
gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index 17fea83b2e4..c8853009e55 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -509,7 +509,8 @@ or1k*-*-*) > ;; > powerpc*-*-*) > cpu_type=rs6000 > - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o > rs6000-call.o" > + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o" > + extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o" > extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h" > extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h" > extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h" > @@ -524,6 +525,7 @@ powerpc*-*-*) > esac > extra_options="${extra_options} g.opt fused-madd.opt > rs6000/rs6000-tables.opt" > target_gtfiles="$target_gtfiles > \$(srcdir)/config/rs6000/rs6000-logue.c > \$(srcdir)/config/rs6000/rs6000-call.c" > + target_gtfiles="$target_gtfiles > \$(srcdir)/config/rs6000/rs6000-pcrel-opt.c" > ;; > pru-*-*) > cpu_type=pru > @@ -535,8 +537,10 @@ riscv*) > ;; > rs6000*-*-*) > extra_options="${extra_options} g.opt fused-madd.opt > rs6000/rs6000-tables.opt" > - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o > rs6000-call.o" > + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o" > + extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o" > target_gtfiles="$target_gtfiles > \$(srcdir)/config/rs6000/rs6000-logue.c > \$(srcdir)/config/rs6000/rs6000-call.c" > + target_gtfiles="$target_gtfiles > \$(srcdir)/config/rs6000/rs6000-pcrel-opt.c" > 
;; > sparc*-*-*) > cpu_type=sparc > diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md > new file mode 100644 > index 00000000000..33428613acd > --- /dev/null > +++ b/gcc/config/rs6000/pcrel-opt.md > @@ -0,0 +1,399 @@ > +;; Machine description for the PCREL_OPT optimization. > +;; Copyright (C) 2020-2021 Free Software Foundation, Inc. > +;; Contributed by Michael Meissner (meiss...@linux.ibm.com) > + > +;; This file is part of GCC. > + > +;; GCC is free software; you can redistribute it and/or modify it > +;; under the terms of the GNU General Public License as published > +;; by the Free Software Foundation; either version 3, or (at your > +;; option) any later version. > + > +;; GCC is distributed in the hope that it will be useful, but WITHOUT > +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY > +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > +;; License for more details. > + > +;; You should have received a copy of the GNU General Public License > +;; along with GCC; see the file COPYING3. If not see > +;; <http://www.gnu.org/licenses/>. > + > +;; Support for the PCREL_OPT optimization. PCREL_OPT looks for instances > where > +;; an external variable is used only once, either for reading or for writing. > +;; > +;; If we are optimizing a single read, normally the code would look like: > +;; > +;; (set (reg:DI <ptr>) > +;; (symbol_ref:DI "<extern_addr>")) # <data> is currently dead > +;; > +;; ... # insns do not need to be adjacent > +;; > +;; (set (reg:SI <data>) > +;; (mem:SI (reg:DI <xxx>))) # <ptr> dies with this insn > +;; > +;; We transform this into: > +;; > +;; (parallel [(set (reg:DI <ptr>) > +;; (unspec:SI [(symbol_ref:DI <extern_addr>) > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_LD_ADDR)) > +;; (set (reg:DI <data>) > +;; (unspec:DI [(const_int 0)] > +;; UNSPEC_PCREL_OPT_LD_DATA))]) > +;; > +;; ... 
> +;; > +;; (parallel [(set (reg:SI <data>) > +;; (unspec:SI [(mem:SI (reg:DI <ptr>)) > +;; (reg:DI <data>) > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_LD_RELOC)) > +;; (clobber (reg:DI <ptr>))]) > +;; > +;; The marker is an integer constant that links the load of the external > +;; address to the load of the actual variable. > +;; > +;; In the first insn, we set the address of the external variable, and > +;; also mark the register being loaded as set, so that both are created in > +;; that insn and consumed in the second insn. The mode of the > register that > +;; we will ultimately load into doesn't matter, so we use DImode. We just need to > mark > +;; that both registers may be set in the first insn, and will be used in the > +;; second insn. > +;; > +;; Since we use UNSPEC's and link both the register holding the external > +;; address and the value being loaded, it should prevent other passes from > +;; modifying it. > +;; > +;; If the register being loaded is the same as the base register, we use an > +;; alternate form of the insns. > +;; > +;; (set (reg:DI <data_ptr>) > +;; (unspec:DI [(symbol_ref:DI <extern_addr>) > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_LD_SAME_REG)) > +;; > +;; ... 
> +;; > +;; (parallel [(set (reg:SI <data>) > +;; (unspec:SI [(mem:SI (reg:DI <ptr>)) > +;; (reg:DI <data>) > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_LD_RELOC)) > +;; (clobber (reg:DI <ptr>))]) > + > +(define_c_enum "unspec" > + [UNSPEC_PCREL_OPT_LD_ADDR > + UNSPEC_PCREL_OPT_LD_DATA > + UNSPEC_PCREL_OPT_LD_SAME_REG > + UNSPEC_PCREL_OPT_LD_RELOC > + UNSPEC_PCREL_OPT_ST_ADDR > + UNSPEC_PCREL_OPT_ST_RELOC]) > + > +;; Modes that are supported for PCREL_OPT > +(define_mode_iterator PCRELOPT [QI HI SI DI TI SF DF KF > + V1TI V2DI V4SI V8HI V16QI V2DF V4SF > + (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")]) > + > +;; Vector modes for PCREL_OPT > +(define_mode_iterator PCRELOPT_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF > V4SF > + (TF "TARGET_FLOAT128_TYPE && > TARGET_IEEEQUAD")]) > + > +;; Insn for loading the external address, where the register being loaded is > not > +;; the same as the register being loaded with the data. > +(define_insn "pcrel_opt_ld_addr" > + [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b") > + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") > + (match_operand 2 "const_int_operand" "n,n")] > + UNSPEC_PCREL_OPT_LD_ADDR)) > + (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa") > + (unspec:DI [(const_int 0)] > + UNSPEC_PCREL_OPT_LD_DATA))] > + "TARGET_PCREL_OPT > + && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])" > + "ld %0,%a1\n.Lpcrel%2:" > + [(set_attr "prefixed" "yes") > + (set_attr "type" "load") > + (set_attr "loads_external_address" "yes")]) > + > +;; Alternate form of loading up the external address that is the same > register > +;; as the final load. 
> +(define_insn "pcrel_opt_ld_addr_same_reg" > + [(set (match_operand:DI 0 "base_reg_operand" "=b") > + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") > + (match_operand 2 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_SAME_REG))] > + "TARGET_PCREL_OPT" > + "ld %0,%a1\n.Lpcrel%2:" > + [(set_attr "prefixed" "yes") > + (set_attr "type" "load") > + (set_attr "loads_external_address" "yes")]) > + > +;; PCREL_OPT modes that are optimized for loading or storing GPRs. > +(define_mode_iterator PCRELOPT_GPR [QI HI SI DI SF DF]) > + > +(define_mode_attr PCRELOPT_GPR_LD [(QI "lbz") > + (HI "lhz") > + (SI "lwz") > + (SF "lwz") > + (DI "ld") > + (DF "ld")]) > + > +;; PCREL_OPT load operation of GPRs. Operand 4 (the register used to hold > the > +;; address of the external symbol) is SCRATCH if the same register is used > for > +;; the normal load. > +(define_insn "*pcrel_opt_ld<mode>_gpr" > + [(parallel [(set (match_operand:PCRELOPT_GPR 0 "int_reg_operand" "+r") > + (unspec:PCRELOPT_GPR [(match_operand:PCRELOPT_GPR 1 > "d_form_memory" "m") > + (match_operand:DI 2 "int_reg_operand" "0") > + (match_operand 3 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_RELOC)) > + (clobber (match_scratch:DI 4 "=bX"))])] > + "TARGET_PCREL_OPT > + && (GET_CODE (operands[4]) == SCRATCH > + || reg_mentioned_p (operands[4], operands[1]))" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return "<PCRELOPT_GPR_LD> %0,%1"; > +} > + [(set_attr "type" "load")]) > + > +;; PCREL_OPT load with sign/zero extension > +(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr" > + [(set (match_operand:EXTSI 0 "int_reg_operand" "+r") > + (any_extend:EXTSI > + (unspec:SI [(match_operand:SI 1 "d_form_memory" "m") > + (match_operand:DI 2 "int_reg_operand" "0") > + (match_operand 3 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_RELOC))) > + (clobber (match_scratch:DI 4 "=bX"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return "lw<az> %0,%1"; > +} > + [(set_attr "type" 
"load")]) > + > +(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr" > + [(set (match_operand:EXTHI 0 "int_reg_operand" "+r") > + (any_extend:EXTHI > + (unspec:HI [(match_operand:HI 1 "d_form_memory" "m") > + (match_operand:DI 2 "int_reg_operand" "0") > + (match_operand 3 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_RELOC))) > + (clobber (match_scratch:DI 4 "=bX"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return "lh<az> %0,%1"; > +} > + [(set_attr "type" "load")]) > + > +(define_insn "*pcrel_opt_ldqi_u<mode>_gpr" > + [(set (match_operand:EXTQI 0 "int_reg_operand" "+r") > + (zero_extend:EXTQI > + (unspec:QI [(match_operand:QI 1 "d_form_memory" "m") > + (match_operand:DI 2 "int_reg_operand" "0") > + (match_operand 3 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_RELOC))) > + (clobber (match_scratch:DI 4 "=bX"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return "lbz %0,%1"; > +} > + [(set_attr "type" "load")]) > + > +;; Scalar types that can be optimized by loading them into floating point > +;; or Altivec registers. > +(define_mode_iterator PCRELOPT_FP [DI DF SF]) > + > +;; Load instructions to load up scalar floating point or 64-bit integer > values > +;; into floating point registers or Altivec registers. > +(define_mode_attr PCRELOPT_FPR_LD [(DI "lfd") (DF "lfd") (SF "lfs")]) > +(define_mode_attr PCRELOPT_VMX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")]) > + > +;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers. 
> +(define_insn "*pcrel_opt_ld<mode>_vsx" > + [(set (match_operand:PCRELOPT_FP 0 "vsx_register_operand" "+d,v") > + (unspec:PCRELOPT_FP [(match_operand:PCRELOPT_FP 1 "d_form_memory" > "m,m") > + (match_operand:DI 2 "vsx_register_operand" "0,0") > + (match_operand 3 "const_int_operand" "n,n")] > + UNSPEC_PCREL_OPT_LD_RELOC)) > + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return which_alternative ? "<PCRELOPT_VMX_LD> %0,%1" : "<PCRELOPT_FPR_LD> > %0,%1"; > +} > + [(set_attr "type" "fpload")]) > + > +;; PCREL_OPT optimization extending SFmode to DFmode via a load. > +(define_insn "*pcrel_opt_ldsf_df" > + [(set (match_operand:DF 0 "vsx_register_operand" "+d,v") > + (float_extend:DF > + (unspec:SF [(match_operand:SF 1 "d_form_memory" "m,m") > + (match_operand:DI 2 "vsx_register_operand" "0,0") > + (match_operand 3 "const_int_operand" "n,n")] > + UNSPEC_PCREL_OPT_LD_RELOC))) > + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return which_alternative ? "lxssp %0,%1" : "lfs %0,%1"; > +} > + [(set_attr "type" "fpload")]) > + > +;; PCREL_OPT load operation of vector/float128 types into vector registers. > +(define_insn "*pcrel_opt_ld<mode>" > + [(set (match_operand:PCRELOPT_VECT 0 "vsx_register_operand" "+wa") > + (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 "d_form_memory" > "m") > + (match_operand:DI 2 "vsx_register_operand" "0") > + (match_operand 3 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_LD_RELOC)) > + (clobber (match_operand:DI 4 "base_reg_operand" "=b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[3]); > + return "lxv %x0,%1"; > +} > + [(set_attr "type" "vecload")]) > + > + > +;; PCREL_OPT optimization for stores. We need to put the label after the PLD > +;; instruction, because the assembler might insert a NOP before the PLD for > +;; alignment. 
> +;; > +;; If we are optimizing a single write, normally the code would look like: > +;; > +;; (set (reg:DI <ptr>) > +;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here > +;; > +;; ... # insns do not need to be adjacent > +;; > +;; (set (mem:SI (reg:DI <xxx>)) > +;; (reg:SI <data>)) # <ptr> dies with this insn > +;; > +;; We optimize this to be: > +;; > +;; (parallel [(set (reg:DI <ptr>) > +;; (unspec:DI [(symbol_ref:DI "<extern_addr>") > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_ST_ADDR)) > +;; (use (reg:<MODE> <data>))]) > +;; > +;; ... # insns do not need to be adjacent > +;; > +;; (parallel [(set (mem:<MODE> (reg:DI <ptr>)) > +;; (unspec:<MODE> [(reg:<MODE> <data>) > +;; (const_int <marker>)] > +;; UNSPEC_PCREL_OPT_ST_RELOC)) > +;; (clobber (reg:DI <ptr>))]) > + > +(define_insn "*pcrel_opt_st_addr<mode>" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") > + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") > + (match_operand 2 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_ST_ADDR)) > + (use (match_operand:PCRELOPT 3 "gpc_reg_operand" "rwa"))] > + "TARGET_PCREL_OPT" > + "ld %0,%a1\n.Lpcrel%2:" > + [(set_attr "prefixed" "yes") > + (set_attr "type" "load") > + (set_attr "loads_external_address" "yes")]) > + > +;; PCREL_OPT stores. 
> +(define_insn "*pcrel_opt_st<mode>" > + [(set (match_operand:QHSI 0 "d_form_memory" "=m") > + (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r") > + (match_operand 2 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[2]); > + return "st<wd> %1,%0"; > +} > + [(set_attr "type" "store")]) > + > +(define_insn "*pcrel_opt_stdi" > + [(set (match_operand:DI 0 "d_form_memory" "=m,m,m") > + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v") > + (match_operand 2 "const_int_operand" "n,n,n")] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] > + "TARGET_PCREL_OPT && TARGET_POWERPC64" > +{ > + output_pcrel_opt_reloc (operands[2]); > + switch (which_alternative) > + { > + case 0: > + return "std %1,%0"; > + case 1: > + return "stfd %1,%0"; > + case 2: > + return "stxsd %1,%0"; > + default: > + gcc_unreachable (); > + } > +} > + [(set_attr "type" "store,fpstore,fpstore")]) > + > +(define_insn "*pcrel_opt_stsf" > + [(set (match_operand:SF 0 "d_form_memory" "=m,m,m") > + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r") > + (match_operand 2 "const_int_operand" "n,n,n")] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[2]); > + switch (which_alternative) > + { > + case 0: > + return "stfs %1,%0"; > + case 1: > + return "stxssp %1,%0"; > + case 2: > + return "stw %1,%0"; > + default: > + gcc_unreachable (); > + } > +} > + [(set_attr "type" "fpstore,fpstore,store")]) > + > +(define_insn "*pcrel_opt_stdf" > + [(set (match_operand:DF 0 "d_form_memory" "=m,m,m") > + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r") > + (match_operand 2 "const_int_operand" "n,n,n")] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] > + 
"TARGET_PCREL_OPT > + && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))" > +{ > + output_pcrel_opt_reloc (operands[2]); > + switch (which_alternative) > + { > + case 0: > + return "stfd %1,%0"; > + case 1: > + return "stxsd %1,%0"; > + case 2: > + return "std %1,%0"; > + default: > + gcc_unreachable (); > + } > +} > + [(set_attr "type" "fpstore,fpstore,store")]) > + > +(define_insn "*pcrel_opt_st<mode>" > + [(set (match_operand:PCRELOPT_VECT 0 "d_form_memory" "=m") > + (unspec:PCRELOPT_VECT [(match_operand:PCRELOPT_VECT 1 > "gpc_reg_operand" "wa") > + (match_operand 2 "const_int_operand" "n")] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] > + "TARGET_PCREL_OPT" > +{ > + output_pcrel_opt_reloc (operands[2]); > + return "stxv %x1,%0"; > +} > + [(set_attr "type" "vecstore")]) > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index bd26c62b3a4..bd6ef1e56a5 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -1904,3 +1904,24 @@ (define_predicate "prefixed_memory" > { > return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT); > }) > + > +;; Return true if the operand is a valid memory operand with a D-form > +;; address that could be merged with the load of a PC-relative external > address > +;; with the PCREL_OPT optimization. We don't check here whether or not the > +;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom > 4 > +;; bits 0) instruction. 
> +(define_predicate "d_form_memory" > + (match_code "mem") > +{ > + if (!memory_operand (op, mode)) > + return false; > + > + rtx addr = XEXP (op, 0); > + > + if (REG_P (addr)) > + return true; > + if (SUBREG_P (addr) && REG_P (SUBREG_REG (addr))) > + return true; > + > + return !indexed_address (addr, mode); > +}) > diff --git a/gcc/config/rs6000/rs6000-cpus.def > b/gcc/config/rs6000/rs6000-cpus.def > index f0cf79e2982..cbbb42c1b3a 100644 > --- a/gcc/config/rs6000/rs6000-cpus.def > +++ b/gcc/config/rs6000/rs6000-cpus.def > @@ -77,6 +77,7 @@ > /* Flags that need to be turned off if -mno-power10. */ > #define OTHER_POWER10_MASKS (OPTION_MASK_MMA \ > | OPTION_MASK_PCREL \ > + | OPTION_MASK_PCREL_OPT \ > | OPTION_MASK_PREFIXED) > > #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ > @@ -147,6 +148,7 @@ > | OPTION_MASK_P9_MISC \ > | OPTION_MASK_P9_VECTOR \ > | OPTION_MASK_PCREL \ > + | OPTION_MASK_PCREL_OPT \ > | OPTION_MASK_POPCNTB \ > | OPTION_MASK_POPCNTD \ > | OPTION_MASK_POWERPC64 \ > diff --git a/gcc/config/rs6000/rs6000-passes.def > b/gcc/config/rs6000/rs6000-passes.def > index 606ad3e14a2..c8e46ba6abe 100644 > --- a/gcc/config/rs6000/rs6000-passes.def > +++ b/gcc/config/rs6000/rs6000-passes.def > @@ -24,4 +24,12 @@ along with GCC; see the file COPYING3. If not see > REPLACE_PASS (PASS, INSTANCE, TGT_PASS) > */ > > + /* Pass to add the appropriate vector swaps on power8 little endian > systems. > + The power8 does not have instructions that automatically do the byte > swaps > + for loads and stores. */ > INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps); > + > + /* Pass to do the PCREL_OPT optimization that combines the load of an > + external symbol's address along with a single load or store using that > + address as a base register. 
*/ > + INSERT_PASS_BEFORE (pass_sched2, 1, pass_pcrel_opt); > diff --git a/gcc/config/rs6000/rs6000-pcrel-opt.c > b/gcc/config/rs6000/rs6000-pcrel-opt.c > new file mode 100644 > index 00000000000..9a3defd0478 > --- /dev/null > +++ b/gcc/config/rs6000/rs6000-pcrel-opt.c > @@ -0,0 +1,924 @@ > +/* Subroutines used to support the pc-relative linker optimization. > + Copyright (C) 2020-2021 Free Software Foundation, Inc. > + > + This file is part of GCC. > + > + GCC is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published > + by the Free Software Foundation; either version 3, or (at your > + option) any later version. > + > + GCC is distributed in the hope that it will be useful, but WITHOUT > + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY > + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > + License for more details. > + > + You should have received a copy of the GNU General Public License > + along with GCC; see the file COPYING3. If not see > + <http://www.gnu.org/licenses/>. */ > + > +/* This file implements an RTL pass that looks for pc-relative loads of the > + address of an external variable using the PCREL_GOT relocation and a > single > + load that uses that external address. If that is found we create the > + PCREL_OPT relocation to possibly convert: > + > + pld addr_reg,var@pcrel@got > + > + <possibly other insns that do not use 'addr_reg' or 'data_reg'> > + > + lwz data_reg,0(addr_reg) > + > + into: > + > + plwz data_reg,var@pcrel > + > + <possibly other insns that do not use 'addr_reg' or 'data_reg'> > + > + nop > + > + Of course it would be nice to be able to put the plwz in this example in > + place of the lwz but the linker cannot easily replace a 4-byte instruction > + with an 8-byte one. 
> + > + If the variable is not defined in the main program or the code using it is > + not in the main program, the linker puts the address in the .got section > and > + generates: > + > + .section .got > + .Lvar_got: > + .dword var > + > + At the point where it is referenced, we have: > + > + .section .text > + pld addr_reg,.Lvar_got@pcrel > + > + <possibly other insns that do not use 'addr_reg' or > 'data_reg'> > + > + lwz data_reg,0(addr_reg) > + > + We look for a single usage in the basic block where this external > + address is loaded, and convert it to a PCREL_OPT relocation so the > + linker can convert it to a single plwz in this case. Multiple uses > + or references in another basic block will force us to not use the > + PCREL_OPT relocation. > + > + We also optimize stores to the address of an external variable using the > + PCREL_GOT relocation and a single store that uses that external address. > If > + that is found we create the PCREL_OPT relocation to possibly convert: > + > + pld addr_reg,var@pcrel@got > + > + <possibly other insns that do not use 'addr_reg' or 'data_reg'> > + > + stw data_reg,0(addr_reg) > + > + into: > + > + pstw data_reg,var@pcrel > + > + <possibly other insns that do not use 'addr_reg' or 'data_reg'> > + > + nop > + > + If the variable is not defined in the main program or the code using it is > + not in the main program, the linker puts the address in the .got section > and > + generates: > + > + .section .got > + .Lvar_got: > + .dword var > + > + And at our point of reference we have: > + > + .section .text > + pld addr_reg,.Lvar_got@pcrel > + > + <possibly other insns that do not use 'addr_reg' or > 'data_reg'> > + > + stw data_reg,0(addr_reg) > + > + We only look for a single usage in the basic block where the external > + address is loaded. Multiple uses or references in another basic block > will > + force us to not use the PCREL_OPT relocation. 
*/ > + > +#define IN_TARGET_CODE 1 > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "backend.h" > +#include "rtl.h" > +#include "tree.h" > +#include "memmodel.h" > +#include "expmed.h" > +#include "optabs.h" > +#include "recog.h" > +#include "df.h" > +#include "tm_p.h" > +#include "ira.h" > +#include "print-tree.h" > +#include "varasm.h" > +#include "explow.h" > +#include "expr.h" > +#include "output.h" > +#include "tree-pass.h" > +#include "rtx-vector-builder.h" > +#include "print-rtl.h" > +#include "insn-attr.h" > +#include "insn-codes.h" > + > +/* Various counters. */ > +static struct { > + unsigned long extern_addrs; > + unsigned long loads; > + unsigned long adjacent_loads; > + unsigned long failed_loads; > + unsigned long stores; > + unsigned long adjacent_stores; > + unsigned long failed_stores; > +} counters; > + > +/* Unique integer that is appended to .Lpcrel to make a pcrel_opt label. */ > +static unsigned int pcrel_opt_next_num; > + > + > +/* Optimize a PC-relative load address to be used in a load. Before it calls > + this function, pcrel_opt_address () uses DF to make sure that it is safe > + to do the PCREL_OPT optimization on these insns. > + > + Convert insns of the form: > + > + (set (reg:DI addr) > + (symbol_ref:DI "ext_symbol")) > + > + ... > + > + (set (reg:<MODE> value) > + (mem:<MODE> (reg:DI addr))) > + > + into: > + > + (parallel [(set (reg:DI addr) > + (unspec:<MODE> [(symbol_ref:DI "ext_symbol") > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_ADDR)) > + (set (reg:DI data) > + (unspec:DI [(const_int 0)] > + UNSPEC_PCREL_OPT_LD_DATA))]) > + > + ... 
> + > + (parallel [(set (reg:<MODE>) > + (unspec:<MODE> [(mem:<MODE> (reg:DI addr)) > + (reg:DI data) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_RELOC)) > + (clobber (reg:DI addr))]) > + > + Because PCREL_OPT will move the actual location of the load from the > second > + insn to the first, we need to have the register for the load data be live > + starting at the first insn. > + > + If the destination register for the data being loaded is the same register > + used to hold the extern address, we generate this insn instead: > + > + (set (reg:DI data) > + (unspec:DI [(symbol_ref:DI "ext_symbol") > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_SAME_REG)) > + > + In the first insn, we set both the address of the external variable, and > mark > + that the variable being loaded both are created in that insn, and are > + consumed in the second insn. The mode used in the first insn for the data > + register that will be loaded in the second insn doesn't matter in the end > so > + we use DImode. We just need to mark that both registers may be set in the > + first insn, and will be used in the second insn. > + > + The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus > + a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC > + insn will generate the .reloc to tell the linker to tie the load address > and > + load using that address together. > + > + pld b,ext_symbol@got@pcrel > + .Lpcrel1: > + > + ... > + > + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) > + lwz r,0(b) > + > + If ext_symbol is defined in another object file in the main program and we > + are linking the main program, the linker will convert the above > instructions > + to: > + > + plwz r,ext_symbol@got@pcrel > + > + ... > + > + nop > + > + ADDR_INSN is the insn that is loading the address. > + LOAD_INSN is the insn that uses the address to load the actual data. 
*/ > + > +static void > +pcrel_opt_load (rtx_insn *addr_insn, rtx_insn *load_insn) > +{ > + rtx addr_set = PATTERN (addr_insn); > + gcc_assert (GET_CODE (addr_set) == SET); > + > + rtx addr_reg = SET_DEST (addr_set); > + gcc_assert (base_reg_operand (addr_reg, Pmode)); > + > + rtx addr_symbol = SET_SRC (addr_set); > + gcc_assert (pcrel_external_address (addr_symbol, Pmode)); > + > + rtx load_set = PATTERN (load_insn); > + gcc_assert (GET_CODE (load_set) == SET); > + > + /* Make sure there are no references to the register being loaded > + between the two insns. */ > + rtx reg = SET_DEST (load_set); > + if (reg_used_between_p (reg, addr_insn, load_insn) > + || reg_set_between_p (reg, addr_insn, load_insn)) > + return; > + > + rtx mem = SET_SRC (load_set); > + machine_mode reg_mode = GET_MODE (reg); > + machine_mode mem_mode = GET_MODE (mem); > + rtx mem_inner = mem; > + unsigned int reg_regno = reg_or_subregno (reg); > + > + /* Handle the fact that LWA is a DS format instruction, but LWZ is a D > format > + instruction. If the mem load is a signed SImode (i.e. LWA would be > used) > + we set mem_mode to DImode so that pcrel_opt_valid_mem_p() will check > that > + the address will work for a DS-form instruction. If it won't work, we > skip > + the optimization. The float loads are all indexed so there are no > problems > + there. */ > + > + if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode) > + { > + if (!INT_REGNO_P (reg_regno)) > + return; > + > + mem_inner = XEXP (mem, 0); > + mem_mode = DImode; > + } > + > + else if (GET_CODE (mem) == SIGN_EXTEND > + || GET_CODE (mem) == ZERO_EXTEND > + || GET_CODE (mem) == FLOAT_EXTEND) > + { > + mem_inner = XEXP (mem, 0); > + mem_mode = GET_MODE (mem_inner); > + } > + > + if (!MEM_P (mem_inner)) > + return; > + > + /* Can we do PCREL_OPT for this reference? 
*/ > + if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem_inner)) > + return; > + > + /* Allocate a new PC-relative label, and update the load external address > + insn. > + > + If the register being loaded is different from the address register, we > + need to indicate both registers are set at the load of the address. > + > + (parallel [(set (reg load) > + (unspec [(symbol_ref addr_symbol) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_ADDR)) > + (set (reg addr) > + (unspec [(const_int 0)] > + UNSPEC_PCREL_OPT_LD_DATA))]) > + > + If the register being loaded is the same as the address register, we use > + an alternate form: > + > + (set (reg load) > + (unspec [(symbol_ref addr_symbol) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_SAME_REG)) */ > + unsigned int addr_regno = reg_or_subregno (addr_reg); > + rtx label_num = GEN_INT (++pcrel_opt_next_num); > + rtx reg_di = gen_rtx_REG (DImode, reg_regno); > + rtx addr_pattern; > + > + /* Create the load address, either using the pattern with an explicit > clobber > + if the address register is not the same as the register being loaded, or > + using the pattern that requires the address register to be the address > + loaded. */ > + if (addr_regno != reg_regno) > + addr_pattern = gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num, > + reg_di); > + else > + addr_pattern = gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol, > + label_num); > + > + validate_change (addr_insn, &PATTERN (addr_insn), addr_pattern, false); > + > + /* Update the load insn. If the mem had a sign/zero/float extend, add that > + also after doing the UNSPEC. Add an explicit clobber of the external > + address register just to make it clear that the address register dies. 
> + > + (parallel [(set (reg:<MODE> data) > + (unspec:<MODE> [(mem (addr_reg) > + (reg:DI data) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_LD_RELOC)) > + (clobber (reg:DI addr_reg))]) */ > + rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num); > + rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load, > + UNSPEC_PCREL_OPT_LD_RELOC); > + > + if (GET_CODE (mem) != GET_CODE (mem_inner)) > + new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load); > + > + rtx new_load_set = gen_rtx_SET (reg, new_load); > + rtx load_clobber = gen_rtx_CLOBBER (VOIDmode, > + (addr_regno == reg_regno > + ? gen_rtx_SCRATCH (Pmode) > + : addr_reg)); > + rtx new_load_pattern > + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber)); > + > + validate_change (load_insn, &PATTERN (load_insn), new_load_pattern, false); > + > + /* Attempt to apply the changes: */ > + if (!apply_change_group ()) > + { > + /* PCREL_OPT load optimization did not succeed. */ > + counters.failed_loads++; > + if (dump_file) > + fprintf (dump_file, > + "PCREL_OPT load failed (addr insn = %d, use insn = %d).\n", > + INSN_UID (addr_insn), > + INSN_UID (load_insn)); > + return; > + } > + > + /* PCREL_OPT load optimization succeeded. */ > + counters.loads++; > + if (next_nonnote_insn (addr_insn) == load_insn) > + counters.adjacent_loads++; > + > + if (dump_file) > + fprintf (dump_file, > + "PCREL_OPT load (addr insn = %d, use insn = %d).\n", > + INSN_UID (addr_insn), > + INSN_UID (load_insn)); > + > + /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it > + after we have made changes to the insns. */ > + df_analyze (); > + > +} > + > +/* Optimize a PC-relative load address to be used in a store. Before calling > + this function, pcrel_opt_address () uses DF to make sure it is safe to do > + the PCREL_OPT optimization. > + > + Convert insns of the form: > + > + (set (reg:DI addr) > + (symbol_ref:DI "ext_symbol")) > + > + ... 
> + > + (set (mem:<MODE> (reg:DI addr)) > + (reg:<MODE> value)) > + > + into: > + > + (parallel [(set (reg:DI addr) > + (unspec:DI [(symbol_ref:DI "ext_symbol") > + (const_int label_num)] > + UNSPEC_PCREL_OPT_ST_ADDR)) > + (use (reg:<MODE> value))]) > + > + ... > + > + (parallel [(set (mem:<MODE> (reg:DI addr)) > + (unspec:<MODE> [(reg:<MODE>) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (reg:DI addr))]) > + > + The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a > + definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC > insn > + will generate the .reloc to tell the linker to tie the load address and > store > + using that address together. > + > + pld b,ext_symbol@got@pcrel > + .Lpcrel1: > + > + ... > + > + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) > + stw r,0(b) > + > + If ext_symbol is defined in another object file in the main program and we > + are linking the main program, the linker will convert the above > instructions > + to: > + > + pstw r,ext_symbol@got@pcrel > + > + ... > + > + nop */ > + > +static void > +pcrel_opt_store (rtx_insn *addr_insn, /* insn loading address. */ > + rtx_insn *store_insn) /* insn using address. */ > +{ > + rtx addr_old_set = PATTERN (addr_insn); > + gcc_assert (GET_CODE (addr_old_set) == SET); > + > + rtx addr_reg = SET_DEST (addr_old_set); > + gcc_assert (base_reg_operand (addr_reg, Pmode)); > + > + rtx addr_symbol = SET_SRC (addr_old_set); > + gcc_assert (pcrel_external_address (addr_symbol, Pmode)); > + > + rtx store_set = PATTERN (store_insn); > + gcc_assert (GET_CODE (store_set) == SET); > + > + rtx mem = SET_DEST (store_set); > + if (!MEM_P (mem)) > + return; > + > + machine_mode mem_mode = GET_MODE (mem); > + rtx reg = SET_SRC (store_set); > + > + /* Don't allow storing the address of the external variable. */ > + if (reg_or_subregno (reg) == reg_or_subregno (addr_reg)) > + return; > + > + /* Can we do PCREL_OPT for this reference?
*/ > + if (!pcrel_opt_valid_mem_p (reg, mem_mode, mem)) > + return; > + > + /* Allocate a new PC-relative label, and update the load address insn. > + > + (parallel [(set (reg addr) > + (unspec [(symbol_ref symbol) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_ST_ADDR)) > + (use (reg store))]) > + */ > + rtx label_num = GEN_INT (++pcrel_opt_next_num); > + rtvec v_addr = gen_rtvec (2, addr_symbol, label_num); > + rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr, > + UNSPEC_PCREL_OPT_ST_ADDR); > + rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec); > + rtx addr_use = gen_rtx_USE (VOIDmode, reg); > + rtx addr_new_pattern > + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use)); > + > + validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, false); > + > + /* Update the store insn. Add an explicit clobber of the external address > + register just to be sure there are no additional uses of the address > + register. > + > + (parallel [(set (mem (addr_reg) > + (unspec:<MODE> [(reg) > + (const_int label_num)] > + UNSPEC_PCREL_OPT_ST_RELOC)) > + (clobber (reg:DI addr_reg))]) */ > + rtvec v_store = gen_rtvec (2, reg, label_num); > + rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store, > + UNSPEC_PCREL_OPT_ST_RELOC); > + > + rtx new_store_set = gen_rtx_SET (mem, new_store); > + rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg); > + rtx new_store_pattern > + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, > store_clobber)); > + > + validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, > false); > + > + /* Attempt to apply the changes: */ > + if (!apply_change_group ()) > + { > + /* PCREL_OPT store failed. */ > + counters.failed_stores++; > + if (dump_file) > + fprintf (dump_file, > + "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n", > + INSN_UID (addr_insn), > + INSN_UID (store_insn)); > + return; > + } > + > + /* PCREL_OPT store succeeded. 
*/ > + counters.stores++; > + if (next_nonnote_insn (addr_insn) == store_insn) > + counters.adjacent_stores++; > + > + if (dump_file) > + fprintf (dump_file, > + "PCREL_OPT store (addr insn = %d, use insn = %d).\n", > + INSN_UID (addr_insn), > + INSN_UID (store_insn)); > + > + /* Because we have set DF_DEFER_INSN_RESCAN, we have to explicitly do it > + after we have made changes to the insns. */ > + df_analyze(); > + > +} > + > +/* Return the register used as the base register of MEM, if the instruction > has > + a pc-relative form. We look for BSWAP to rule out LFIWAX/LFIWZX/STFIWX, > and > + ROTATE/VEC_SELECT are RTX_EXTRA not RTX_UNARY which rules out lxvd2x. This > + excludes instructions that do not have a pc-relative form. */ > + > +static rtx > +get_mem_base_reg (rtx mem) > +{ > + const char * fmt; > + > + while (!MEM_P (mem)) > + { > + switch (GET_CODE (mem)) > + { > + case BSWAP: /* LFIWAX/LFIWZX/STFIWX. */ > + case UNSPEC: > + case UNSPEC_VOLATILE: /* Leave this alone for obvious reasons. */ > + case ROTATE: /* lxvd2x. */ > + case VEC_SELECT: > + return NULL_RTX; > + default: ; > + } > + if (GET_RTX_CLASS (GET_CODE (mem)) != RTX_UNARY) > + return NULL_RTX; > + /* Rule out LFIWAX/LFIWZX/STFIWX. */ > + if (GET_CODE (mem) == BSWAP) > + return NULL_RTX; > + fmt = GET_RTX_FORMAT (GET_CODE (mem)); > + if (fmt[0] != 'e') > + return NULL_RTX; > + mem = XEXP (mem, 0); > + if (mem == NULL_RTX ) > + return NULL_RTX; > + } > + > + if (!MEM_SIZE_KNOWN_P (mem)) > + return NULL_RTX; > + > + rtx addr_rtx = (XEXP (mem, 0)); > + if (GET_CODE (addr_rtx) == PRE_MODIFY) > + addr_rtx = XEXP (addr_rtx, 1); > + > + while (GET_CODE (addr_rtx) == PLUS > + && CONST_INT_P (XEXP (addr_rtx, 1))) > + addr_rtx = XEXP (addr_rtx, 0); > + > + if (!REG_P (addr_rtx)) > + return NULL_RTX; > + > + return addr_rtx; > +} > + > +/* Check whether INSN contains a reference to REGNO that will inhibit the > + PCREL_OPT optimization. 
If TYPE is a load or store instruction, return > true > + if there is a definition of REGNO. If TYPE is a load instruction, then > + return true if there is a use of REGNO. */ > + > +static bool > +insn_references_regno_p (rtx_insn *insn, unsigned int regno, > + enum attr_type type) > +{ > + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); > + /* If we don't have the insn_info for some reason, do not attempt to > optimize > + this reference. */ > + if (!insn_info) > + return true; > + > + df_ref ref; > + > + /* Return true if there is a definition of REGNO. */ > + for (ref = DF_INSN_INFO_DEFS (insn_info); ref; ref = DF_REF_NEXT_LOC (ref)) > + if (DF_REF_REGNO (ref) == regno) > + return true; > + > + /* If type is a load, return true if there is a use of REGNO. */ > + if (type == TYPE_LOAD > + || type == TYPE_FPLOAD > + || type == TYPE_VECLOAD) > + for (ref = DF_INSN_INFO_USES (insn_info); ref; ref = DF_REF_NEXT_LOC > (ref)) > + if (DF_REF_REGNO (ref) == regno) > + return true; > + > + return false; > +} > + > +/* Given an insn that loads up a base register with the address of an > + external symbol, see if we can optimize it with the PCREL_OPT > + optimization. > + > + DF is used to make sure that there is exactly one definition and one > + non-debug use of the address register defined by the insn. The use insn > must > + be a non-prefix insn, and must also be in the same basic block as the > address > + insn. > + > + ADDR_INSN is the insn that loads the external symbol address. */ > + > +static void > +pcrel_opt_address (rtx_insn *addr_insn) > +{ > + counters.extern_addrs++; > + > + /* Do some basic validation.
*/ > + rtx addr_set = PATTERN (addr_insn); > + if (GET_CODE (addr_set) != SET) > + return; > + > + rtx addr_reg = SET_DEST (addr_set); > + rtx addr_symbol = SET_SRC (addr_set); > + > + if (!base_reg_operand (addr_reg, Pmode) > + || !pcrel_external_address (addr_symbol, Pmode)) > + return; > + > + /* The address register must have exactly one definition. */ > + struct df_insn_info *insn_info = DF_INSN_INFO_GET (addr_insn); > + if (!insn_info) > + return; > + > + df_ref def = df_single_def (insn_info); > + if (!def) > + return; > + > + /* Make sure there is at least one use. */ > + df_link *chain = DF_REF_CHAIN (def); > + if (!chain || !chain->ref) > + return; > + > + /* Get the insn of the possible load or store. */ > + rtx_insn *use_insn = DF_REF_INSN (chain->ref); > + > + /* Ensure there are no other uses. */ > + for (chain = chain->next; chain; chain = chain->next) > + if (chain->ref && DF_REF_INSN_INFO (chain->ref)) > + { > + gcc_assert (DF_REF_INSN (chain->ref)); > + if (NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) > + return; > + } > + > + /* The use instruction must be a single non-prefixed instruction. */ > + if (get_attr_length (use_insn) != 4) > + return; > + > + /* The address and the memory operation must be in the same basic block. > */ > + if (BLOCK_FOR_INSN (use_insn) != BLOCK_FOR_INSN (addr_insn)) > + return; > + > + /* If this isn't a simple SET, skip doing the optimization. */ > + if (GET_CODE (PATTERN (use_insn)) != SET) > + return; > + > + enum attr_type use_insn_type = get_attr_type (use_insn); > + unsigned int use_regno; > + > + /* Make sure the use_insn is using addr_reg as its base register > + for the load or store, and determine the regno for the register > + used in the use_insn. */ > + rtx use_dest, use_src; > + switch (use_insn_type) > + { > + case TYPE_LOAD: > + case TYPE_FPLOAD: > + case TYPE_VECLOAD: > + /* Make sure our address register is the same register used in the > + base address of the load. 
*/ > + if (addr_reg != get_mem_base_reg (SET_SRC (PATTERN (use_insn)))) > + return; > + /* Make sure we are setting a register before we look at REGNO. */ > + use_dest = SET_DEST (PATTERN (use_insn)); > + if (!register_operand (use_dest, GET_MODE (use_dest))) > + return; > + use_regno = REGNO (use_dest); > + break; > + case TYPE_STORE: > + case TYPE_FPSTORE: > + case TYPE_VECSTORE: > + /* Make sure our address register is the same register used in the > + base address of the store. */ > + if (addr_reg != get_mem_base_reg (SET_DEST (PATTERN (use_insn)))) > + return; > + /* Make sure this is a register before we look at REGNO. */ > + use_src = SET_SRC (PATTERN (use_insn)); > + if (!register_operand (use_src, GET_MODE (use_src))) > + return; > + use_regno = REGNO (use_src); > + break; > + default: > + /* We can only optimize loads and stores. Ignore everything else. */ > + return; > + } > + > + rtx_insn *insn; > + for (insn = NEXT_INSN (addr_insn); > + insn != use_insn; > + insn = NEXT_INSN (insn)) > + { > + /* If we see a call, do not do the PCREL_OPT optimization. */ > + if (CALL_P (insn)) > + return; > + > + /* For a normal insn, see if it is a load or store. */ > + if (NONDEBUG_INSN_P (insn) > + && GET_CODE (PATTERN (insn)) != USE > + && GET_CODE (PATTERN (insn)) != CLOBBER) > + { > + switch (get_attr_type (insn)) > + { > + case TYPE_LOAD: > + /* While load of the external address is a 'load' for scheduling > + purposes, it should be safe to allow loading other external > + addresses between the load of the external address we are > + currently looking at and the load or store using that > + address. */ > + if (get_attr_loads_external_address (insn) > + == LOADS_EXTERNAL_ADDRESS_YES) > + break; > + /* fall through */ > + > + case TYPE_FPLOAD: > + case TYPE_VECLOAD: > + /* Don't do the PCREL_OPT store optimization if there is a load > + operation. 
For example, the load might be trying to load the > + value being stored in between getting the address and doing > + the store. */ > + if (use_insn_type == TYPE_STORE > + || use_insn_type == TYPE_FPSTORE > + || use_insn_type == TYPE_VECSTORE) > + return; > + break; > + > + case TYPE_STORE: > + case TYPE_FPSTORE: > + case TYPE_VECSTORE: > + /* Don't do the PCREL_OPT load optimization if there is a store > + operation. Perhaps the store might be to the global variable > + through a pointer. */ > + return; > + > + case TYPE_LOAD_L: > + case TYPE_STORE_C: > + case TYPE_HTM: > + case TYPE_HTMSIMPLE: > + /* Don't do the optimization through atomic operations. */ > + return; > + > + default: > + break; > + } > + } > + > + /* Check for invalid references of the non-address register that is > + used in the load or store instruction. */ > + if (insn_references_regno_p (insn, use_regno, use_insn_type)) > + return; > + } > + > + /* Is this a load or a store? */ > + switch (use_insn_type) > + { > + case TYPE_LOAD: > + case TYPE_FPLOAD: > + case TYPE_VECLOAD: > + pcrel_opt_load (addr_insn, use_insn); > + break; > + > + case TYPE_STORE: > + case TYPE_FPSTORE: > + case TYPE_VECSTORE: > + pcrel_opt_store (addr_insn, use_insn); > + break; > + > + default: > + gcc_unreachable (); > + } > +} > + > +/* Optimize pcrel external variable references. */ > + > +static unsigned int > +pcrel_opt_pass (function *fun) > +{ > + basic_block bb; > + rtx_insn *insn, *curr_insn = 0; > + > + memset (&counters, 0, sizeof (counters)); > + > + /* Dataflow analysis for use-def chains. However we have to specify both > UD > + and DU as otherwise when we make changes to insns for the PCREL_OPT there > + will be dangling references. */ > + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); > + df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); > + df_note_add_problem (); > + df_analyze (); > + > + /* Set the defer flag as our pattern of operation will be to modify two > insns, > + then call df_analyze (). 
*/ > + df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE); > + > + if (dump_file) > + fprintf (dump_file, "\n"); > + > + /* Look at each basic block to see if there is a load of an external > + variable's external address, and a single load/store using that external > + address. */ > + FOR_ALL_BB_FN (bb, fun) > + { > + FOR_BB_INSNS_SAFE (bb, insn, curr_insn) > + { > + if (NONJUMP_INSN_P (insn) > + && single_set (insn) > + && get_attr_loads_external_address (insn) > + == LOADS_EXTERNAL_ADDRESS_YES) > + pcrel_opt_address (insn); > + } > + } > + > + if (dump_file) > + { > + fprintf (dump_file, > + "\n# of loads of an address of an external symbol = %lu\n", > + counters.extern_addrs); > + > + fprintf (dump_file, "# of PCREL_OPT loads = %lu (adjacent %lu)\n", > + counters.loads, counters.adjacent_loads); > + > + if (counters.failed_loads) > + fprintf (dump_file, "# of failed PCREL_OPT loads = %lu\n", > + counters.failed_loads); > + > + fprintf (dump_file, "# of PCREL_OPT stores = %lu (adjacent %lu)\n", > + counters.stores, counters.adjacent_stores); > + > + if (counters.failed_stores) > + fprintf (dump_file, "# of failed PCREL_OPT stores = %lu\n", > + counters.failed_stores); > + > + fprintf (dump_file, "\n"); > + } > + > + df_remove_problem (df_chain); > + df_process_deferred_rescans (); > + df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE); > + df_analyze (); > + return 0; > +} > + > +/* Optimize pc-relative references for the new PCREL_OPT pass. */ > +const pass_data pass_data_pcrel_opt = > +{ > + RTL_PASS, /* type. */ > + "pcrel_opt", /* name. */ > + OPTGROUP_NONE, /* optinfo_flags. */ > + TV_NONE, /* tv_id. */ > + 0, /* properties_required. */ > + 0, /* properties_provided. */ > + 0, /* properties_destroyed. */ > + 0, /* todo_flags_start. */ > + TODO_df_finish, /* todo_flags_finish. */ > +}; > + > +/* Pass data structures. 
*/ > +class pcrel_opt : public rtl_opt_pass > +{ > +public: > + pcrel_opt (gcc::context *ctxt) > + : rtl_opt_pass (pass_data_pcrel_opt, ctxt) > + {} > + > + ~pcrel_opt (void) > + {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) > + { > + return (TARGET_PCREL && TARGET_PCREL_OPT && optimize); > + } > + > + virtual unsigned int execute (function *fun) > + { > + return pcrel_opt_pass (fun); > + } > + > + opt_pass *clone () > + { > + return new pcrel_opt (m_ctxt); > + } > +}; > + > +rtl_opt_pass * > +make_pass_pcrel_opt (gcc::context *ctxt) > +{ > + return new pcrel_opt (ctxt); > +} > diff --git a/gcc/config/rs6000/rs6000-protos.h > b/gcc/config/rs6000/rs6000-protos.h > index d9d44fe9821..203660b0a78 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -193,10 +193,13 @@ extern enum insn_form address_to_insn_form (rtx, > machine_mode, > enum non_prefixed_form); > extern bool address_is_non_pfx_d_or_x (rtx addr, machine_mode mode, > enum non_prefixed_form > non_prefix_format); > +extern bool pcrel_opt_valid_mem_p (rtx, machine_mode, rtx); > +enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode); > extern bool prefixed_load_p (rtx_insn *); > extern bool prefixed_store_p (rtx_insn *); > extern bool prefixed_paddi_p (rtx_insn *); > extern void rs6000_asm_output_opcode (FILE *); > +extern void output_pcrel_opt_reloc (rtx); > extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int); > extern int rs6000_adjust_insn_length (rtx_insn *, int); > > @@ -309,6 +312,7 @@ namespace gcc { class context; } > class rtl_opt_pass; > > extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); > +extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *); > extern bool rs6000_sum_of_two_registers_p (const_rtx expr); > extern bool rs6000_quadword_masked_address_p (const_rtx exp); > extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx); > diff --git a/gcc/config/rs6000/rs6000.c 
b/gcc/config/rs6000/rs6000.c > index ec068c58aa5..5edbe3293c6 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -1173,7 +1173,6 @@ static bool rs6000_secondary_reload_move (enum > rs6000_reg_type, > machine_mode, > secondary_reload_info *, > bool); > -static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode > mode); > rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); > > /* Hash table stuff for keeping track of TOC entries. */ > @@ -4452,6 +4451,9 @@ rs6000_option_override_internal (bool global_init_p) > rs6000_isa_flags &= ~OPTION_MASK_MMA; > } > > + if (!TARGET_PCREL && TARGET_PCREL_OPT) > + rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT; > + > if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) > rs6000_print_isa_options (stderr, 0, "after subtarget", > rs6000_isa_flags); > > @@ -8985,8 +8987,57 @@ rs6000_delegitimize_address (rtx orig_x) > { > rtx x, y, offset; > > - if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR) > - orig_x = XVECEXP (orig_x, 0, 0); > + /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It > + encodes loading up the high part of the address of a TOC reference along > + with a load of a GPR using the same base register used for the load. We > + return the original SYMBOL_REF. > + > + (set (reg:INT1 <reg> > + (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))) > + > + UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These > + UNSPECs include the external SYMBOL_REF along with the value being > loaded. > + We return the original SYMBOL_REF. > + > + (parallel [(set (reg:DI <base-reg>) > + (unspec:DI [(symbol_ref <symbol>) > + (const_int <marker>)] > + UNSPEC_PCREL_OPT_LD_ADDR)) > + (set (reg:DI <load-reg>) > + (unspec:DI [(const_int 0)] > + UNSPEC_PCREL_OPT_LD_DATA))]) > + > + UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the > + GPR being loaded is the same as the GPR used to hold the external > address. 
> + > + (set (reg:DI <base-reg>) > + (unspec:DI [(symbol_ref <symbol>) > + (const_int <marker>)] > + UNSPEC_PCREL_OPT_LD_SAME_REG)) > + > + UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This > + UNSPEC includes the external SYMBOL_REF along with the value being > stored. > + We return the original SYMBOL_REF. > + > + (parallel [(set (reg:DI <base-reg>) > + (unspec:DI [(symbol_ref <symbol>) > + (const_int <marker>)] > + UNSPEC_PCREL_OPT_ST_ADDR)) > + (use (reg <store-reg>))]) */ > + > + if (GET_CODE (orig_x) == UNSPEC) > + switch (XINT (orig_x, 1)) > + { > + case UNSPEC_FUSION_GPR: > + case UNSPEC_PCREL_OPT_LD_ADDR: > + case UNSPEC_PCREL_OPT_LD_SAME_REG: > + case UNSPEC_PCREL_OPT_ST_ADDR: > + orig_x = XVECEXP (orig_x, 0, 0); > + break; > + > + default: > + break; > + } > > orig_x = delegitimize_mem_from_attrs (orig_x); > > @@ -23788,6 +23839,7 @@ static struct rs6000_opt_mask const > rs6000_opt_masks[] = > { "mulhw", OPTION_MASK_MULHW, false, true > }, > { "multiple", OPTION_MASK_MULTIPLE, > false, true }, > { "pcrel", OPTION_MASK_PCREL, false, true > }, > + { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true > }, > { "popcntb", OPTION_MASK_POPCNTB, false, true > }, > { "popcntd", OPTION_MASK_POPCNTD, false, true > }, > { "power8-fusion", OPTION_MASK_P8_FUSION, false, true > }, > @@ -25932,6 +25984,32 @@ address_is_non_pfx_d_or_x (rtx addr, machine_mode > mode, > return false; > } > > +/* Return true if a REG with a given MODE that is loaded from or stored into a > MEM > + location uses a non-prefixed D/DS/DQ-form address. This is used to > validate > + the load or store with the PCREL_OPT optimization to make sure it is an > + instruction that can be optimized. > + > + We need to specify the MODE separately from the REG to allow for loads > that > + include zero/sign/float extension.
*/ > + > +bool > +pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem) > +{ > + /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the > + PCREL_OPT optimization. */ > + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode); > + if (non_prefixed == NON_PREFIXED_X) > + return false; > + > + /* Check if this is a non-prefixed D/DS/DQ-form instruction. */ > + rtx addr = XEXP (mem, 0); > + enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed); > + return (iform == INSN_FORM_BASE_REG > + || iform == INSN_FORM_D > + || iform == INSN_FORM_DS > + || iform == INSN_FORM_DQ); > +} > + > /* Helper function to see if we're potentially looking at lfs/stfs. > - PARALLEL containing a SET and a CLOBBER > - stfs: > @@ -25990,7 +26068,7 @@ is_lfs_stfs_insn (rtx_insn *insn) > /* Helper function to take a REG and a MODE and turn it into the non-prefixed > instruction format (D/DS/DQ) used for offset memory. */ > > -static enum non_prefixed_form > +enum non_prefixed_form > reg_to_non_prefixed (rtx reg, machine_mode mode) > { > /* If it isn't a register, use the defaults. */ > @@ -26213,11 +26291,34 @@ void > rs6000_asm_output_opcode (FILE *stream) > { > if (next_insn_prefixed_p) > - fprintf (stream, "p"); > + { > + fprintf (stream, "p"); > + > + /* Reset the flag in the case where there are separate insn lines in > the > + sequence, so the 'p' is only emitted for the first line. This shows > up > + when we are doing the PCREL_OPT optimization, in that the label > created > + with %r<n> would have a leading 'p' printed. */ > + next_insn_prefixed_p = false; > + } > > return; > } > > +/* Emit the relocation to tie the next instruction to a previous instruction > + that loads up an external address. This is used to do the PCREL_OPT > + optimization. Note, the label is generated after the PLD of the got > + pc-relative address to allow for the assembler to insert NOPs before the > PLD > + instruction. 
The operand is a constant integer that is the label > + number. */ > + > +void > +output_pcrel_opt_reloc (rtx label_num) > +{ > + rtx operands[1] = { label_num }; > + output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", > + operands); > +} > + > /* Adjust the length of an INSN. LENGTH is the currently-computed length and > should be adjusted to reflect any required changes. This macro is used > when > there is some systematic length adjustment required that would be > difficult > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index a1315523fec..ccecad321f6 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -292,6 +292,10 @@ (define_attr "prefixed" "no,yes" > > (const_string "no"))) > > +;; Whether an insn loads an external address for the PCREL_OPT optimization. > +(define_attr "loads_external_address" "no,yes" > + (const_string "no")) > + > ;; Return the number of real hardware instructions in a combined insn. If it > ;; is 0, just use the length / 4. > (define_attr "num_insns" "" (const_int 0)) > @@ -10243,7 +10247,8 @@ (define_insn "*pcrel_extern_addr" > "TARGET_PCREL" > "ld %0,%a1" > [(set_attr "prefixed" "yes") > - (set_attr "type" "load")]) > + (set_attr "type" "load") > + (set_attr "loads_external_address" "yes")]) > > ;; TOC register handling. > > @@ -14928,3 +14933,4 @@ (define_insn "*cmpeqb_internal" > (include "crypto.md") > (include "htm.md") > (include "fusion.md") > +(include "pcrel-opt.md") > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index ae9e91e77cc..0dbdf753673 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -609,6 +609,10 @@ mpcrel > Target Mask(PCREL) Var(rs6000_isa_flags) > Generate (do not generate) pc-relative memory addressing. > > +mpcrel-opt > +Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags) > +Generate (do not generate) pc-relative memory optimizations for externals.
> + > mmma > Target Mask(MMA) Var(rs6000_isa_flags) > Generate (do not generate) MMA instructions. > diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 > index 1541a653738..44f7ffb35fe 100644 > --- a/gcc/config/rs6000/t-rs6000 > +++ b/gcc/config/rs6000/t-rs6000 > @@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def > TM_H += $(srcdir)/config/rs6000/rs6000-modes.h > PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def > > +rs6000-pcrel-opt.o: $(srcdir)/config/rs6000/rs6000-pcrel-opt.c > + $(COMPILE) $< > + $(POSTCOMPILE) > + > rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c > $(COMPILE) $< > $(POSTCOMPILE) > @@ -90,4 +94,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ > $(srcdir)/config/rs6000/crypto.md \ > $(srcdir)/config/rs6000/htm.md \ > $(srcdir)/config/rs6000/dfp.md \ > - $(srcdir)/config/rs6000/fusion.md > + $(srcdir)/config/rs6000/fusion.md \ > + $(srcdir)/config/rs6000/pcrel-opt.md > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c > new file mode 100644 > index 00000000000..c82041c9dc6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE unsigned int > + > +/* Test whether using an external variable twice (doing an increment) > prevents > + the PCREL_OPT optimization. */ > +extern TYPE ext; > + > +void > +inc (void) > +{ > + ext++; /* No PCREL_OPT (uses address twice). 
*/ > +} > + > +/* { dg-final { scan-assembler-not "R_PPC64_PCREL_OPT" } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c > new file mode 100644 > index 00000000000..d35862fcb6e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE double > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + double. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c > new file mode 100644 > index 00000000000..7e1ff99f20e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE long long > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for long > + long. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). 
*/ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +double > +get_double (void) > +{ > + return (double) ext[0]; /* PCREL_OPT relocation. */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 3 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c > new file mode 100644 > index 00000000000..4143aeb7371 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE unsigned short > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > unsigned > + short. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +double > +get_double (void) > +{ > + return (double) ext[0]; /* No PCREL_OPT (LXSIHZX is indexed). 
*/ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c > new file mode 100644 > index 00000000000..30d3236f95c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE unsigned char > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > unsigned > + char. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +double > +get_double (void) > +{ > + return (double) ext[0]; /* No PCREL_OPT (LXSIBZX is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c > new file mode 100644 > index 00000000000..9d1e2a1956f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE float > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + float. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. 
*/ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +double > +get_double (void) > +{ > + return (double) ext[0]; /* PCREL_OPT relocation. */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 3 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c > new file mode 100644 > index 00000000000..17be6fa1778 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c > @@ -0,0 +1,41 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE int > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for int. > */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +double > +get_double (void) > +{ > + return (double) ext[0]; /* No PCREL_OPT (LFIWAX is indexed). 
*/ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c > new file mode 100644 > index 00000000000..8c12aea5acd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE vector double > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + vector double. */ > +extern TYPE ext[]; > + > +TYPE > +get (void) > +{ > + return ext[0]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get2 (void) > +{ > + return ext[2]; /* PCREL_OPT relocation. */ > +} > + > +TYPE > +get_large (void) > +{ > + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */ > +} > + > +TYPE > +get_variable (unsigned long n) > +{ > + return ext[n]; /* No PCREL_OPT (load is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c > new file mode 100644 > index 00000000000..d795d35d8de > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE double > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + double. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). 
*/ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c > new file mode 100644 > index 00000000000..bf57de4b886 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE long long > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for long > + long. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c > new file mode 100644 > index 00000000000..8822e767dfe > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE unsigned short > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > unsigned > + short. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. 
*/ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +void > +store_double (double a) > +{ > + ext[0] = (TYPE) a; /* No PCREL_OPT (STXSIHX is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c > new file mode 100644 > index 00000000000..2f756833717 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE unsigned char > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > unsigned > + char. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +void > +store_double (double a) > +{ > + ext[0] = (TYPE) a; /* No PCREL_OPT (STXSIBX is indexed). 
*/ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c > new file mode 100644 > index 00000000000..3dd88aad856 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE float > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + float. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c > new file mode 100644 > index 00000000000..78dc8120efe > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c > @@ -0,0 +1,41 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE int > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for int. > */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). 
*/ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +void > +store_double (double a) > +{ > + ext[0] = (TYPE) a; /* No PCREL_OPT (STFIWX is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c > b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c > new file mode 100644 > index 00000000000..2c602eb3103 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_pcrel } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ > + > +#define TYPE vector double > +#define LARGE 0x20000 > + > +/* Test whether we get the right number of PCREL_OPT optimizations for > + vector double. */ > +extern TYPE ext[]; > + > +void > +store (TYPE a) > +{ > + ext[0] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store2 (TYPE a) > +{ > + ext[2] = a; /* PCREL_OPT relocation. */ > +} > + > +void > +store_large (TYPE a) > +{ > + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */ > +} > + > +void > +store_variable (TYPE a, unsigned long n) > +{ > + ext[n] = a; /* No PCREL_OPT (store is indexed). */ > +} > + > +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */ > -- > 2.27.0 >