power10: Add PCREL_OPT store support. This patch adds support for optimizing power10 stores to an external variable to eliminate loading the address of the variable, and then doing a subsequent store using that address.
The previous patch added the support for optimizing power10 loads from an external variable. The third patch will add the test suite for these patches. I have built compilers with and without this set of 3 patches doing a bootstrap build and make check. There were no regressions, and the new tests passed. Can I check these patches into the master branch for GCC? Because this is new functionality, I do not intend to back port these patches to GCC 10 at this time. gcc/ 2020-09-04 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/pcrel-opt.c (pcrel_opt_store): New function. (pcrel_opt_address): Add PCREL_OPT support for stores. (pcrel_opt_pass): Print PCREL_OPT store statistics. * config/rs6000/pcrel-opt.md (UNSPEC_PCREL_OPT_ST_ADDR): New unspec. (UNSPEC_PCREL_OPT_ST_RELOC): New unspec. (pcrel_opt_st_addr<mode>): New insns for PCREL_OPT store support. (pcrel_opt_st<mode>, QHSI iterator): New insns for PCREL_OPT store support. (pcrel_opt_stdi): New insn for PCREL_OPT store support. (pcrel_opt_stsf): New insn for PCREL_OPT store support. (pcrel_opt_stdf): New insns for PCREL_OPT store support. (pcrel_opt_st<mode>, PO_VECT iterator): New insns for PCREL_OPT store support. * config/rs6000/rs6000.c (rs6000_delegitimize_address): Add support for PCREL_OPT store. --- gcc/config/rs6000/pcrel-opt.c | 228 ++++++++++++++++++++++++++++++++- gcc/config/rs6000/pcrel-opt.md | 132 ++++++++++++++++++- gcc/config/rs6000/rs6000.c | 13 +- 3 files changed, 365 insertions(+), 8 deletions(-) diff --git a/gcc/config/rs6000/pcrel-opt.c b/gcc/config/rs6000/pcrel-opt.c index f831853c90b..291ee35b690 100644 --- a/gcc/config/rs6000/pcrel-opt.c +++ b/gcc/config/rs6000/pcrel-opt.c @@ -51,6 +51,43 @@ lwz data_reg,0(addr_reg) + We only look for a single usage in the basic block where the external + address is loaded. Multiple uses or references in another basic block will + force us to not use the PCREL_OPT relocation. 
+ + We also optimize stores to the address of an external variable using the + PCREL_GOT relocation and a single store that uses that external address. If + that is found we create the PCREL_OPT relocation to possibly convert: + + pld addr_reg,var@pcrel@got + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + + into: + + pstw data_reg,var@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + nop + + If the variable is not defined in the main program or the code using it is + not in the main program, the linker put the address in the .got section and + do: + + .section .got + .Lvar_got: + .dword var + + .section .text + pld addr_reg,.Lvar_got@pcrel + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + We only look for a single usage in the basic block where the external address is loaded. Multiple uses or references in another basic block will force us to not use the PCREL_OPT relocation. */ @@ -87,9 +124,12 @@ static struct { unsigned long loads; unsigned long adjacent_loads; unsigned long failed_loads; + unsigned long stores; + unsigned long adjacent_stores; + unsigned long failed_stores; } counters; -/* Return a marker to identify the PCREL_OPT load address and load +/* Return a marker to identify the PCREL_OPT load address and load/store instruction. We use a constant integer which is added to ".Lpcrel" to make the label. */ @@ -328,6 +368,160 @@ pcrel_opt_load (rtx_insn *addr_insn, /* insn loading address. */ return; } +/* Optimize a PC-relative load address to be used in a store. + + If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no + additional references to the address register, the address register dies at + the load, and no references to the load), convert insns of the form: + + (set (reg:DI addr) + (symbol_ref:DI "ext_symbol")) + + ... 
+ + (set (mem:<MODE> (reg:DI addr)) + (reg:<MODE> value)) + + into: + + (parallel [(set (reg:DI addr) + (unspec:DI [(symbol_ref:DI "ext_symbol") + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg:<MODE> value))]) + + ... + + (parallel [(set (mem:<MODE> (reg:DI addr)) + (unspec:<MODE> [(reg:<MODE>) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (reg:DI addr))]) + + The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a + definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn + will generate the .reloc to tell the linker to tie the load address and load + using that address together. + + pld b,ext_symbol@got@pcrel + .Lpcrel1: + + ... + + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) + stw r,0(b) + + If ext_symbol is defined in another object file in the main program and we + are linking the main program, the linker will convert the above instructions + to: + + pstwz r,ext_symbol@got@pcrel + + ... + + nop */ + +static void +pcrel_opt_store (rtx_insn *addr_insn, /* insn loading address. */ + rtx_insn *store_insn) /* insn using address. */ +{ + rtx addr_old_set = PATTERN (addr_insn); + gcc_assert (GET_CODE (addr_old_set) == SET); + + rtx addr_reg = SET_DEST (addr_old_set); + gcc_assert (base_reg_operand (addr_reg, Pmode)); + + rtx addr_symbol = SET_SRC (addr_old_set); + gcc_assert (pcrel_external_address (addr_symbol, Pmode)); + + rtx store_set = PATTERN (store_insn); + gcc_assert (GET_CODE (store_set) == SET); + + rtx mem = SET_DEST (store_set); + if (!MEM_P (mem)) + return; + + machine_mode mem_mode = GET_MODE (mem); + rtx reg = SET_SRC (store_set); + + /* Don't allow storing the address of the external variable. Make sure the + value being stored wasn't updated. 
*/ + if (!register_operand (reg, GET_MODE (reg)) + && reg_or_subregno (reg) != reg_or_subregno (addr_reg) + && !reg_set_between_p (reg, addr_insn, store_insn)) + return; + + /* If the address isn't a non-prefixed offsettable instruction, we can't do + the optimization. */ + if (!offsettable_non_prefixed_memory (reg, mem_mode, mem)) + return; + + /* Allocate a new PC-relative label, and update the load address insn. + + (parallel [(set (reg addr) + (unspec [(symbol_ref symbol) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg store))]) */ + rtx label_num = pcrel_opt_next_marker (); + rtvec v_addr = gen_rtvec (2, addr_symbol, label_num); + rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr, + UNSPEC_PCREL_OPT_ST_ADDR); + rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec); + rtx addr_use = gen_rtx_USE (VOIDmode, reg); + rtx addr_new_pattern + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use)); + + validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, true); + + /* Update the store insn. Add an explicit clobber of the external address + register just to be sure there are no additional uses of the address + register. + + (parallel [(set (mem (addr_reg) + (unspec:<MODE> [(reg) + (const_int label_num)] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (reg:DI addr_reg))]) */ + rtvec v_store = gen_rtvec (2, reg, label_num); + rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store, + UNSPEC_PCREL_OPT_ST_RELOC); + + rtx new_store_set = gen_rtx_SET (mem, new_store); + rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg); + rtx new_store_pattern + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber)); + + validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, true); + + /* Note whether changes succeeded or not. */ + if (apply_change_group ()) + { + /* PCREL_OPT store succeeded. 
*/ + counters.stores++; + if (next_nonnote_insn (addr_insn) == store_insn) + counters.adjacent_stores++; + + if (dump_file) + fprintf (dump_file, + "PCREL_OPT store (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (store_insn)); + } + else + { + /* PCREL_OPT store failed. */ + counters.failed_stores++; + if (dump_file) + fprintf (dump_file, + "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n", + INSN_UID (addr_insn), + INSN_UID (store_insn)); + } + + return; +} + /* Given an insn with that loads up a base register with the address of an external symbol, see if we can optimize it with the PCREL_OPT optimization. */ @@ -363,7 +557,7 @@ pcrel_opt_address (rtx_insn *addr_insn) if (!chain || chain->next) return; - /* Get the insn of the possible load. */ + /* Get the insn of the possible load or store. */ df_ref use = chain->ref; if (!use) return; @@ -449,7 +643,7 @@ pcrel_opt_address (rtx_insn *addr_insn) } /* If this is the last insn in the basic block, and we haven't found the - load, exit. */ + load or store, exit. */ if (insn == last_insn_in_bb) { do_pcrel_opt = false; @@ -461,7 +655,7 @@ pcrel_opt_address (rtx_insn *addr_insn) if (!do_pcrel_opt) return; - /* Is this a load? */ + /* Is this a load or a store? */ switch (get_attr_type (use_insn)) { /* Don't do the PCREL_OPT load optimization if there was a store @@ -476,7 +670,22 @@ pcrel_opt_address (rtx_insn *addr_insn) pcrel_opt_load (addr_insn, use_insn); break; - /* If the use is not a load, just skip the optimization. */ + /* Don't do the PCREL_OPT store optimization if there was a load or store + operation. For example, a load might be trying to load the value + being stored in between getting the address and doing the store. If + we do the PCREL_OPT store optimization, there is the potential for the + optimization to replace the load address with a store, which could + change the program. 
*/ + case TYPE_STORE: + case TYPE_FPSTORE: + case TYPE_VECSTORE: + if (store_insns_found || load_insns_found) + break; + + pcrel_opt_store (addr_insn, use_insn); + break; + + /* If the use is not a load or store, just skip the optimization. */ default: break; } @@ -505,7 +714,7 @@ pcrel_opt_pass (function *fun) fprintf (dump_file, "\n"); /* Look at each basic block to see if there is a load of an external - variable's external address, and a single load using that external + variable's external address, and a single load/store using that external address. */ FOR_ALL_BB_FN (bb, fun) { @@ -531,6 +740,13 @@ pcrel_opt_pass (function *fun) fprintf (dump_file, "# of failed PCREL_OPT load(s) = %lu\n", counters.failed_loads); + fprintf (dump_file, "# of PCREL_OPT store(s) = %lu (adjacent %lu)\n", + counters.stores, counters.adjacent_stores); + + if (counters.failed_stores) + fprintf (dump_file, "# of failed PCREL_OPT store(s) = %lu\n", + counters.failed_stores); + fprintf (dump_file, "\n"); } diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md index 70d4d8911b6..60f2faffbcb 100644 --- a/gcc/config/rs6000/pcrel-opt.md +++ b/gcc/config/rs6000/pcrel-opt.md @@ -84,7 +84,9 @@ (define_c_enum "unspec" [UNSPEC_PCREL_OPT_LD_ADDR UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG - UNSPEC_PCREL_OPT_LD_RELOC]) + UNSPEC_PCREL_OPT_LD_RELOC + UNSPEC_PCREL_OPT_ST_ADDR + UNSPEC_PCREL_OPT_ST_RELOC]) ;; Modes that are supported for PCREL_OPT (define_mode_iterator PO [QI HI SI DI TI SF DF KF @@ -254,3 +256,131 @@ (define_insn "*pcrel_opt_ld<mode>" return "lxv %x0,%1"; } [(set_attr "type" "vecload")]) + + +;; PCREL_OPT optimization for stores. We need to put the label after the PLD +;; instruction, because the assembler might insert a NOP before the PLD for +;; alignment. +;; +;; If we are optimizing a single write, normally the code would look like: +;; +;; (set (reg:DI <ptr>) +;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here +;; +;; ... 
# insns do not need to be adjacent +;; +;; (set (mem:SI (reg:DI <xxx>)) +;; (reg:SI <data>)) # <ptr> dies with this insn +;; +;; We optimize this to be: +;; +;; (parallel [(set (reg:DI <ptr>) +;; (unspec:DI [(symbol_ref:DI "<extern_addr>") +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_ADDR)) +;; (use (reg:<MODE> <data>))]) +;; +;; ... # insns do not need to be adjacent +;; +;; (parallel [(set (mem:<MODE> (reg:DI <ptr>)) +;; (unspec:<MODE> [(reg:<MODE> <data>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_RELOC)) +;; (clobber (reg:DI <ptr>))]) + +(define_insn "*pcrel_opt_st_addr<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (match_operand:PO 3 "gpc_reg_operand" "rwa"))] + "TARGET_PCREL_OPT" + "ld %0,%a1\n.Lpcrel%2:" + [(set_attr "prefixed" "yes") + (set_attr "type" "load") + (set_attr "loads_extern_addr" "yes")]) + +;; PCREL_OPT stores. 
+(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:QHSI 0 "d_form_memory" "=o") + (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[2]); + return "st<wd> %1,%0"; +} + [(set_attr "type" "store")]) + +(define_insn "*pcrel_opt_stdi" + [(set (match_operand:DI 0 "d_form_memory" "=o,o,o") + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT && TARGET_POWERPC64" +{ + output_pcrel_opt_reloc (operands[2]); + switch (which_alternative) + { + case 0: return "std %1,%0"; + case 1: return "stfd %1,%0"; + case 2: return "stxsd %1,%0"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "store,fpstore,fpstore")]) + +(define_insn "*pcrel_opt_stsf" + [(set (match_operand:SF 0 "d_form_memory" "=o,o,o") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[2]); + switch (which_alternative) + { + case 0: return "stfs %1,%0"; + case 1: return "stxssp %1,%0"; + case 2: return "stw %1,%0"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "fpstore,fpstore,store")]) + +(define_insn "*pcrel_opt_stdf" + [(set (match_operand:DF 0 "d_form_memory" "=o,o,o") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT + && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))" +{ + output_pcrel_opt_reloc (operands[2]); + switch 
(which_alternative) + { + case 0: return "stfd %1,%0"; + case 1: return "stxsd %1,%0"; + case 2: return "std %1,%0"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "fpstore,fpstore,store")]) + +(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:PO_VECT 0 "d_form_memory" "=o") + (unspec:PO_VECT [(match_operand:PO_VECT 1 "gpc_reg_operand" "wa") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" +{ + output_pcrel_opt_reloc (operands[2]); + return "stxv %x1,%0"; +} + [(set_attr "type" "vecstore")]) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 226b9ce67cb..6e74bfde3c7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -8556,7 +8556,17 @@ rs6000_delegitimize_address (rtx orig_x) (set (reg:DI <base-reg>) (unspec:DI [(symbol_ref <symbol>) (const_int <marker>)] - UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) */ + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) + + UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This + UNSPEC include the external SYMBOL_REF along with the value being loaded. + We return the original SYMBOL_REF. + + (parallel [(set (reg:DI <base-reg>) + (unspec:DI [(symbol_ref <symbol>) + (const_int <marker>)] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (reg <store-reg>))]) */ if (GET_CODE (orig_x) == UNSPEC) switch (XINT (orig_x, 1)) @@ -8564,6 +8574,7 @@ rs6000_delegitimize_address (rtx orig_x) case UNSPEC_FUSION_GPR: case UNSPEC_PCREL_OPT_LD_ADDR: case UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG: + case UNSPEC_PCREL_OPT_ST_ADDR: orig_x = XVECEXP (orig_x, 0, 0); break; -- 2.22.0 -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797