[PATCH 2/3] Power10: Add PCREL_OPT store support. This patch adds support for optimizing Power10 stores to an external variable, so that the linker can eliminate the separate load of the variable's address followed by a store through that address.
I have built compilers with and without this set of 3 patches, doing a bootstrap build and make check in each case. There were no regressions, and the new tests passed. Can I check these patches into the master branch for GCC? Because this is new functionality, I do not intend to backport these patches to GCC 10 at this time. gcc/ 2020-08-18 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/pcrel-opt.c (counters): Add fields to count number of PCREL_OPT stores that were processed. (do_pcrel_opt_store): New function to do PCREL_OPT stores. (do_pcrel_opt_addr): Add support to optimize PCREL_OPT stores. (do_pcrel_opt_pass): Print out statistics for PCREL_OPT stores. * config/rs6000/pcrel-opt.md (UNSPEC_PCREL_OPT_ST_ADDR): New unspec. (UNSPEC_PCREL_OPT_ST_RELOC): New unspec. (pcrel_opt_st_addr<mode>): New insns for PCREL_OPT stores. (pcrel_opt_st<mode>): New insns for QI/HI/SI PCREL_OPT stores. (pcrel_opt_stdi): New insn to optimize DI PCREL_OPT stores. (pcrel_opt_stsf): New insn to optimize SF PCREL_OPT stores. (pcrel_opt_stdf): New insn to optimize DF PCREL_OPT stores. (pcrel_opt_st<mode>): New insns to optimize vector PCREL_OPT stores. * config/rs6000/rs6000.c (rs6000_delegitimize_address): Add support to de-legitimize PCREL_OPT stores. --- gcc/config/rs6000/pcrel-opt.c | 259 +++++++++++++++++++++++++++++++++++++++-- gcc/config/rs6000/pcrel-opt.md | 115 +++++++++++++++++- gcc/config/rs6000/rs6000.c | 3 +- 3 files changed, 367 insertions(+), 10 deletions(-) diff --git a/gcc/config/rs6000/pcrel-opt.c b/gcc/config/rs6000/pcrel-opt.c index 10b4bc4..61dce67 100644 --- a/gcc/config/rs6000/pcrel-opt.c +++ b/gcc/config/rs6000/pcrel-opt.c @@ -53,6 +53,43 @@ We only look for a single usage in the basic block where the external address is loaded. Multiple uses or references in another basic block will + force us to not use the PCREL_OPT relocation. 
+ + We also optimize stores to the address of an external variable using the + PCREL_GOT relocation and a single store that uses that external address. If + that is found we create the PCREL_OPT relocation to possibly convert: + + pld addr_reg,var@pcrel@got(0),1 + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + + into: + + pstw data_reg,var@pcrel(0),1 + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + nop + + If the variable is not defined in the main program or the code using it is + not in the main program, the linker puts the address in the .got section + and does: + + .section .got + .Lvar_got: + .dword var + + .section .text + pld addr_reg,.Lvar_got@pcrel(0),1 + + <possibly other insns that do not use 'addr_reg' or 'data_reg'> + + stw data_reg,0(addr_reg) + + We only look for a single usage in the basic block where the external + address is loaded. Multiple uses or references in another basic block will force us to not use the PCREL_OPT relocation. */ #define IN_TARGET_CODE 1 @@ -82,11 +119,11 @@ #include "insn-codes.h" -// Maximum number of insns to scan between the load address and the load that -// uses that address. This can be bumped up if desired. If the insns are far -// enough away, the PCREL_OPT optimization probably does not help, since the -// load of the external address has probably completed by the time we do the -// load of the variable at that address. +// Maximum number of insns to scan between the load address and the load or +// store that uses that address. This can be bumped up if desired. If the +// insns are far enough away, the PCREL_OPT optimization probably does not +// help, since the load of the external address has probably completed by the +// time we do the load or store of the variable at that address. const int MAX_PCREL_OPT_INSNS = 10; /* Next PCREL_OPT label number. 
*/ @@ -97,6 +134,8 @@ static struct { unsigned long extern_addrs; unsigned long loads; unsigned long load_separation[MAX_PCREL_OPT_INSNS+1]; + unsigned long stores; + unsigned long store_separation[MAX_PCREL_OPT_INSNS+1]; } counters; @@ -306,6 +345,156 @@ do_pcrel_opt_load (rtx_insn *addr_insn, // insn loading address } +// Optimize a PC-relative load address to be used in a store. + +// If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no +// additional references to the address register, the address register dies at +// the store, and the stored register is untouched), convert insns of the form: +// +// (set (reg:DI addr) +// (symbol_ref:DI "ext_symbol")) +// +// ... +// +// (set (mem:<MODE> (reg:DI addr)) +// (reg:<MODE> value)) +// +// into: +// +// (parallel [(set (reg:DI addr) +// (unspec:DI [(symbol_ref:DI "ext_symbol") +// (const_int label_num)] +// UNSPEC_PCREL_OPT_ST_ADDR)) +// (use (reg:<MODE> value))]) +// +// ... +// +// (parallel [(set (mem:<MODE> (reg:DI addr)) +// (unspec:<MODE> [(reg:<MODE> value) +// (const_int label_num)] +// UNSPEC_PCREL_OPT_ST_RELOC)) +// (clobber (reg:DI addr))]) +// +// +// The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus +// a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC +// insn will generate the .reloc to tell the linker to tie the load address and +// store using that address together. +// +// pld b,ext_symbol@got@pcrel(0),1 +// .Lpcrel1: +// +// ... +// +// .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8) +// stw r,0(b) +// +// If ext_symbol is defined in another object file in the main program and we +// are linking the main program, the linker will convert the above instructions +// to: +// +// pstw r,ext_symbol@pcrel(0),1 +// +// ... 
+// +// nop +// +// The number of insns separating the load of the external address from the +// store is not computed here; the caller (do_pcrel_opt_addr) tracks it for the +// statistics counters. If either rewritten insn fails to be recognized, both +// insns are restored to their original patterns and the optimization is +// abandoned. +// +// Return true if the PCREL_OPT store optimization succeeded. + +static bool +do_pcrel_opt_store (rtx_insn *addr_insn, // insn loading address + rtx_insn *store_insn) // insn using address +{ + rtx addr_set = PATTERN (addr_insn); + rtx addr_reg = SET_DEST (addr_set); + rtx addr_symbol = SET_SRC (addr_set); + rtx store_set = single_set (store_insn); + rtx mem = SET_DEST (store_set); + rtx reg = SET_SRC (store_set); + machine_mode mem_mode = GET_MODE (mem); + + // If this is STFIWX or a similar instruction that is indexed only, we can't + // do the optimization. + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); + if (non_prefixed == NON_PREFIXED_X) + return false; + + // The optimization will only work on non-prefixed offsettable stores. + rtx addr = XEXP (mem, 0); + enum insn_form iform = address_to_insn_form (addr, mem_mode, non_prefixed); + if (iform != INSN_FORM_BASE_REG + && iform != INSN_FORM_D + && iform != INSN_FORM_DS + && iform != INSN_FORM_DQ) + return false; + + // Allocate a new PC-relative label, and update the load address insn. + + ++pcrel_opt_next_num; + rtx label_num = GEN_INT (pcrel_opt_next_num); + rtvec v_addr = gen_rtvec (2, addr_symbol, label_num); + rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr, + UNSPEC_PCREL_OPT_ST_ADDR); + rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec); + rtx addr_use = gen_rtx_USE (VOIDmode, reg); + + PATTERN (addr_insn) + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use)); + + // Revalidate the insn, backing out of the optimization if the insn is not + // supported. 
+ INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0); + if (INSN_CODE (addr_insn) < 0) + { + PATTERN (addr_insn) = addr_set; + INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0); + return false; + } + + // Update the store insn. Add an explicit clobber of the external address + // register just in case something runs after this pass. + // + // (parallel [(set (mem (addr_reg) + // (unspec:<MODE> [(reg) + // (const_int label_num)] + // UNSPEC_PCREL_OPT_ST_RELOC)) + // (clobber (reg:DI addr_reg))]) + + rtvec v_store = gen_rtvec (2, reg, label_num); + rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store, + UNSPEC_PCREL_OPT_ST_RELOC); + + rtx old_store_set = PATTERN (store_insn); + rtx new_store_set = gen_rtx_SET (mem, new_store); + rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg); + + PATTERN (store_insn) + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber)); + + // Revalidate the insn, backing out of the optimization if the insn is not + // supported. + + INSN_CODE (store_insn) = recog (PATTERN (store_insn), store_insn, 0); + if (INSN_CODE (store_insn) < 0) + { + PATTERN (addr_insn) = addr_set; + INSN_CODE (addr_insn) = recog (PATTERN (addr_insn), addr_insn, 0); + + PATTERN (store_insn) = old_store_set; + INSN_CODE (store_insn) = recog (PATTERN (store_insn), store_insn, 0); + return false; + } + + return true; +} + + /* Given an insn, find the next insn in the basic block. Stop if we find a the end of a basic block, such as a label, call or jump, and return NULL. */ @@ -340,8 +529,8 @@ next_active_insn_in_basic_block (rtx_insn *insn) } -// Validate that a load is actually a single instruction that can be optimized -// with the PCREL_OPT optimization. +// Validate that a load or store is actually a single instruction that can be +// optimized with the PCREL_OPT optimization. 
static bool is_single_instruction (rtx_insn *insn, rtx reg) @@ -522,6 +711,36 @@ do_pcrel_opt_addr (rtx_insn *addr_insn) } } + // Optimize stores + else if (is_store) + { + // If there were any loads in the insns between loading the external + // address and doing the store, turn off the optimization. + if (had_load) + return; + + rtx reg = SET_SRC (set); + rtx mem = SET_DEST (set); + if (!is_single_instruction (insn, reg)) + return; + + if (!MEM_P (mem)) + return; + + // If the register being loaded or stored was used or set between the + // load of the external address and the load or store using the address, + // we can't do the optimization. + if (reg_used_between_p (reg, addr_insn, insn) + || reg_set_between_p (reg, addr_insn, insn)) + return; + + if (do_pcrel_opt_store (addr_insn, insn)) + { + counters.stores++; + counters.store_separation[num_insns-1]++; + } + } + return; } @@ -544,7 +763,7 @@ do_pcrel_opt_pass (function *fun) df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE); // Look at each basic block to see if there is a load of an external - // variable's external address, and a single load using that external + // variable's external address, and a single load/store using that external // address. FOR_ALL_BB_FN (bb, fun) { @@ -598,6 +817,30 @@ do_pcrel_opt_pass (function *fun) counters.load_separation[i]); } } + + if (!counters.stores) + fprintf (dump_file, + "No PCREL_OPT store optimizations were done\n"); + + else + { + fprintf (dump_file, "# of PCREL_OPT stores = %lu\n", + counters.stores); + + fprintf (dump_file, "# of adjacent PCREL_OPT stores = %lu\n", + counters.store_separation[0]); + + for (int i = 1; i < MAX_PCREL_OPT_INSNS; i++) + { + if (counters.store_separation[i]) + fprintf (dump_file, + "# of PCREL_OPT stores separated by " + "%d insn%s = %lu\n", + i, (i == 1) ? 
"" : "s", + counters.store_separation[i]); + } + } + } fprintf (dump_file, "\n"); diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md index 00a3bc4..d98c6ce 100644 --- a/gcc/config/rs6000/pcrel-opt.md +++ b/gcc/config/rs6000/pcrel-opt.md @@ -84,7 +84,9 @@ (define_c_enum "unspec" [UNSPEC_PCREL_OPT_LD_ADDR UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG - UNSPEC_PCREL_OPT_LD_RELOC]) + UNSPEC_PCREL_OPT_LD_RELOC + UNSPEC_PCREL_OPT_ST_ADDR + UNSPEC_PCREL_OPT_ST_RELOC]) ;; Modes that are supported for PCREL_OPT (define_mode_iterator PO [QI HI SI DI TI SF DF KF @@ -246,3 +248,114 @@ (define_insn "*pcrel_opt_ld<mode>" "%r3lxv %x0,%1" [(set_attr "type" "vecload") (set_attr "isa" "pcrel_opt")]) + + +;; PCREL_OPT optimization for stores. We need to put the label after the PLD +;; instruction, because the assembler might insert a NOP before the PLD for +;; alignment. +;; +;; If we are optimizing a single write, normally the code would look like: +;; +;; (set (reg:DI <ptr>) +;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here +;; +;; ... # insns do not need to be adjacent +;; +;; (set (mem:SI (reg:DI <xxx>)) +;; (reg:SI <data>)) # <ptr> dies with this insn +;; +;; We optimize this to be: +;; +;; (parallel [(set (reg:DI <ptr>) +;; (unspec:DI [(symbol_ref:DI "<extern_addr>") +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_ADDR)) +;; (use (reg:<MODE> <data>))]) +;; +;; ... 
# insns do not need to be adjacent +;; +;; (parallel [(set (mem:<MODE> (reg:DI <ptr>)) +;; (unspec:<MODE> [(reg:<MODE> <data>) +;; (const_int <marker>)] +;; UNSPEC_PCREL_OPT_ST_RELOC)) +;; (clobber (reg:DI <ptr>))]) + +(define_insn "*pcrel_opt_st_addr<mode>" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") + (unspec:DI [(match_operand:DI 1 "pcrel_external_address") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_ADDR)) + (use (match_operand:PO 3 "gpc_reg_operand" "rwa"))] + "TARGET_PCREL_OPT" + "ld %0,%a1\n.Lpcrel%2:" + [(set_attr "prefixed" "yes") + (set_attr "type" "load") + (set_attr "isa" "pcrel_opt") + (set_attr "loads_extern_addr" "yes")]) + +;; Alternate form of the stores that include a marker to identify whether we +;; can do the PCREL_OPT optimization. +(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:QHSI 0 "d_form_memory" "=o") + (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" + "%r2st<wd> %1,%0" + [(set_attr "type" "store") + (set_attr "isa" "pcrel_opt")]) + +(define_insn "*pcrel_opt_stdi" + [(set (match_operand:DI 0 "d_form_memory" "=o,o,o") + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT && TARGET_POWERPC64" + "@ + %r2std %1,%0 + %r2stfd %1,%0 + %r2stxsd %1,%0" + [(set_attr "type" "store,fpstore,fpstore") + (set_attr "isa" "pcrel_opt")]) + +(define_insn "*pcrel_opt_stsf" + [(set (match_operand:SF 0 "d_form_memory" "=o,o,o") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT" + "@ + %r2stfs %1,%0 + %r2stxssp %1,%0 + 
%r2stw %1,%0" + [(set_attr "type" "fpstore,fpstore,store") + (set_attr "isa" "pcrel_opt")]) + +(define_insn "*pcrel_opt_stdf" + [(set (match_operand:DF 0 "d_form_memory" "=o,o,o") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r") + (match_operand 2 "const_int_operand" "n,n,n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))] + "TARGET_PCREL_OPT + && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))" + "@ + %r2stfd %1,%0 + %r2stxsd %1,%0 + %r2std %1,%0" + [(set_attr "type" "fpstore,fpstore,store") + (set_attr "isa" "pcrel_opt")]) + +(define_insn "*pcrel_opt_st<mode>" + [(set (match_operand:PO_VECT 0 "d_form_memory" "=o") + (unspec:PO_VECT [(match_operand:PO_VECT 1 "gpc_reg_operand" "wa") + (match_operand 2 "const_int_operand" "n")] + UNSPEC_PCREL_OPT_ST_RELOC)) + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))] + "TARGET_PCREL_OPT" + "%r2stxv %x1,%0" + [(set_attr "type" "vecstore") + (set_attr "isa" "pcrel_opt")]) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6877de5..9ec346c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -8525,7 +8525,8 @@ rs6000_delegitimize_address (rtx orig_x) if (GET_CODE (orig_x) == UNSPEC && (XINT (orig_x, 1) == UNSPEC_FUSION_GPR || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_LD_ADDR - || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) + || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG + || XINT (orig_x, 1) == UNSPEC_PCREL_OPT_ST_ADDR)) orig_x = XVECEXP (orig_x, 0, 0); orig_x = delegitimize_mem_from_attrs (orig_x); -- 1.8.3.1 -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797