Currently, the instruction sinking in "prepare_shrink_wrap" is somewhat conservative, so some further optimization opportunities are missed.
Suppose the prologue uses register A via: (store A, [sp + offset]), and the entry_basic_block contains a simple register copy like: (move A, B). The current "prepare_shrink_wrap" will sink the move instruction as deep as it can, and the entry_basic_block can then be marked as "doesn't need prologue".

If we replace "(move A, B)" with either one of
  * "(move B, CONST_K)",
  * "(move B, (plus A, CONST_K))"
we could still do the same sink optimization, but *current gcc does not*.

Patterns like (move B, CONST_K) are very common on some RISC targets. For example, on AArch64 we could have the following pair:

  adrp x22, global_data_a
  add x0, x22, :lo12:global_data_a

If the "adrp" is scheduled into the entry_basic_block, then the write to x22 may prevent shrink-wrapping from happening.

When judging whether an instruction is sinkable, move_insn_for_shrink_wrap only accepts a simple register copy where both dest and src are REG_P, while the second operand of adrp is actually a SYMBOL_REF, so it is rejected by the optimization.

This patch relaxes the restriction on src to accept any one of the following:
  + REG
  + CONST_OBJ, like SYMBOL_REF
  + a combination of a single REG and any other CONST_OBJs.
    (The reg def/use calculation will not be affected by CONST_OBJs.)

RISC backends may benefit more from this relaxation, although there are still minor improvements on x86. For example, 17 more functions are shrink-wrapped during x86-64 bootstrap, like sort_bucket in ira-color.c.

test done
=========
No regression on aarch64-none-elf bare-metal.
No regression on x86-64 check-gcc.
Both aarch64 and x86-64 bootstrap OK.

OK for install?

2014-09-04 Jiong Wang<jiong.w...@arm.com>

gcc/
  * shrink-wrap.c (rtx_search_arg): New structure type.
  (rtx_search_arg_p): New typedef.
  (count_reg_const): New callback function.
  (move_insn_for_shrink_wrap): Relax the restriction on src operand.
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c index 0938f2c..5b5ca85 100644 --- a/gcc/shrink-wrap.c +++ b/gcc/shrink-wrap.c @@ -156,6 +156,37 @@ live_edge_for_reg (basic_block bb, int regno, int end_regno) return live_edge; } +struct rtx_search_arg +{ + unsigned int reg_found; + unsigned int nonconst_found; + rtx reg; +}; + +typedef struct rtx_search_arg *rtx_search_arg_p; + +/* A for_each_rtx callback used by move_insn_for_shrink_wrap to count the + numbers of register and non-constant objects. */ + +static int +count_reg_const (rtx *loc, void *arg) +{ + rtx_search_arg_p p = (rtx_search_arg_p) arg; + rtx x; + + x = *loc; + + if (REG_P (x)) + { + p->reg_found++; + p->reg = x; + } + else if (! CONSTANT_P (x)) + p->nonconst_found++; + + return 0; +} + /* Try to move INSN from BB to a successor. Return true on success. USES and DEFS are the set of registers that are used and defined after INSN in BB. SPLIT_P indicates whether a live edge from BB @@ -169,7 +200,9 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, { rtx set, src, dest; bitmap live_out, live_in, bb_uses, bb_defs; - unsigned int i, dregno, end_dregno, sregno, end_sregno; + unsigned int i, dregno, end_dregno; + unsigned int sregno = FIRST_PSEUDO_REGISTER; + unsigned int end_sregno = FIRST_PSEUDO_REGISTER; basic_block next_block; edge live_edge; @@ -179,7 +212,25 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, return false; src = SET_SRC (set); dest = SET_DEST (set); - if (!REG_P (dest) || !REG_P (src) + + if (!REG_P (src)) + { + struct rtx_search_arg arg; + + arg.reg_found = 0; + arg.nonconst_found = 0; + arg.reg = NULL_RTX; + + for_each_rtx (&src, count_reg_const, (void *) &arg); + + if (arg.nonconst_found + || arg.reg_found > 1) + src = NULL_RTX; + else if (arg.reg_found == 1) + src = arg.reg; + } + + if (!REG_P (dest) || src == NULL_RTX /* STACK or FRAME related adjustment might be part of prologue. So keep them in the entry block. 
*/ || dest == stack_pointer_rtx @@ -188,10 +238,13 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, return false; /* Make sure that the source register isn't defined later in BB. */ - sregno = REGNO (src); - end_sregno = END_REGNO (src); - if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno)) - return false; + if (REG_P (src)) + { + sregno = REGNO (src); + end_sregno = END_REGNO (src); + if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno)) + return false; + } /* Make sure that the destination register isn't referenced later in BB. */ dregno = REGNO (dest); diff --git a/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c new file mode 100644 index 0000000..ad2e588 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */ + +extern char *asm_out_file; +extern void default_elf_asm_output_ascii (char *, const char *, int); + +void +assemble_string (const char *p, int size) +{ + int pos = 0; + int maximum = 2000; + + while (pos < size) + { + int thissize = size - pos; + + if (thissize > maximum) + thissize = maximum; + + default_elf_asm_output_ascii (asm_out_file, p, thissize);; + + pos += thissize; + p += thissize; + } +} + +/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */ +/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */