Oops, here is the patch. Dave -- John David Anglin dave.ang...@bell.net
2016-07-16 John David Anglin <dang...@gcc.gnu.org> * config/pa/pa.c (hppa_profile_hook): Allocate stack space for register parameters. Remove code to initialize argument pointer on TARGET_64BIT. Optimize call to _mcount when it can be reached using a pc-relative branch. Cleanup conditional code. * config/pa/pa.md (call_mcount): New expander. (call_mcount_nonpic): New insn. (call_mcount_pic): New insn and split. (call_mcount_pic_post_reload): New insn. (call_mcount_64bit): New insn and split. (call_mcount_64bit_post_reload): New insn. Index: config/pa/pa.c =================================================================== --- config/pa/pa.c (revision 238404) +++ config/pa/pa.c (working copy) @@ -4532,64 +4532,79 @@ lcla2 and load_offset_label_address insn patterns. */ rtx reg = gen_reg_rtx (SImode); rtx_code_label *label_rtx = gen_label_rtx (); - rtx begin_label_rtx; + rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount")); + int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE); + rtx arg_bytes, begin_label_rtx; rtx_insn *call_insn; char begin_label_name[16]; + bool use_mcount_pcrel_call; + /* If we can reach _mcount with a pc-relative call, we can optimize + loading the address of the current function. This requires linker + long branch stub support. */ + if (!TARGET_PORTABLE_RUNTIME + && !TARGET_LONG_CALLS + && (TARGET_SOM || flag_function_sections)) + use_mcount_pcrel_call = TRUE; + else + use_mcount_pcrel_call = FALSE; + ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, label_no); begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); - if (TARGET_64BIT) - emit_move_insn (arg_pointer_rtx, - gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, - GEN_INT (64))); - emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); - /* The address of the function is loaded into %r25 with an instruction- - relative sequence that avoids the use of relocations. 
The sequence - is split so that the load_offset_label_address instruction can - occupy the delay slot of the call to _mcount. */ - if (TARGET_PA_20) - emit_insn (gen_lcla2 (reg, label_rtx)); - else - emit_insn (gen_lcla1 (reg, label_rtx)); + if (!use_mcount_pcrel_call) + { + /* The address of the function is loaded into %r25 with an instruction- + relative sequence that avoids the use of relocations. The sequence + is split so that the load_offset_label_address instruction can + occupy the delay slot of the call to _mcount. */ + if (TARGET_PA_20) + emit_insn (gen_lcla2 (reg, label_rtx)); + else + emit_insn (gen_lcla1 (reg, label_rtx)); - emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), - reg, begin_label_rtx, label_rtx)); + emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), + reg, + begin_label_rtx, + label_rtx)); + } -#if !NO_DEFERRED_PROFILE_COUNTERS - { - rtx count_label_rtx, addr, r24; - char count_label_name[16]; + if (!NO_DEFERRED_PROFILE_COUNTERS) + { + rtx count_label_rtx, addr, r24; + char count_label_name[16]; - funcdef_nos.safe_push (label_no); - ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); - count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); + funcdef_nos.safe_push (label_no); + ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); + count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, + ggc_strdup (count_label_name)); - addr = force_reg (Pmode, count_label_rtx); - r24 = gen_rtx_REG (Pmode, 24); - emit_move_insn (r24, addr); + addr = force_reg (Pmode, count_label_rtx); + r24 = gen_rtx_REG (Pmode, 24); + emit_move_insn (r24, addr); - call_insn = - emit_call_insn (gen_call (gen_rtx_MEM (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - "_mcount")), - GEN_INT (TARGET_64BIT ? 24 : 12))); + arg_bytes = GEN_INT (TARGET_64BIT ? 
24 : 12); + if (use_mcount_pcrel_call) + call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, + begin_label_rtx)); + else + call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); - use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); - } -#else + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); + } + else + { + arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8); + if (use_mcount_pcrel_call) + call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, + begin_label_rtx)); + else + call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); + } - call_insn = - emit_call_insn (gen_call (gen_rtx_MEM (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - "_mcount")), - GEN_INT (TARGET_64BIT ? 16 : 8))); - -#endif - use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); @@ -4596,6 +4611,10 @@ /* Indicate the _mcount call cannot throw, nor will it execute a non-local goto. */ make_reg_eh_region_note_nothrow_nononlocal (call_insn); + + /* Allocate space for fixed arguments. */ + if (reg_parm_stack_space > crtl->outgoing_args_size) + crtl->outgoing_args_size = reg_parm_stack_space; } /* Fetch the return address for the frame COUNT steps up from Index: config/pa/pa.md =================================================================== --- config/pa/pa.md (revision 238404) +++ config/pa/pa.md (working copy) @@ -8207,6 +8207,170 @@ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)] (symbol_ref "pa_attr_length_indirect_call (insn)")))]) +/* Expand special pc-relative call to _mcount. 
*/ + +(define_expand "call_mcount" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2))])] + "!TARGET_PORTABLE_RUNTIME" + " +{ + rtx op = XEXP (operands[0], 0); + rtx nb = operands[1]; + rtx lab = operands[2]; + + if (TARGET_64BIT) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + emit_call_insn (gen_call_mcount_64bit (op, nb, lab, r4)); + } + else + { + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_mcount_pic (op, nb, lab, r4)); + } + else + emit_call_insn (gen_call_mcount_nonpic (op, nb, lab)); + } + + DONE; +}") + +(define_insn "call_mcount_nonpic" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + pa_output_arg_descriptor (insn); + return \"{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "call_mcount_pic" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (clobber (match_operand 3)) + (use (reg:SI 19))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "#") + +(define_split + [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (clobber (match_operand 3)) + (use (reg:SI 19))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && 
reload_completed" + [(set (match_dup 3) (reg:SI 19)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_dup 2) + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (use (reg:SI 19))]) + (set (reg:SI 19) (match_dup 3))] + "") + +(define_insn "*call_mcount_pic_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (use (reg:SI 19))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + pa_output_arg_descriptor (insn); + return \"{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "call_mcount_64bit" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (clobber (match_operand 3)) + (use (reg:DI 27)) + (use (reg:DI 29))] + "TARGET_64BIT" + "#") + +(define_split + [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (clobber (match_operand 3)) + (use (reg:DI 27)) + (use (reg:DI 29))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_dup 2) + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29))]) + (set (reg:DI 27) (match_dup 3))] + "") + +(define_insn "*call_mcount_64bit_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + 
(minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29))] + "TARGET_64BIT" + "{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + ;; Call subroutine returning any type. (define_expand "untyped_call"