On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote: > Hi, > > The attached patch fixes PR 67391. Some additional reg overlapping were > added to the addsi3 patterns while making LRA on SH work, but not all of > them seem to be good. Removing them, seems to be working just fine. > Tested on sh-elf (LRA enabled) with make -k check > RUNTESTFLAGS="--target_board=sh-sim > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" > and by Kaz on sh4-linux. > > Committed to trunk as r228046 and to the GCC 5 branch as r228047.
This has opened a small can of worms. The follow up patch is attached and has been commited to trunk as r228176. Tested by me on sh-elf and by Kaz on sh4-linux with LRA enabled/disabled. As a positive side effect, we get some code size reduction on the CSiBE set: 3345527 bytes -> 3334351 bytes -11176 bytes / -0.334058 % However, this is only when LRA is disabled because of some problems with LRA and its usage/handling of addsi3 insns. Backport to GCC 5 branch will follow. Cheers, Oleg gcc/ChangeLog: PR target/67391 * config/sh/sh-protos.h (sh_lra_p): Declare. * config/sh/sh.c (sh_lra_p): Make non-static. * config/sh/sh.md (addsi3): Use arith_reg_dest for operands[0] and arith_reg_operand for operands[1]. Remove TARGET_SHMEDIA case. Expand into addsi3_scr if operands[2] if needed. (*addsi3_compact): Rename to *addsi3_compact_lra. Use arith_reg_operand for operands[1]. Allow it only when LRA is enabled. (addsi3_scr, *addsi3): New insn_and_split patterns.
Index: gcc/config/sh/sh-protos.h =================================================================== --- gcc/config/sh/sh-protos.h (revision 228117) +++ gcc/config/sh/sh-protos.h (working copy) @@ -93,6 +93,7 @@ extern rtx sh_fsca_int2sf (void); /* Declare functions defined in sh.c and used in templates. */ +extern bool sh_lra_p (void); extern const char *output_branch (int, rtx_insn *, rtx *); extern const char *output_ieee_ccmpeq (rtx_insn *, rtx *); Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 228117) +++ gcc/config/sh/sh.c (working copy) @@ -216,7 +216,6 @@ static int sh_mode_entry (int); static int sh_mode_exit (int); static int sh_mode_priority (int entity, int n); -static bool sh_lra_p (void); static rtx mark_constant_pool_use (rtx); static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, @@ -14507,7 +14506,7 @@ */ /* Return true if we use LRA instead of reload pass. */ -static bool +bool sh_lra_p (void) { return sh_lra_flag; Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 228117) +++ gcc/config/sh/sh.md (working copy) @@ -2122,13 +2122,19 @@ }) (define_expand "addsi3" - [(set (match_operand:SI 0 "arith_reg_operand" "") - (plus:SI (match_operand:SI 1 "arith_operand" "") - (match_operand:SI 2 "arith_or_int_operand" "")))] + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "arith_or_int_operand")))] "" { - if (TARGET_SHMEDIA) - operands[1] = force_reg (SImode, operands[1]); + if (TARGET_SH1 && !arith_operand (operands[2], SImode)) + { + if (!sh_lra_p () || reg_overlap_mentioned_p (operands[0], operands[1])) + { + emit_insn (gen_addsi3_scr (operands[0], operands[1], operands[2])); + DONE; + } + } }) (define_insn "addsi3_media" @@ -2163,15 +2169,22 @@ ;; copy or constant load before the actual add insn. ;; Use u constraint for that case to avoid the invalid value in the stack ;; pointer. -(define_insn_and_split "*addsi3_compact" +;; This also results in better code when LRA is not used. However, we have +;; to use different sets of patterns and the order of these patterns is +;; important. +;; In some cases the constant zero might end up in operands[2] of the +;; patterns. We have to accept that and convert it into a reg-reg move. +(define_insn_and_split "*addsi3_compact_lra" [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u") - (plus:SI (match_operand:SI 1 "arith_operand" "%0,r") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r") (match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))] - "TARGET_SH1" + "TARGET_SH1 && sh_lra_p () + && (! reg_overlap_mentioned_p (operands[0], operands[1]) + || arith_operand (operands[2], SImode))" "@ add %2,%0 #" - "reload_completed + "&& reload_completed && ! reg_overlap_mentioned_p (operands[0], operands[1])" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))] @@ -2182,6 +2195,58 @@ } [(set_attr "type" "arith")]) +(define_insn_and_split "addsi3_scr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u,&u") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r,r") + (match_operand:SI 2 "arith_or_int_operand" "rI08,r,n"))) + (clobber (match_scratch:SI 3 "=X,X,&u"))] + "TARGET_SH1" + "@ + add %2,%0 + # + #" + "&& reload_completed" + [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))] +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + if (CONST_INT_P (operands[2]) && !satisfies_constraint_I08 (operands[2])) + { + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + emit_move_insn (operands[3], operands[2]); + emit_move_insn (operands[0], operands[1]); + operands[2] = operands[3]; + } + else + { + emit_move_insn (operands[0], operands[2]); + operands[2] = operands[1]; + } + } + else if (!reg_overlap_mentioned_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +} + [(set_attr "type" "arith")]) + +(define_insn_and_split "*addsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r") + (match_operand:SI 2 "arith_operand" "rI08,Z")))] + "TARGET_SH1 && !sh_lra_p ()" + "@ + add %2,%0 + #" + "&& operands[2] == const0_rtx" + [(set (match_dup 0) (match_dup 1))] +{ +} + [(set_attr "type" "arith")]) + ;; ------------------------------------------------------------------------- ;; Subtraction instructions ;; -------------------------------------------------------------------------