On Tue, Sep 30, 2014 at 09:37:12AM +0100, James Greenhalgh wrote: > *ping*
*pingx2* Cheers, James > > Thanks, > James > > On Tue, Sep 23, 2014 at 10:17:21AM +0100, James Greenhalgh wrote: > > > > Hi, > > > > The comment on widest_int_mode_for_size claims that it returns the > > widest integer mode no wider than size. The implementation looks more > > like it finds the widest integer mode smaller than size. Everywhere it > > is used, the mode it is looking for is ultimately checked against an > > expected alignment or is used for heuristics that should be thinking > > about that check, so pull it in to here. > > > > Throughout expr.c corrections are made for this fact - adding one to > > the size passed to this function. This feels a bit backwards to me. > > > > This patch fixes that, and then fixes the fallout throughout expr.c. > > Generally, this means simplifying a bunch of move_by_pieces style copy > > code. > > > > Bootstrapped on x86_64, arm and AArch64 with no issues. > > > > OK for trunk? > > > > Thanks, > > James > > > > 2014-09-23 James Greenhalgh <james.greenha...@arm.com> > > > > * expr.c (MOVE_BY_PIECES_P): Remove off-by-one correction to > > move_by_pieces_ninsns. > > (CLEAR_BY_PIECES_P): Likewise. > > (SET_BY_PIECES_P): Likewise. > > (STORE_BY_PIECES_P): Likwise. > > (widest_int_mode_for_size): Return the widest mode in which the > > given size fits. > > (move_by_pieces): Remove off-by-one correction for max_size, > > simplify copy loop body. > > (move_by_pieces_ninsns): Simplify copy body. > > (can_store_by_pieces): Remove off-by-one correction for max_size, > > simplify copy body. > > (store_by_pieces_1): Likewise. > > > > > diff --git a/gcc/expr.c b/gcc/expr.c > > index a6233f3..0af9b9a 100644 > > --- a/gcc/expr.c > > +++ b/gcc/expr.c > > @@ -161,7 +161,7 @@ static void write_complex_part (rtx, rtx, bool); > > to perform a structure copy. */ > > #ifndef MOVE_BY_PIECES_P > > #define MOVE_BY_PIECES_P(SIZE, ALIGN) \ > > - (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ > > + (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES) \ > > < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) > > #endif > > > > @@ -169,7 +169,7 @@ static void write_complex_part (rtx, rtx, bool); > > called to clear storage. */ > > #ifndef CLEAR_BY_PIECES_P > > #define CLEAR_BY_PIECES_P(SIZE, ALIGN) \ > > - (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ > > + (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES) \ > > < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ())) > > #endif > > > > @@ -177,7 +177,7 @@ static void write_complex_part (rtx, rtx, bool); > > called to "memset" storage with byte values other than zero. */ > > #ifndef SET_BY_PIECES_P > > #define SET_BY_PIECES_P(SIZE, ALIGN) \ > > - (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ > > + (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES) \ > > < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ())) > > #endif > > > > @@ -185,7 +185,7 @@ static void write_complex_part (rtx, rtx, bool); > > called to "memcpy" storage when the source is a constant string. */ > > #ifndef STORE_BY_PIECES_P > > #define STORE_BY_PIECES_P(SIZE, ALIGN) \ > > - (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ > > + (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES) \ > > < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) > > #endif > > > > @@ -801,18 +801,23 @@ alignment_for_piecewise_move (unsigned int > > max_pieces, unsigned int align) > > return align; > > } > > > > -/* Return the widest integer mode no wider than SIZE. If no such mode > > - can be found, return VOIDmode. */ > > +/* Return the widest integer mode no wider than SIZE which can be accessed > > + at the given ALIGNMENT. If no such mode can be found, return VOIDmode. > > + If SIZE would fit exactly in a mode, return that mode. */ > > > > static enum machine_mode > > -widest_int_mode_for_size (unsigned int size) > > +widest_int_mode_for_size_and_alignment (unsigned int size, > > + unsigned int align) > > { > > enum machine_mode tmode, mode = VOIDmode; > > > > for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT); > > tmode != VOIDmode; tmode = GET_MODE_WIDER_MODE (tmode)) > > - if (GET_MODE_SIZE (tmode) < size) > > - mode = tmode; > > + { > > + if (GET_MODE_SIZE (tmode) <= size > > + && align >= GET_MODE_ALIGNMENT (mode)) > > + mode = tmode; > > + } > > > > return mode; > > } > > @@ -855,7 +860,7 @@ move_by_pieces (rtx to, rtx from, unsigned > > HOST_WIDE_INT len, > > enum machine_mode to_addr_mode; > > enum machine_mode from_addr_mode = get_address_mode (from); > > rtx to_addr, from_addr = XEXP (from, 0); > > - unsigned int max_size = MOVE_MAX_PIECES + 1; > > + unsigned int max_size = MOVE_MAX_PIECES; > > enum insn_code icode; > > > > align = MIN (to ? MEM_ALIGN (to) : align, MEM_ALIGN (from)); > > @@ -907,7 +912,7 @@ move_by_pieces (rtx to, rtx from, unsigned > > HOST_WIDE_INT len, > > MODE might not be used depending on the definitions of the > > USE_* macros below. */ > > enum machine_mode mode ATTRIBUTE_UNUSED > > - = widest_int_mode_for_size (max_size); > > + = widest_int_mode_for_size_and_alignment (max_size, align); > > > > if (USE_LOAD_PRE_DECREMENT (mode) && data.reverse && ! > > data.autinc_from) > > { > > @@ -948,18 +953,20 @@ move_by_pieces (rtx to, rtx from, unsigned > > HOST_WIDE_INT len, > > /* First move what we can in the largest integer mode, then go to > > successively smaller modes. */ > > > > - while (max_size > 1 && data.len > 0) > > + while (data.len > 0) > > { > > - enum machine_mode mode = widest_int_mode_for_size (max_size); > > + enum machine_mode mode > > + = widest_int_mode_for_size_and_alignment (MIN (max_size, data.len), > > + align); > > > > if (mode == VOIDmode) > > break; > > > > icode = optab_handler (mov_optab, mode); > > - if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)) > > + if (icode != CODE_FOR_nothing) > > move_by_pieces_1 (GEN_FCN (icode), mode, &data); > > - > > - max_size = GET_MODE_SIZE (mode); > > + else > > + max_size = GET_MODE_SIZE (mode) - 1; > > } > > > > /* The code above should have handled everything. */ > > @@ -1008,21 +1015,25 @@ move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, > > unsigned int align, > > > > align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align); > > > > - while (max_size > 1 && l > 0) > > + while (l > 0) > > { > > enum machine_mode mode; > > enum insn_code icode; > > > > - mode = widest_int_mode_for_size (max_size); > > + mode = widest_int_mode_for_size_and_alignment (MIN (max_size, l), > > + align); > > > > if (mode == VOIDmode) > > break; > > > > icode = optab_handler (mov_optab, mode); > > - if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)) > > - n_insns += l / GET_MODE_SIZE (mode), l %= GET_MODE_SIZE (mode); > > - > > - max_size = GET_MODE_SIZE (mode); > > + if (icode != CODE_FOR_nothing) > > + { > > + n_insns += l / GET_MODE_SIZE (mode); > > + l %= GET_MODE_SIZE (mode); > > + } > > + else > > + max_size = GET_MODE_SIZE (mode) - 1; > > } > > > > gcc_assert (!l); > > @@ -2475,10 +2486,10 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > > void *constfundata, unsigned int align, bool memsetp) > > { > > unsigned HOST_WIDE_INT l; > > - unsigned int max_size; > > HOST_WIDE_INT offset = 0; > > enum machine_mode mode; > > enum insn_code icode; > > + unsigned int max_size = STORE_MAX_PIECES; > > int reverse; > > /* cst is set but not used if LEGITIMATE_CONSTANT doesn't use it. */ > > rtx cst ATTRIBUTE_UNUSED; > > @@ -2501,17 +2512,16 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > > reverse++) > > { > > l = len; > > - max_size = STORE_MAX_PIECES + 1; > > - while (max_size > 1 && l > 0) > > + while (l > 0) > > { > > - mode = widest_int_mode_for_size (max_size); > > + mode = widest_int_mode_for_size_and_alignment > > + (MIN (max_size, l), align); > > > > if (mode == VOIDmode) > > break; > > > > icode = optab_handler (mov_optab, mode); > > - if (icode != CODE_FOR_nothing > > - && align >= GET_MODE_ALIGNMENT (mode)) > > + if (icode != CODE_FOR_nothing) > > { > > unsigned int size = GET_MODE_SIZE (mode); > > > > @@ -2530,8 +2540,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > > l -= size; > > } > > } > > - > > - max_size = GET_MODE_SIZE (mode); > > + else > > + max_size = GET_MODE_SIZE (mode) - 1; > > } > > > > /* The code above should have handled everything. */ > > @@ -2643,7 +2653,7 @@ store_by_pieces_1 (struct store_by_pieces_d *data > > ATTRIBUTE_UNUSED, > > { > > enum machine_mode to_addr_mode = get_address_mode (data->to); > > rtx to_addr = XEXP (data->to, 0); > > - unsigned int max_size = STORE_MAX_PIECES + 1; > > + unsigned int max_size = STORE_MAX_PIECES; > > enum insn_code icode; > > > > data->offset = 0; > > @@ -2668,7 +2678,7 @@ store_by_pieces_1 (struct store_by_pieces_d *data > > ATTRIBUTE_UNUSED, > > MODE might not be used depending on the definitions of the > > USE_* macros below. */ > > enum machine_mode mode ATTRIBUTE_UNUSED > > - = widest_int_mode_for_size (max_size); > > + = widest_int_mode_for_size_and_alignment (max_size, align); > > > > if (USE_STORE_PRE_DECREMENT (mode) && data->reverse && ! > > data->autinc_to) > > { > > @@ -2697,18 +2707,20 @@ store_by_pieces_1 (struct store_by_pieces_d *data > > ATTRIBUTE_UNUSED, > > /* First store what we can in the largest integer mode, then go to > > successively smaller modes. */ > > > > - while (max_size > 1 && data->len > 0) > > + while (data->len > 0) > > { > > - enum machine_mode mode = widest_int_mode_for_size (max_size); > > + enum machine_mode mode > > + = widest_int_mode_for_size_and_alignment (MIN (max_size, data->len), > > + align); > > > > if (mode == VOIDmode) > > break; > > > > icode = optab_handler (mov_optab, mode); > > - if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)) > > + if (icode != CODE_FOR_nothing) > > store_by_pieces_2 (GEN_FCN (icode), mode, data); > > - > > - max_size = GET_MODE_SIZE (mode); > > + else > > + max_size = GET_MODE_SIZE (mode) - 1; > > } > > > > /* The code above should have handled everything. */ > >