Hi, This patch started off by hookizing MOVE_RATIO, but pulling on that thread made it clear that most users of MOVE_RATIO really want to know whether move_by_pieces is going to be used or not. For that we have MOVE_BY_PIECES_P.
We can hookize MOVE_BY_PIECES_P instead, and clean up most other callers of MOVE_RATIO. We leave behind one caller in SRA and one in tree-inline, which we will clean up shortly. Bootstrapped on x86_64, AArch64 and ARM. OK for trunk? Thanks, James --- gcc/ 2014-09-25 James Greenhalgh <james.greenha...@arm.com> * target.def (move_by_pieces_profitable_p): New. * doc/tm.texi.in (MOVE_BY_PIECES_P): Reduce documentation to a stub describing that this macro is deprecated. (TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook. * doc/tm.texi: Regenerate. * expr.c (MOVE_BY_PIECES_P): Remove. (STORE_BY_PIECES_P): Rewrite in terms of TARGET_MOVE_BY_PIECES_PROFITABLE_P. (can_move_by_pieces): Likewise. (emit_block_move_hints): Rewrite in terms of can_move_by_pieces. (emit_push_insn): Likewise. (expand_constructor): Likewise. * targhooks.c (get_move_ratio): New. (default_move_by_pieces_profitable_p): Likewise. * targhooks.h (default_move_by_pieces_profitable_p): New.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 10af50e..162aa30 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6114,11 +6114,38 @@ If you don't define this, a reasonable default is used. @end defmac @defmac MOVE_BY_PIECES_P (@var{size}, @var{alignment}) -A C expression used to determine whether @code{move_by_pieces} will be used to -copy a chunk of memory, or whether some other block move mechanism -will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less -than @code{MOVE_RATIO}. -@end defmac +A C expression used to implement the default behaviour of +@code{TARGET_MOVE_BY_PIECES_PROFITABLE_P}. New ports should implement +that hook in preference to this macro, which is deprecated. +@end defmac + +@deftypefn {Target Hook} bool TARGET_MOVE_BY_PIECES_PROFITABLE_P (unsigned int @var{size}, unsigned int @var{alignment}, bool @var{speed_p}) +GCC will attempt several strategies when asked to copy between +two areas of memory, for example when copying a @code{struct}. +@code{move_by_pieces} implements such a copy as a sequence of +memory-to-memory move insns. Alternate strategies are to expand the +@code{movmem} optab, to emit a library call, or to emit a unit-by-unit +loop-based copy. + +This target hook should return true if, for a memory move with a given +@var{size} and @var{alignment}, using the @code{move_by_pieces} +infrastructure is expected to result in better code generation. +Both @var{size} and @var{alignment} are measured in terms of storage +units. + +The parameter @var{speed_p} is true if the code is currently being +optimized for speed rather than size. + +Returning true for higher values of @var{size} can improve code generation +for speed if the target does not provide an implementation of the +@code{movmem} standard name, if the @code{movmem} implementation would be +more expensive than a sequence of move insns, or if the overhead of a +library call would dominate that of the body of the copy. 
+ +Returning true for higher values of @code{size} may also cause an increase +in code size, for example where the number of insns emitted to perform a +move would be greater than that of a library call. +@end deftypefn @defmac MOVE_MAX_PIECES A C expression used by @code{move_by_pieces} to determine the largest unit diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index f6f241b..1894745 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4597,12 +4597,13 @@ If you don't define this, a reasonable default is used. @end defmac @defmac MOVE_BY_PIECES_P (@var{size}, @var{alignment}) -A C expression used to determine whether @code{move_by_pieces} will be used to -copy a chunk of memory, or whether some other block move mechanism -will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less -than @code{MOVE_RATIO}. +A C expression used to implement the default behaviour of +@code{TARGET_MOVE_BY_PIECES_PROFITABLE_P}. New ports should implement +that hook in preference to this macro, which is deprecated. @end defmac +@hook TARGET_MOVE_BY_PIECES_PROFITABLE_P + @defmac MOVE_MAX_PIECES A C expression used by @code{move_by_pieces} to determine the largest unit a load or store used to copy memory is. Defaults to @code{MOVE_MAX}. diff --git a/gcc/expr.c b/gcc/expr.c index 0af9b9a..59a85f7 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -157,14 +157,6 @@ static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int); static rtx const_vector_from_tree (tree); static void write_complex_part (rtx, rtx, bool); -/* This macro is used to determine whether move_by_pieces should be called - to perform a structure copy. */ -#ifndef MOVE_BY_PIECES_P -#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ - (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES) \ - < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) -#endif - /* This macro is used to determine whether clear_by_pieces should be called to clear storage. 
*/ #ifndef CLEAR_BY_PIECES_P @@ -185,8 +177,7 @@ static void write_complex_part (rtx, rtx, bool); called to "memcpy" storage when the source is a constant string. */ #ifndef STORE_BY_PIECES_P #define STORE_BY_PIECES_P(SIZE, ALIGN) \ - (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES) \ - < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) + (targetm.move_by_pieces_profitable_p (SIZE, ALIGN, STORE_MAX_PIECES)) #endif /* This is run to set up which modes can be used @@ -837,7 +828,8 @@ int can_move_by_pieces (unsigned HOST_WIDE_INT len ATTRIBUTE_UNUSED, unsigned int align ATTRIBUTE_UNUSED) { - return MOVE_BY_PIECES_P (len, align); + return targetm.move_by_pieces_profitable_p (len, align, + optimize_insn_for_speed_p ()); } /* Generate several move instructions to copy LEN bytes from block FROM to @@ -1180,7 +1172,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, set_mem_size (y, INTVAL (size)); } - if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align)) + if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align)) move_by_pieces (x, y, INTVAL (size), align, 0); else if (emit_block_move_via_movmem (x, y, size, align, expected_align, expected_size, @@ -4224,7 +4216,7 @@ emit_push_insn (rtx x, enum machine_mode mode, tree type, rtx size, && CONST_INT_P (size) && skip == 0 && MEM_ALIGN (xinner) >= align - && (MOVE_BY_PIECES_P ((unsigned) INTVAL (size) - used, align)) + && can_move_by_pieces ((unsigned) INTVAL (size) - used, align) /* Here we avoid the case of a structure whose weak alignment forces many pushes of a small amount of data, and such small pushes do rounding that causes trouble. */ @@ -7845,7 +7837,7 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier, && ! (target != 0 && safe_from_p (target, exp, 1))) || TREE_ADDRESSABLE (exp) || (tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)) - && (! MOVE_BY_PIECES_P + && (! 
can_move_by_pieces (tree_to_uhwi (TYPE_SIZE_UNIT (type)), TYPE_ALIGN (type))) && ! mostly_zeros_p (exp)))) diff --git a/gcc/target.def b/gcc/target.def index ce11eae..0fd6235 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3049,6 +3049,36 @@ are the same as to this target hook.", int, (enum machine_mode mode, reg_class_t rclass, bool in), default_memory_move_cost) +DEFHOOK +(move_by_pieces_profitable_p, + "GCC will attempt several strategies when asked to copy between\n\ +two areas of memory, for example when copying a @code{struct}.\n\ +@code{move_by_pieces} implements such a copy as a sequence of\n\ +memory-to-memory move insns. Alternate strategies are to expand the\n\ +@code{movmem} optab, to emit a library call, or to emit a unit-by-unit\n\ +loop-based copy.\n\ +\n\ +This target hook should return true if, for a memory move with a given\n\ +@var{size} and @var{alignment}, using the @code{move_by_pieces}\n\ +infrastructure is expected to result in better code generation.\n\ +Both @var{size} and @var{alignment} are measured in terms of storage\n\ +units.\n\ +\n\ +The parameter @var{speed_p} is true if the code is currently being\n\ +optimized for speed rather than size.\n\ +\n\ +Returning true for higher values of @var{size} can improve code generation\n\ +for speed if the target does not provide an implementation of the\n\ +@code{movmem} standard name, if the @code{movmem} implementation would be\n\ +more expensive than a sequence of move insns, or if the overhead of a\n\ +library call would dominate that of the body of the copy.\n\ +\n\ +Returning true for higher values of @code{size} may also cause an increase\n\ +in code size, for example where the number of insns emitted to perform a\n\ +move would be greater than that of a library call.", + bool, (unsigned int size, unsigned int alignment, bool speed_p), + default_move_by_pieces_profitable_p) + /* True for MODE if the target expects that registers in this mode will be allocated to registers in a small 
register class. The compiler is allowed to use registers explicitly used in the rtl as spill registers diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 9f15559..ffe7080 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1402,6 +1402,41 @@ default_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, #endif } +/* For hooks which use the MOVE_RATIO macro, this gives the legacy default + behaviour. SPEED_P is true if we are compiling for speed. */ + +static unsigned int +get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) +{ + unsigned int move_ratio; +#ifdef MOVE_RATIO + move_ratio = (unsigned int) MOVE_RATIO (speed_p); +#else +#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) + move_ratio = 2; +#else /* No movmem patterns, pick a default. */ + move_ratio = ((speed_p) ? 15 : 3); +#endif +#endif + return move_ratio; +} + +/* Return true if move_by_pieces should be used for a move of SIZE units at + ALIGNMENT, rather than the movmem optab or a call to memcpy. 
*/ + +bool +default_move_by_pieces_profitable_p (unsigned int size ATTRIBUTE_UNUSED, + unsigned int alignment ATTRIBUTE_UNUSED, + bool speed_p ATTRIBUTE_UNUSED) +{ +#ifndef MOVE_BY_PIECES_P + return move_by_pieces_ninsns (size, alignment, MOVE_MAX_PIECES) + < get_move_ratio (speed_p); +#else + return !!(MOVE_BY_PIECES_P (size, alignment)); +#endif +} + bool default_profile_before_prologue (void) { diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 9178c30..93f21f8 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -181,6 +181,9 @@ extern int default_memory_move_cost (enum machine_mode, reg_class_t, bool); extern int default_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +extern bool default_move_by_pieces_profitable_p (unsigned int, + unsigned int, bool); + extern bool default_profile_before_prologue (void); extern reg_class_t default_preferred_reload_class (rtx, reg_class_t); extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);