"H.J. Lu via Gcc-patches" <gcc-patches@gcc.gnu.org> writes: > +/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > + bytes from constant string DATA + OFFSET and return it as target > + constant. If PREV isn't nullptr, it has the RTL info from the > + previous iteration. */ > + > +rtx > +builtin_memset_read_str (void *data, void *prev, > + HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > + fixed_size_mode mode) > +{ > const char *c = (const char *) data; > - char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); > + unsigned int size = GET_MODE_SIZE (mode); > > - memset (p, *c, GET_MODE_SIZE (mode)); > + rtx target = gen_memset_value_from_prev ((by_pieces_prev *) prev, > + mode); > + if (target != nullptr) > + return target; > + rtx src = gen_int_mode (*c, QImode); > + target = gen_memset_broadcast (src, mode); > + if (target != nullptr) > + return target; > > - return c_readstr (p, mode); > + char *p = XALLOCAVEC (char, size); > + > + memset (p, *c, size); > + > + /* Vector mode should be handled by gen_memset_broadcast above. */
Nit: s/Vector mode should/Vector modes should/ > + return c_readstr (p, as_a <scalar_int_mode> (mode)); > } > > /* Callback routine for store_by_pieces. Return the RTL of a register > @@ -6788,33 +6910,29 @@ builtin_memset_read_str (void *data, void *prevp, > nullptr, it has the RTL info from the previous iteration. */ > > static rtx > -builtin_memset_gen_str (void *data, void *prevp, > +builtin_memset_gen_str (void *data, void *prev, > HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > - scalar_int_mode mode) > + fixed_size_mode mode) > { > rtx target, coeff; > size_t size; > char *p; > > - by_pieces_prev *prev = (by_pieces_prev *) prevp; > - if (prev != nullptr && prev->data != nullptr) > - { > - /* Use the previous data in the same mode. */ > - if (prev->mode == mode) > - return prev->data; > - > - target = simplify_gen_subreg (mode, prev->data, prev->mode, 0); > - if (target != nullptr) > - return target; > - } > - > size = GET_MODE_SIZE (mode); > if (size == 1) > return (rtx) data; > > + target = gen_memset_value_from_prev ((by_pieces_prev *) prev, mode); > + if (target != nullptr) > + return target; > + > + target = gen_memset_broadcast ((rtx) data, mode); > + if (target != nullptr) > + return target; > + > p = XALLOCAVEC (char, size); > memset (p, 1, size); > - coeff = c_readstr (p, mode); > + coeff = c_readstr (p, as_a <scalar_int_mode> (mode)); The same comment would be useful here. > > target = convert_to_mode (mode, (rtx) data, 1); > target = expand_mult (mode, target, coeff, NULL_RTX, 1); > @@ -6918,7 +7036,7 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned > int ctz_len, > &valc, align, true)) > return false; > > - rtx (*constfun) (void *, void *, HOST_WIDE_INT, scalar_int_mode); > + by_pieces_constfn constfun; > void *constfundata; > if (val) > { > diff --git a/gcc/builtins.h b/gcc/builtins.h > index a64ece3f1cd..4b2ad766c61 100644 > --- a/gcc/builtins.h > +++ b/gcc/builtins.h > @@ -111,9 +111,9 @@ extern tree mathfn_built_in (tree, enum built_in_function > fn); > extern tree mathfn_built_in (tree, combined_fn); > extern tree mathfn_built_in_type (combined_fn); > extern rtx builtin_strncpy_read_str (void *, void *, HOST_WIDE_INT, > - scalar_int_mode); > + fixed_size_mode); > extern rtx builtin_memset_read_str (void *, void *, HOST_WIDE_INT, > - scalar_int_mode); > + fixed_size_mode); > extern rtx expand_builtin_saveregs (void); > extern tree std_build_builtin_va_list (void); > extern tree std_fn_abi_va_list (tree); > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi > index c8f4abe3e41..14d387750a8 100644 > --- a/gcc/doc/tm.texi > +++ b/gcc/doc/tm.texi > @@ -12149,6 +12149,13 @@ This function prepares to emit a conditional > comparison within a sequence > @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares. > @end deftypefn > > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_SCRATCH_RTX (machine_mode > @var{mode}) > +This hook should return an rtx for scratch register in @var{mode} to s/for scratch register/for a scratch register/ > +be used by memset broadcast. The backend can use a hard scratch register How about: s/be used by memset broadcast/be used when expanding memset calls/ since the hook is (IMO rightly) not restricted to vector modes. > +to avoid stack realignment when expanding memset. The default is > +@code{gen_reg_rtx}. > +@end deftypefn > + > @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned > @var{nunroll}, class loop *@var{loop}) > This target hook returns a new value for the number of times @var{loop} > should be unrolled. The parameter @var{nunroll} is the number of times > […] > @@ -1446,7 +1511,10 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > max_size = STORE_MAX_PIECES + 1; > while (max_size > 1 && l > 0) > { > - scalar_int_mode mode = widest_int_mode_for_size (max_size); > + /* Since this can be called before virtual registers are ready > + to use, avoid QI vector mode here. */ > + fixed_size_mode mode > + = widest_fixed_size_mode_for_size (max_size, false); I think I might have asked this before, sorry, but: when is that true and why does it matter? Apart from that final question, the patch looks good with the changes above (no need to repost). I'd just like to know about the reason for the “false” argument above before the patch goes in. Thanks, Richard