On Mon, Jul 26, 2021 at 11:42 AM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> "H.J. Lu via Gcc-patches" <gcc-patches@gcc.gnu.org> writes:
> > +/* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
> > +   bytes from constant string DATA + OFFSET and return it as target
> > +   constant.  If PREV isn't nullptr, it has the RTL info from the
> > +   previous iteration.  */
> > +
> > +rtx
> > +builtin_memset_read_str (void *data, void *prev,
> > +                      HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > +                      fixed_size_mode mode)
> > +{
> >    const char *c = (const char *) data;
> > -  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
> > +  unsigned int size = GET_MODE_SIZE (mode);
> >
> > -  memset (p, *c, GET_MODE_SIZE (mode));
> > +  rtx target = gen_memset_value_from_prev ((by_pieces_prev *) prev,
> > +                                        mode);
> > +  if (target != nullptr)
> > +    return target;
> > +  rtx src = gen_int_mode (*c, QImode);
> > +  target = gen_memset_broadcast (src, mode);
> > +  if (target != nullptr)
> > +    return target;
> >
> > -  return c_readstr (p, mode);
> > +  char *p = XALLOCAVEC (char, size);
> > +
> > +  memset (p, *c, size);
> > +
> > +  /* Vector mode should be handled by gen_memset_broadcast above.  */
>
> Nit: s/Vector mode should/Vector modes should/

Fixed.

> > +  return c_readstr (p, as_a <scalar_int_mode> (mode));
> >  }
> >
> >  /* Callback routine for store_by_pieces.  Return the RTL of a register
> > @@ -6788,33 +6910,29 @@ builtin_memset_read_str (void *data, void *prevp,
> >     nullptr, it has the RTL info from the previous iteration.  */
> >
> >  static rtx
> > -builtin_memset_gen_str (void *data, void *prevp,
> > +builtin_memset_gen_str (void *data, void *prev,
> >                       HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
> > -                     scalar_int_mode mode)
> > +                     fixed_size_mode mode)
> >  {
> >    rtx target, coeff;
> >    size_t size;
> >    char *p;
> >
> > -  by_pieces_prev *prev = (by_pieces_prev *) prevp;
> > -  if (prev != nullptr && prev->data != nullptr)
> > -    {
> > -      /* Use the previous data in the same mode.  */
> > -      if (prev->mode == mode)
> > -     return prev->data;
> > -
> > -      target = simplify_gen_subreg (mode, prev->data, prev->mode, 0);
> > -      if (target != nullptr)
> > -     return target;
> > -    }
> > -
> >    size = GET_MODE_SIZE (mode);
> >    if (size == 1)
> >      return (rtx) data;
> >
> > +  target = gen_memset_value_from_prev ((by_pieces_prev *) prev, mode);
> > +  if (target != nullptr)
> > +    return target;
> > +
> > +  target = gen_memset_broadcast ((rtx) data, mode);
> > +  if (target != nullptr)
> > +    return target;
> > +
> >    p = XALLOCAVEC (char, size);
> >    memset (p, 1, size);
> > -  coeff = c_readstr (p, mode);
> > +  coeff = c_readstr (p, as_a <scalar_int_mode> (mode));
>
> The same comment would be useful here.

I added the same comment here.

> >
> >    target = convert_to_mode (mode, (rtx) data, 1);
> >    target = expand_mult (mode, target, coeff, NULL_RTX, 1);
> > @@ -6918,7 +7036,7 @@ try_store_by_multiple_pieces (rtx to, rtx len, 
> > unsigned int ctz_len,
> >                           &valc, align, true))
> >      return false;
> >
> > -  rtx (*constfun) (void *, void *, HOST_WIDE_INT, scalar_int_mode);
> > +  by_pieces_constfn constfun;
> >    void *constfundata;
> >    if (val)
> >      {
> > diff --git a/gcc/builtins.h b/gcc/builtins.h
> > index a64ece3f1cd..4b2ad766c61 100644
> > --- a/gcc/builtins.h
> > +++ b/gcc/builtins.h
> > @@ -111,9 +111,9 @@ extern tree mathfn_built_in (tree, enum 
> > built_in_function fn);
> >  extern tree mathfn_built_in (tree, combined_fn);
> >  extern tree mathfn_built_in_type (combined_fn);
> >  extern rtx builtin_strncpy_read_str (void *, void *, HOST_WIDE_INT,
> > -                                  scalar_int_mode);
> > +                                  fixed_size_mode);
> >  extern rtx builtin_memset_read_str (void *, void *, HOST_WIDE_INT,
> > -                                 scalar_int_mode);
> > +                                 fixed_size_mode);
> >  extern rtx expand_builtin_saveregs (void);
> >  extern tree std_build_builtin_va_list (void);
> >  extern tree std_fn_abi_va_list (tree);
> > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> > index c8f4abe3e41..14d387750a8 100644
> > --- a/gcc/doc/tm.texi
> > +++ b/gcc/doc/tm.texi
> > @@ -12149,6 +12149,13 @@ This function prepares to emit a conditional 
> > comparison within a sequence
> >   @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the 
> > compares.
> >  @end deftypefn
> >
> > +@deftypefn {Target Hook} rtx TARGET_GEN_MEMSET_SCRATCH_RTX (machine_mode 
> > @var{mode})
> > +This hook should return an rtx for scratch register in @var{mode} to
>
> s/for scratch register/for a scratch register/

Fixed.

> > +be used by memset broadcast.  The backend can use a hard scratch register
>
> How about: s/be used by memset broadcast/be used when expanding memset calls/
> since the hook is (IMO rightly) not restricted to vector modes.

Fixed.

> > +to avoid stack realignment when expanding memset.  The default is
> > +@code{gen_reg_rtx}.
> > +@end deftypefn
> > +
> >  @deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned 
> > @var{nunroll}, class loop *@var{loop})
> >  This target hook returns a new value for the number of times @var{loop}
> >  should be unrolled. The parameter @var{nunroll} is the number of times
> > […]
> > @@ -1446,7 +1511,10 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
> >        max_size = STORE_MAX_PIECES + 1;
> >        while (max_size > 1 && l > 0)
> >       {
> > -       scalar_int_mode mode = widest_int_mode_for_size (max_size);
> > +       /* Since this can be called before virtual registers are ready
> > +          to use, avoid QI vector mode here.  */
> > +       fixed_size_mode mode
> > +         = widest_fixed_size_mode_for_size (max_size, false);
>
> I think I might have asked this before, sorry, but: when is that true
> and why does it matter?

can_store_by_pieces may be called:

value-prof.c:      if (!can_store_by_pieces (val, builtin_memset_read_str,
value-prof.c:      if (!can_store_by_pieces (val, builtin_memset_read_str,

before virtual registers can be used.   When true is passed to
widest_fixed_size_mode_for_size,  virtual registers may be used
to expand memset to broadcast, which leads to ICE.   Since for the
purpose of can_store_by_pieces, we don't need to expand memset
to broadcast and pass false here can avoid ICE.

> Apart from that final question, the patch looks good with the changes
> above (no need to repost).  I'd just like to know about the reason for
> the “false” argument above before the patch goes in.
>
> Thanks,
> Richard

Thanks.


-- 
H.J.

Reply via email to