On Fri, May 15, 2020 at 8:27 AM Uros Bizjak via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, May 15, 2020 at 1:13 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > The -mgeneral-regs-only option generates code that uses only the
> > general-purpose registers.  It prevents the compiler from using vector
> > registers.  But GCC may still generate calls to memcpy, memmove, memset
> > and memcmp library functions.  In the GNU C library, these library
> > functions are implemented with vector registers, which makes the
> > -mgeneral-regs-only option less effective.  The new -mavoid-libcall
> > option expands memcpy, memmove and memset into REP MOVSB and REP STOSB
> > sequences.  This option can be further enhanced with a cmpmem pattern
> > to expand memcmp into a REP CMPSB sequence in the future.
> >
> > Tested on Linux/x86 and Linux/x86-64.  OK for master?
>
> No. The library should provide functions that are appropriate for your
> target. There are probably other places in the library that use XMM
> registers, so there is no point in working around only some specific
> functions.

For those specific functions -minline-all-stringops should also work, no?

Richard.

> Uros.
>
> > Thanks.
> >
> > H.J.
> > ---
> > gcc/
> >
> >         PR target/95134
> >         * config/i386/i386-expand.c (alg_usable_p): Return false for
> >         libcall with -mavoid-libcall.
> >         (decide_alg): Avoid libcall and rep_prefix_1_byte instead of
> >         libcall with -mavoid-libcall.
> >         * config/i386/i386.opt: Add -mavoid-libcall.
> >         * doc/invoke.texi: Document -mavoid-libcall.
> >
> > gcc/testsuite/
> >
> >         PR target/95134
> >         * gcc.target/i386/pr95134-1.c: New test.
> >         * gcc.target/i386/pr95134-2.c: Likewise.
> >         * gcc.target/i386/pr95134-3.c: Likewise.
> >         * gcc.target/i386/pr95134-4.c: Likewise.
> > ---
> >  gcc/config/i386/i386-expand.c             | 15 ++++++++++-----
> >  gcc/config/i386/i386.opt                  |  6 +++++-
> >  gcc/doc/invoke.texi                       | 10 +++++++++-
> >  gcc/testsuite/gcc.target/i386/pr95134-1.c | 18 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr95134-2.c | 18 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr95134-3.c | 18 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr95134-4.c | 11 +++++++++++
> >  7 files changed, 89 insertions(+), 7 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-4.c
> >
> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > index 26531585c5f..b38463bf88c 100644
> > --- a/gcc/config/i386/i386-expand.c
> > +++ b/gcc/config/i386/i386-expand.c
> > @@ -6816,7 +6816,7 @@ alg_usable_p (enum stringop_alg alg, bool memset, 
> > bool have_as)
> >           || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
> >         return false;
> >      }
> > -  return true;
> > +  return !flag_avoid_libcall || alg != libcall;
> >  }
> >
> >  /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  
> > */
> > @@ -6889,7 +6889,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> > expected_size,
> >       setup.  */
> >    else if (expected_size != -1 && expected_size < 4)
> >      return loop_1_byte;
> > -  else if (expected_size != -1)
> > +  else if (expected_size != -1 && !flag_avoid_libcall)
> >      {
> >        enum stringop_alg alg = libcall;
> >        bool alg_noalign = false;
> > @@ -6934,6 +6934,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> > expected_size,
> >             }
> >         }
> >      }
> > +
> > +  enum stringop_alg alg;
> > +
> >    /* When asked to inline the call anyway, try to pick meaningful choice.
> >       We look for maximal size of block that is faster to copy by hand and
> >       take blocks of at most of that size guessing that average size will
> > @@ -6945,7 +6948,6 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> > expected_size,
> >        && (algs->unknown_size == libcall
> >           || !alg_usable_p (algs->unknown_size, memset, have_as)))
> >      {
> > -      enum stringop_alg alg;
> >        HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
> >
> >        /* If there aren't any usable algorithms or if recursing already,
> > @@ -6967,8 +6969,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> > expected_size,
> >         gcc_assert (alg != libcall);
> >        return alg;
> >      }
> > -  return (alg_usable_p (algs->unknown_size, memset, have_as)
> > -         ? algs->unknown_size : libcall);
> > +  alg = (alg_usable_p (algs->unknown_size, memset, have_as)
> > +        ? algs->unknown_size : libcall);
> > +  if (flag_avoid_libcall && alg == libcall)
> > +    alg = rep_prefix_1_byte;
> > +  return alg;
> >  }
> >
> >  /* Decide on alignment.  We know that the operand is already aligned to 
> > ALIGN
> > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> > index c9f7195d423..23b401bd424 100644
> > --- a/gcc/config/i386/i386.opt
> > +++ b/gcc/config/i386/i386.opt
> > @@ -1114,4 +1114,8 @@ Support SERIALIZE built-in functions and code 
> > generation.
> >
> >  mtsxldtrk
> >  Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
> > -Support TSXLDTRK built-in functions and code generation.
> > \ No newline at end of file
> > +Support TSXLDTRK built-in functions and code generation.
> > +
> > +mavoid-libcall
> > +Target Report Var(flag_avoid_libcall) Init(0)
> > +Avoid generation of libcall.
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index 850aeac033d..0d2d70419d5 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -1364,7 +1364,7 @@ See RS/6000 and PowerPC Options.
> >  -mstack-protector-guard-reg=@var{reg} @gol
> >  -mstack-protector-guard-offset=@var{offset} @gol
> >  -mstack-protector-guard-symbol=@var{symbol} @gol
> > --mgeneral-regs-only  -mcall-ms2sysv-xlogues @gol
> > +-mgeneral-regs-only -mavoid-libcall -mcall-ms2sysv-xlogues @gol
> >  -mindirect-branch=@var{choice}  -mfunction-return=@var{choice} @gol
> >  -mindirect-branch-register}
> >
> > @@ -30115,6 +30115,14 @@ Generate code that uses only the general-purpose 
> > registers.  This
> >  prevents the compiler from using floating-point, vector, mask and bound
> >  registers.
> >
> > +@item -mavoid-libcall
> > +@opindex mavoid-libcall
> > +Avoid generation of calls to @code{memcpy}, @code{memmove} and
> > +@code{memset} library functions.  It can be used together with the
> > +option @option{-mgeneral-regs-only} to avoid implicit vector register
> > +usage in @code{memcpy}, @code{memmove} and @code{memset} library
> > +functions.
> > +
> >  @item -mindirect-branch=@var{choice}
> >  @opindex mindirect-branch
> >  Convert indirect call and jump with @var{choice}.  The default is
> > diff --git a/gcc/testsuite/gcc.target/i386/pr95134-1.c 
> > b/gcc/testsuite/gcc.target/i386/pr95134-1.c
> > new file mode 100644
> > index 00000000000..8ffa680559d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr95134-1.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=skylake" } 
> > */
> > +
> > +struct foo
> > +{
> > +  char array[513];
> > +};
> > +
> > +extern struct foo x;
> > +
> > +int
> > +func (void)
> > +{
> > +  __builtin_memset (&x, 0, sizeof (x));
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memset" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr95134-2.c 
> > b/gcc/testsuite/gcc.target/i386/pr95134-2.c
> > new file mode 100644
> > index 00000000000..7c6c42a736d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr95134-2.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile { target ia32 } } */
> > +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } 
> > */
> > +
> > +struct foo
> > +{
> > +  char array[257];
> > +};
> > +
> > +extern struct foo x;
> > +
> > +int
> > +func (struct foo i)
> > +{
> > +  x = i;
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr95134-3.c 
> > b/gcc/testsuite/gcc.target/i386/pr95134-3.c
> > new file mode 100644
> > index 00000000000..4e4428cd0ae
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr95134-3.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile { target ia32 } } */
> > +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } 
> > */
> > +
> > +struct foo
> > +{
> > +  char array[257];
> > +};
> > +
> > +extern struct foo x;
> > +
> > +int
> > +func (struct foo i)
> > +{
> > +  __builtin_memcpy (&x, &i, sizeof (x));
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr95134-4.c 
> > b/gcc/testsuite/gcc.target/i386/pr95134-4.c
> > new file mode 100644
> > index 00000000000..d1bd8fbf4c1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr95134-4.c
> > @@ -0,0 +1,11 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall" } */
> > +
> > +int
> > +func (void *d, void *s, unsigned int l)
> > +{
> > +  __builtin_memcpy (d, s, l);
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> > --
> > 2.26.2
> >

Reply via email to