On Fri, May 15, 2020 at 1:13 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> The -mgeneral-regs-only option generates code that uses only the
> general-purpose registers.  It prevents the compiler from using vector
> registers.  But GCC may still generate calls to memcpy, memmove, memset
> and memcmp library functions.  In the GNU C library, these library
> functions are implemented with vector registers, which makes the
> -mgeneral-regs-only option less effective.  The new -mavoid-libcall
> option expands memcpy, memmove and memset into REP MOVSB and REP STOSB
> sequences.  This option can be further enhanced with a cmpmem pattern
> to expand memcmp into a REP CMPSB sequence in the future.
>
> Tested on Linux/x86 and Linux/x86-64.  OK for master?

No. The library should provide functions that are appropriate for your
target. There are probably other places in the library that use XMM
registers, so there is no point in working around only some specific
functions.

Uros.

> Thanks.
>
> H.J.
> ---
> gcc/
>
>         PR target/95134
>         * config/i386/i386-expand.c (alg_usable_p): Return false for
>         libcall with -mavoid-libcall.
>         (decide_alg): Avoid libcall and use rep_prefix_1_byte instead of
>         libcall with -mavoid-libcall.
>         * config/i386/i386.opt: Add -mavoid-libcall.
>         * doc/invoke.texi: Document -mavoid-libcall.
>
> gcc/testsuite/
>
>         PR target/95134
>         * gcc.target/i386/pr95134-1.c: New test.
>         * gcc.target/i386/pr95134-2.c: Likewise.
>         * gcc.target/i386/pr95134-3.c: Likewise.
>         * gcc.target/i386/pr95134-4.c: Likewise.
> ---
>  gcc/config/i386/i386-expand.c             | 15 ++++++++++-----
>  gcc/config/i386/i386.opt                  |  6 +++++-
>  gcc/doc/invoke.texi                       | 10 +++++++++-
>  gcc/testsuite/gcc.target/i386/pr95134-1.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95134-2.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95134-3.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr95134-4.c | 11 +++++++++++
>  7 files changed, 89 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95134-4.c
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 26531585c5f..b38463bf88c 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -6816,7 +6816,7 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool 
> have_as)
>           || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
>         return false;
>      }
> -  return true;
> +  return !flag_avoid_libcall || alg != libcall;
>  }
>
>  /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
> @@ -6889,7 +6889,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> expected_size,
>       setup.  */
>    else if (expected_size != -1 && expected_size < 4)
>      return loop_1_byte;
> -  else if (expected_size != -1)
> +  else if (expected_size != -1 && !flag_avoid_libcall)
>      {
>        enum stringop_alg alg = libcall;
>        bool alg_noalign = false;
> @@ -6934,6 +6934,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> expected_size,
>             }
>         }
>      }
> +
> +  enum stringop_alg alg;
> +
>    /* When asked to inline the call anyway, try to pick meaningful choice.
>       We look for maximal size of block that is faster to copy by hand and
>       take blocks of at most of that size guessing that average size will
> @@ -6945,7 +6948,6 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> expected_size,
>        && (algs->unknown_size == libcall
>           || !alg_usable_p (algs->unknown_size, memset, have_as)))
>      {
> -      enum stringop_alg alg;
>        HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
>
>        /* If there aren't any usable algorithms or if recursing already,
> @@ -6967,8 +6969,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT 
> expected_size,
>         gcc_assert (alg != libcall);
>        return alg;
>      }
> -  return (alg_usable_p (algs->unknown_size, memset, have_as)
> -         ? algs->unknown_size : libcall);
> +  alg = (alg_usable_p (algs->unknown_size, memset, have_as)
> +        ? algs->unknown_size : libcall);
> +  if (flag_avoid_libcall && alg == libcall)
> +    alg = rep_prefix_1_byte;
> +  return alg;
>  }
>
>  /* Decide on alignment.  We know that the operand is already aligned to ALIGN
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index c9f7195d423..23b401bd424 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1114,4 +1114,8 @@ Support SERIALIZE built-in functions and code 
> generation.
>
>  mtsxldtrk
>  Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
> -Support TSXLDTRK built-in functions and code generation.
> \ No newline at end of file
> +Support TSXLDTRK built-in functions and code generation.
> +
> +mavoid-libcall
> +Target Report Var(flag_avoid_libcall) Init(0)
> +Avoid generation of libcall.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 850aeac033d..0d2d70419d5 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1364,7 +1364,7 @@ See RS/6000 and PowerPC Options.
>  -mstack-protector-guard-reg=@var{reg} @gol
>  -mstack-protector-guard-offset=@var{offset} @gol
>  -mstack-protector-guard-symbol=@var{symbol} @gol
> --mgeneral-regs-only  -mcall-ms2sysv-xlogues @gol
> +-mgeneral-regs-only -mavoid-libcall -mcall-ms2sysv-xlogues @gol
>  -mindirect-branch=@var{choice}  -mfunction-return=@var{choice} @gol
>  -mindirect-branch-register}
>
> @@ -30115,6 +30115,14 @@ Generate code that uses only the general-purpose 
> registers.  This
>  prevents the compiler from using floating-point, vector, mask and bound
>  registers.
>
> +@item -mavoid-libcall
> +@opindex mavoid-libcall
> +Avoid generation of calls to @code{memcpy}, @code{memmove} and
> +@code{memset} library functions.  It can be used together with the
> +option @option{-mgeneral-regs-only} to avoid implicit vector register
> +usage in @code{memcpy}, @code{memmove} and @code{memset} library
> +functions.
> +
>  @item -mindirect-branch=@var{choice}
>  @opindex mindirect-branch
>  Convert indirect call and jump with @var{choice}.  The default is
> diff --git a/gcc/testsuite/gcc.target/i386/pr95134-1.c 
> b/gcc/testsuite/gcc.target/i386/pr95134-1.c
> new file mode 100644
> index 00000000000..8ffa680559d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95134-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=skylake" } */
> +
> +struct foo
> +{
> +  char array[513];
> +};
> +
> +extern struct foo x;
> +
> +int
> +func (void)
> +{
> +  __builtin_memset (&x, 0, sizeof (x));
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memset" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95134-2.c 
> b/gcc/testsuite/gcc.target/i386/pr95134-2.c
> new file mode 100644
> index 00000000000..7c6c42a736d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95134-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } */
> +
> +struct foo
> +{
> +  char array[257];
> +};
> +
> +extern struct foo x;
> +
> +int
> +func (struct foo i)
> +{
> +  x = i;
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95134-3.c 
> b/gcc/testsuite/gcc.target/i386/pr95134-3.c
> new file mode 100644
> index 00000000000..4e4428cd0ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95134-3.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall -mtune=pentium" } */
> +
> +struct foo
> +{
> +  char array[257];
> +};
> +
> +extern struct foo x;
> +
> +int
> +func (struct foo i)
> +{
> +  __builtin_memcpy (&x, &i, sizeof (x));
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95134-4.c 
> b/gcc/testsuite/gcc.target/i386/pr95134-4.c
> new file mode 100644
> index 00000000000..d1bd8fbf4c1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95134-4.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mgeneral-regs-only -mavoid-libcall" } */
> +
> +int
> +func (void *d, void *s, unsigned int l)
> +{
> +  __builtin_memcpy (d, s, l);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcpy" } } */
> --
> 2.26.2
>

Reply via email to