On Tue, Aug 5, 2025 at 9:22 AM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Tue, Aug 5, 2025 at 3:32 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > commit 050b1708ea532ea4840e97d85fad4ca63d4cd631
> > Author: H.J. Lu <hjl.to...@gmail.com>
> > Date:   Thu Jun 19 05:03:48 2025 +0800
> >
> >     x86: Get the widest vector mode from MOVE_MAX
> >
> > gets the widest vector mode from MOVE_MAX.  But for memset, it should
> > use STORE_MAX_PIECES.
> >
> > gcc/
> >
> >         PR target/121410
> >         * config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): Use
> >         STORE_MAX_PIECES to get the widest vector mode in vector loop
> >         for memset.
> >
> > gcc/testsuite/
> >
> >         PR target/121410
> >         * gcc.target/i386/pr121410.c: New test.
>
> OK with a small cosmetic change suggested below.
>
> Thanks,
> Uros.
>
> >
> > Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
> > ---
> >  gcc/config/i386/i386-expand.cc           |  5 +++--
> >  gcc/testsuite/gcc.target/i386/pr121410.c | 11 +++++++++++
> >  2 files changed, 14 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr121410.c
> >
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index 0e5af5319ff..a5527081d9e 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -9574,8 +9574,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
> >      case vector_loop:
> >        need_zero_guard = true;
> >        unroll_factor = 4;
> > -      /* Get the vector mode to move MOVE_MAX bytes.  */
> > -      nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
> > +      /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes.  */
> > +      nunits = ((issetmem ? STORE_MAX_PIECES : MOVE_MAX)
>
> Please put the number of bytes into a temporary.

This is the patch I am checking in:

      /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes.  */
      nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
      nunits /= GET_MODE_SIZE (word_mode);

> > +               / GET_MODE_SIZE (word_mode));
> >        if (nunits > 1)
> >         {
> >           move_mode = mode_for_vector (word_mode, nunits).require ();
> > diff --git a/gcc/testsuite/gcc.target/i386/pr121410.c b/gcc/testsuite/gcc.target/i386/pr121410.c
> > new file mode 100644
> > index 00000000000..04bab91e1b8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr121410.c
> > @@ -0,0 +1,11 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=x86-64 -mavx512f -mstore-max=128" } */
> > +
> > +extern unsigned _BitInt(3719) a;
> > +extern _BitInt(465) g;
> > +void
> > +foo(void)
> > +{
> > +  _BitInt(465) b = a >> 1860;
> > +  g = b + b;
> > +}
> > --
> > 2.50.1
> >



-- 
H.J.
From 0f91925f628f36292a94688fdb68a343ae43af30 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Tue, 5 Aug 2025 06:27:15 -0700
Subject: [PATCH v2] x86: Get the widest vector mode from STORE_MAX_PIECES for
 memset

commit 050b1708ea532ea4840e97d85fad4ca63d4cd631
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Jun 19 05:03:48 2025 +0800

    x86: Get the widest vector mode from MOVE_MAX

gets the widest vector mode from MOVE_MAX.  But for memset, which only
stores, it should use STORE_MAX_PIECES instead.

gcc/

	PR target/121410
	* config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): Use
	STORE_MAX_PIECES to get the widest vector mode in vector loop
	for memset.

gcc/testsuite/

	PR target/121410
	* gcc.target/i386/pr121410.c: New test.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-expand.cc           |  5 +++--
 gcc/testsuite/gcc.target/i386/pr121410.c | 11 +++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121410.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0e5af5319ff..911dd8e1e65 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -9574,8 +9574,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
     case vector_loop:
       need_zero_guard = true;
       unroll_factor = 4;
-      /* Get the vector mode to move MOVE_MAX bytes.  */
-      nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+      /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes.  */
+      nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
+      nunits /= GET_MODE_SIZE (word_mode);
       if (nunits > 1)
 	{
 	  move_mode = mode_for_vector (word_mode, nunits).require ();
diff --git a/gcc/testsuite/gcc.target/i386/pr121410.c b/gcc/testsuite/gcc.target/i386/pr121410.c
new file mode 100644
index 00000000000..04bab91e1b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121410.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mavx512f -mstore-max=128" } */
+
+extern unsigned _BitInt(3719) a;
+extern _BitInt(465) g;
+void
+foo(void)
+{
+  _BitInt(465) b = a >> 1860;
+  g = b + b;
+}
-- 
2.50.1

Reply via email to