https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89226

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2019-02-06
                 CC|                            |hjl.tools at gmail dot com,
                   |                            |jakub at gcc dot gnu.org,
                   |                            |uros at gcc dot gnu.org
     Ever confirmed|0                           |1

--- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
It seems that most of the *by_pieces code actually uses
widest_int_mode_for_size, which already handles even the wider modes as long
as they have a mov<mode> instruction.  With this completely untested patch I
get roughly the same code with -mavx and better code with -mavx512f.  The one
drawback is that, for some reason, the functions now have a frame pointer (I
don't know whether that is caused by the OImode/XImode, while vector modes can
be handled, or by something else).  I tried memset with zero too, but haven't
tried other memsets (those could be problematic already) or comparisons.

Thoughts on this?  This is not GCC 9 material, though.  Perhaps it should also
depend on the selected preferred vector width, so that we don't, e.g., enable
AVX512F if that is undesirable from a power-consumption point of view.

--- gcc/config/i386/i386.h.jj   2019-01-01 12:37:32.988715207 +0100
+++ gcc/config/i386/i386.h      2019-02-06 21:13:01.047765193 +0100
@@ -1886,7 +1886,9 @@ typedef struct ix86_args {
     && TARGET_SSE2 \
     && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
     && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
-   ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD)
+   ? (TARGET_AVX512F ? GET_MODE_SIZE (XImode) \
+      : TARGET_AVX ? GET_MODE_SIZE (OImode) \
+      : GET_MODE_SIZE (TImode)) : UNITS_PER_WORD)

 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.
--- gcc/expr.c.jj       2019-01-22 10:09:47.304587822 +0100
+++ gcc/expr.c  2019-02-06 21:23:18.041603623 +0100
@@ -713,14 +713,31 @@ convert_modes (machine_mode mode, machin
   return temp;
 }


+/* Return the widest integer mode that is narrower than SIZE bytes.  */
+
+static scalar_int_mode
+widest_int_mode_for_size (unsigned int size)
+{
+  scalar_int_mode result = NARROWEST_INT_MODE;
+
+  gcc_checking_assert (size > 1);
+
+  opt_scalar_int_mode tmode;
+  FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
+    if (GET_MODE_SIZE (tmode.require ()) < size)
+      result = tmode.require ();
+
+  return result;
+}
+
 /* Return the largest alignment we can use for doing a move (or store)
    of MAX_PIECES.  ALIGN is the largest alignment we could use.  */

 static unsigned int
 alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
 {
-  scalar_int_mode tmode
-    = int_mode_for_size (max_pieces * BITS_PER_UNIT, 1).require ();
+  scalar_int_mode tmode = widest_int_mode_for_size (max_pieces + 1);
+  gcc_assert (GET_MODE_SIZE (tmode) == max_pieces);

   if (align >= GET_MODE_ALIGNMENT (tmode))
     align = GET_MODE_ALIGNMENT (tmode);
@@ -743,23 +760,6 @@ alignment_for_piecewise_move (unsigned i
   return align;
 }

-/* Return the widest integer mode that is narrower than SIZE bytes.  */
-
-static scalar_int_mode
-widest_int_mode_for_size (unsigned int size)
-{
-  scalar_int_mode result = NARROWEST_INT_MODE;
-
-  gcc_checking_assert (size > 1);
-
-  opt_scalar_int_mode tmode;
-  FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
-    if (GET_MODE_SIZE (tmode.require ()) < size)
-      result = tmode.require ();
-
-  return result;
-}
-
 /* Determine whether an operation OP on LEN bytes with alignment ALIGN can
    and should be performed piecewise.  */

Reply via email to