Hi!

On the following testcase, we've regressed in bar since 8.x.  In 8.x,
store merging came up with mem = 64-bit constant, but starting with the
change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs,
we don't do that anymore.  The mem = STRING_CST expansion can do that,
but only if there are no embedded zeros.  The following patch improves
it even for embedded zeros, by using a new callback for the
can_store_by_pieces/store_by_pieces calls which knows how to handle
STRING_CST.  We don't need strlen in that case and can use
TREE_STRING_LENGTH instead.  Additionally, if the STRING_CST is slightly
shorter than the destination region, it might generate better code to
store_by_pieces it all in one go (bytes from the STRING_CST up to its last
one, followed by artificially added zeros); only if that doesn't seem to be
beneficial (e.g. a very small STRING_CST followed by kilobytes of zeros)
does the patch go for store_by_pieces of the STRING_CST (rounded up to the
next multiple of STORE_MAX_PIECES) followed by a clear_storage.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/66152
        * builtins.h (c_readstr): Declare.
        * builtins.c (c_readstr): Remove forward declaration.  Add
        null_terminated_p argument, if false, read all bytes from the
        string instead of stopping after '\0'.
        * expr.c (string_cst_read_str): New function.
        (store_expr): Use string_cst_read_str instead of
        builtin_strncpy_read_str.  Try to store by pieces the whole
        exp_len first, and only if that fails, split it up into
        store by pieces followed by clear_storage.  Formatting fix.

        * gcc.target/i386/pr66152.c: New test.

--- gcc/builtins.h.jj   2019-02-14 08:06:37.878546571 +0100
+++ gcc/builtins.h      2019-02-15 11:33:50.208180171 +0100
@@ -103,6 +103,7 @@ struct c_strlen_data
 };
 
 extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1);
+extern rtx c_readstr (const char *, scalar_int_mode, bool = true);
 extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
--- gcc/builtins.c.jj   2019-02-11 20:58:48.509965578 +0100
+++ gcc/builtins.c      2019-02-15 11:37:00.046029652 +0100
@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_
 /* Non-zero if __builtin_constant_p should be folded right away.  */
 bool force_folding_builtin_constant_p;
 
-static rtx c_readstr (const char *, scalar_int_mode);
 static int target_char_cast (tree, char *);
 static rtx get_memory_rtx (tree, tree);
 static int apply_args_size (void);
@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st
 }
 
 /* Return a constant integer corresponding to target reading
-   GET_MODE_BITSIZE (MODE) bits from string constant STR.  */
-
-static rtx
-c_readstr (const char *str, scalar_int_mode mode)
+   GET_MODE_BITSIZE (MODE) bits from string constant STR.  If
+   NULL_TERMINATED_P, reading stops after '\0' character, all further ones
+   are assumed to be zero, otherwise it reads as many characters
+   as needed.  */
+
+rtx
+c_readstr (const char *str, scalar_int_mode mode,
+          bool null_terminated_p/*=true*/)
 {
   HOST_WIDE_INT ch;
   unsigned int i, j;
@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m
        j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
       j *= BITS_PER_UNIT;
 
-      if (ch)
+      if (ch || !null_terminated_p)
        ch = (unsigned char) str[i];
       tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
     }
--- gcc/expr.c.jj       2019-02-08 20:00:40.309835608 +0100
+++ gcc/expr.c  2019-02-15 11:37:18.715719809 +0100
@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from)
   return maybe_expand_insn (code, 2, ops);
 }
 
+/* Helper function for store_expr storing of STRING_CST.  */
+
+static rtx
+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+{
+  tree str = (tree) data;
+
+  gcc_assert (offset >= 0);
+  if (offset >= TREE_STRING_LENGTH (str))
+    return const0_rtx;
+
+  if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
+      > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str))
+    {
+      char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+      size_t l = TREE_STRING_LENGTH (str) - offset;
+      memcpy (p, TREE_STRING_POINTER (str) + offset, l);
+      memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
+      return c_readstr (p, mode, false);
+    }
+
+  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+}
+
 /* Generate code for computing expression EXP,
    and storing the value into TARGET.
 
@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from)
 
 rtx
 store_expr (tree exp, rtx target, int call_param_p,
-                       bool nontemporal, bool reverse)
+           bool nontemporal, bool reverse)
 {
   rtx temp;
   rtx alt_rtl = NULL_RTX;
@@ -5606,36 +5630,32 @@ store_expr (tree exp, rtx target, int ca
       if (TREE_STRING_LENGTH (str) <= 0)
        goto normal_expr;
 
-      str_copy_len = strlen (TREE_STRING_POINTER (str));
-      if (str_copy_len < TREE_STRING_LENGTH (str) - 1)
-       goto normal_expr;
+      if (can_store_by_pieces (exp_len, string_cst_read_str, (void *) str,
+                              MEM_ALIGN (target), false))
+       {
+         store_by_pieces (target, exp_len, string_cst_read_str, (void *) str,
+                          MEM_ALIGN (target), false, RETURN_BEGIN);
+         return NULL_RTX;
+       }
 
       str_copy_len = TREE_STRING_LENGTH (str);
-      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0
-         && TREE_STRING_POINTER (str)[TREE_STRING_LENGTH (str) - 1] == '\0')
+      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
        {
          str_copy_len += STORE_MAX_PIECES - 1;
          str_copy_len &= ~(STORE_MAX_PIECES - 1);
        }
-      str_copy_len = MIN (str_copy_len, exp_len);
-      if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
-                               CONST_CAST (char *, TREE_STRING_POINTER (str)),
-                               MEM_ALIGN (target), false))
+      if (str_copy_len >= exp_len)
        goto normal_expr;
 
-      dest_mem = target;
+      if (!can_store_by_pieces (str_copy_len, string_cst_read_str,
+                               (void *) str, MEM_ALIGN (target), false))
+       goto normal_expr;
 
-      memop_ret retmode = exp_len > str_copy_len ? RETURN_END : RETURN_BEGIN;
-      dest_mem = store_by_pieces (dest_mem,
-                                 str_copy_len, builtin_strncpy_read_str,
-                                 CONST_CAST (char *,
-                                             TREE_STRING_POINTER (str)),
-                                 MEM_ALIGN (target), false,
-                                 retmode);
-      if (exp_len > str_copy_len)
-       clear_storage (adjust_address (dest_mem, BLKmode, 0),
-                      GEN_INT (exp_len - str_copy_len),
-                      BLOCK_OP_NORMAL);
+      dest_mem = store_by_pieces (target, str_copy_len, string_cst_read_str,
+                                 (void *) str, MEM_ALIGN (target), false,
+                                 RETURN_END);
+      clear_storage (adjust_address (dest_mem, BLKmode, 0),
+                    GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
       return NULL_RTX;
     }
   else
--- gcc/testsuite/gcc.target/i386/pr66152.c.jj  2019-02-15 11:55:56.212164557 +0100
+++ gcc/testsuite/gcc.target/i386/pr66152.c     2019-02-15 11:56:47.769308378 +0100
@@ -0,0 +1,25 @@
+/* PR rtl-optimization/66152 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "movabs\[^\n\r]*506097522914230528" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "movabs\[^\n\r]*505813836079825408" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "mov\[^\n\r]*50462976" { target ia32 } } } */
+/* { dg-final { scan-assembler "mov\[^\n\r]*117835012" { target ia32 } } } */
+/* { dg-final { scan-assembler "mov\[^\n\r]*100925952" { target ia32 } } } */
+/* { dg-final { scan-assembler "mov\[^\n\r]*117768961" { target ia32 } } } */
+
+void foo (char *);
+
+void
+bar (void)
+{
+  char a[] = {0,1,2,3,4,5,6,7};
+  foo (a);
+}
+
+void
+baz (void)
+{
+  char a[8] = "\0\2\4\6\1\3\5\7";
+  foo (a);
+}

        Jakub

Reply via email to