On Tue, 3 Dec 2019, Richard Biener wrote:

> On Mon, 2 Dec 2019, Richard Biener wrote:
> 
> > On December 2, 2019 4:27:47 PM GMT+01:00, Alexander Monakov 
> > <amona...@ispras.ru> wrote:
> > >On Mon, 2 Dec 2019, Richard Biener wrote:
> > >
> > >> +typedef long long v4di __attribute__((vector_size(32)));
> > >> +struct Vec
> > >> +{
> > >> +  unsigned int v[8];
> > >> +};
> > >> +
> > >> +v4di pun (struct Vec *s)
> > >> +{
> > >> +  v4di tem;
> > >> +  __builtin_memcpy (&tem, s, 32);
> > >> +  return tem;
> > >> +}
> > >> +
> > >> +/* We're expecting exactly two stmts, in particular no
> > >BIT_INSERT_EXPR
> > >> +   and no memcpy call.
> > >> +    _3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
> > >> +    return _3;  */
> > >
> > >So just to make sure I understand correctly: the type in angle brackets
> > >does
> > >not imply 256-bit alignment, and this access has implied alignment of
> > >just 
> > >32 bits, which is deduced from the type of s_2, right?
> > 
> > Yes. I should have quoted the more obvious -gimple IL dump instead. 
> 
>   _3 = __MEM <vector(4) long long int, 8> ((char * {ref-all})s_2(D));
> 
> so it's actually only 8 bits alignment.

And the following is what I have applied (a bit more restricted to
avoid FAILs for strlenopt testcases).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2019-12-03  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/92645
        * gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
        from or to a properly aligned register variable.

        * gcc.target/i386/pr92645-5.c: New testcase.

Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c   (revision 278893)
+++ gcc/gimple-fold.c   (working copy)
@@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_st
 
       src_align = get_pointer_alignment (src);
       dest_align = get_pointer_alignment (dest);
-      if (dest_align < TYPE_ALIGN (desttype)
-         || src_align < TYPE_ALIGN (srctype))
-       return false;
 
+      /* Choose between src and destination type for the access based
+         on alignment, whether the access constitutes a register access
+        and whether it may actually expose a declaration for SSA rewrite
+        or SRA decomposition.  */
       destvar = NULL_TREE;
+      srcvar = NULL_TREE;
       if (TREE_CODE (dest) == ADDR_EXPR
          && var_decl_component_p (TREE_OPERAND (dest, 0))
-         && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+         && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
+         && dest_align >= TYPE_ALIGN (desttype)
+         && (is_gimple_reg_type (desttype)
+             || src_align >= TYPE_ALIGN (desttype)))
        destvar = fold_build2 (MEM_REF, desttype, dest, off0);
-
-      srcvar = NULL_TREE;
-      if (TREE_CODE (src) == ADDR_EXPR
-         && var_decl_component_p (TREE_OPERAND (src, 0))
-         && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
-       {
-         if (!destvar
-             || src_align >= TYPE_ALIGN (desttype))
-           srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
-                                 src, off0);
-         else if (!STRICT_ALIGNMENT)
-           {
-             srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
-                                           src_align);
-             srcvar = fold_build2 (MEM_REF, srctype, src, off0);
-           }
-       }
-
+      else if (TREE_CODE (src) == ADDR_EXPR
+              && var_decl_component_p (TREE_OPERAND (src, 0))
+              && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
+              && src_align >= TYPE_ALIGN (srctype)
+              && (is_gimple_reg_type (srctype)
+                  || dest_align >= TYPE_ALIGN (srctype)))
+       srcvar = fold_build2 (MEM_REF, srctype, src, off0);
       if (srcvar == NULL_TREE && destvar == NULL_TREE)
        return false;
 
+      /* Now that we chose an access type express the other side in
+         terms of it if the target allows that with respect to alignment
+        constraints.  */
       if (srcvar == NULL_TREE)
        {
          if (src_align >= TYPE_ALIGN (desttype))
Index: gcc/testsuite/gcc.target/i386/pr92645-5.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr92645-5.c   (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr92645-5.c   (working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
+typedef long long v4di __attribute__((vector_size(32)));
+struct Vec
+{
+  unsigned int v[8];
+};
+
+v4di pun (struct Vec *s)
+{
+  v4di tem;
+  __builtin_memcpy (&tem, s, 32);
+  return tem;
+}
+
+/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
+   and no memcpy call.
+    _3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
+    return _3;  */
+/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */

Reply via email to