This recovers some of the nearly dead code in
gimple_fold_builtin_memory_op by allowing a rewrite of memcpy
with a properly aligned source or destination decl. In particular
this allows register-typed variables to be transformed (and later
rewritten into SSA form).
Together with patch 1/2 of this series, the testcase then optimizes from
skvx::bit_pun<__vector(4) long long int, skvx::Vec<8, unsigned int> >
(const struct Vec & s)
{
vector(4) long long int D.151565;
vector(4) long long int d;
try
{
memcpy (&d, s, 32);
D.151565 = d;
return D.151565;
}
finally
{
d = {CLOBBER};
}
}
to
skvx::bit_pun<__vector(4) long long int, skvx::Vec<8, unsigned int> >
(const struct Vec & s)
{
vector(4) long long int d;
vector(4) long long int _3;
<bb 2> :
_3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
return _3;
}
instead of a weird memcpy + bit-insert combo.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2019-11-02 Richard Biener <[email protected]>
PR tree-optimization/92645
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
from or to a properly aligned register variable.
* gcc.target/i386/pr92645-5.c: New testcase.
Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c (revision 278893)
+++ gcc/gimple-fold.c (working copy)
@@ -987,32 +987,21 @@ gimple_fold_builtin_memory_op (gimple_st
src_align = get_pointer_alignment (src);
dest_align = get_pointer_alignment (dest);
if (dest_align < TYPE_ALIGN (desttype)
- || src_align < TYPE_ALIGN (srctype))
+ && src_align < TYPE_ALIGN (srctype))
return false;
destvar = NULL_TREE;
+ srcvar = NULL_TREE;
if (TREE_CODE (dest) == ADDR_EXPR
&& var_decl_component_p (TREE_OPERAND (dest, 0))
- && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+ && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
+ && dest_align >= TYPE_ALIGN (desttype))
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
-
- srcvar = NULL_TREE;
- if (TREE_CODE (src) == ADDR_EXPR
- && var_decl_component_p (TREE_OPERAND (src, 0))
- && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
- {
- if (!destvar
- || src_align >= TYPE_ALIGN (desttype))
- srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
- src, off0);
- else if (!STRICT_ALIGNMENT)
- {
- srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
- src_align);
- srcvar = fold_build2 (MEM_REF, srctype, src, off0);
- }
- }
-
+ else if (TREE_CODE (src) == ADDR_EXPR
+ && var_decl_component_p (TREE_OPERAND (src, 0))
+ && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
+ && src_align >= TYPE_ALIGN (srctype))
+ srcvar = fold_build2 (MEM_REF, srctype, src, off0);
if (srcvar == NULL_TREE && destvar == NULL_TREE)
return false;
Index: gcc/testsuite/gcc.target/i386/pr92645-5.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr92645-5.c (nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr92645-5.c (working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
+typedef long long v4di __attribute__((vector_size(32)));
+struct Vec
+{
+ unsigned int v[8];
+};
+
+v4di pun (struct Vec *s)
+{
+ v4di tem;
+ __builtin_memcpy (&tem, s, 32);
+ return tem;
+}
+
+/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
+ and no memcpy call.
+ _3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
+ return _3; */
+/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */