Thanks,
Richard.
2016-07-12 Richard Biener <rguent...@suse.de>
* gimple-fold.c (gimple_fold_builtin_memory_op): Lift alignment
restrictions from inlining register size memcpy.
* gcc.dg/torture/builtin-memcpy.c: New testcase.
Index: gcc/gimple-fold.c
===================================================================
*** gcc/gimple-fold.c (revision 238237)
--- gcc/gimple-fold.c (working copy)
*************** gimple_fold_builtin_memory_op (gimple_st
*** 705,718 ****
if (type
&& TYPE_MODE (type) != BLKmode
&& (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! == ilen * 8)
! /* If the destination pointer is not aligned we must be able
! to emit an unaligned store. */
! && (dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
! || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type), dest_align)
! || (optab_handler (movmisalign_optab, TYPE_MODE (type))
! != CODE_FOR_nothing)))
{
tree srctype = type;
tree desttype = type;
if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
--- 705,718 ----
if (type
&& TYPE_MODE (type) != BLKmode
&& (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! == ilen * 8))
{
+ /* RTL expansion handles misaligned destination / source
+ MEM_REFs either via target provided movmisalign or
+ via extract/store_bit_field for targets that set
+ SLOW_UNALIGNED_ACCESS for the move. For move
+ quantities up to MOVE_MAX this should always be
+ more efficient than a libcall to memcpy. */
tree srctype = type;
tree desttype = type;
if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
*************** gimple_fold_builtin_memory_op (gimple_st
*** 721,767 ****
tree tem = fold_const_aggregate_ref (srcmem);
if (tem)
srcmem = tem;
! else if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type))
! && SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
! src_align)
! && (optab_handler (movmisalign_optab,
! TYPE_MODE (type))
! == CODE_FOR_nothing))
! srcmem = NULL_TREE;
! if (srcmem)
{
! gimple *new_stmt;
! if (is_gimple_reg_type (TREE_TYPE (srcmem)))
! {
! new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! if (gimple_in_ssa_p (cfun))
! srcmem = make_ssa_name (TREE_TYPE (srcmem),
! new_stmt);
! else
! srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! gimple_assign_set_lhs (new_stmt, srcmem);
! gimple_set_vuse (new_stmt, gimple_vuse (stmt));
! gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
! }
! if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! desttype = build_aligned_type (type, dest_align);
! new_stmt
! = gimple_build_assign (fold_build2 (MEM_REF, desttype,
! dest, off0),
! srcmem);
gimple_set_vuse (new_stmt, gimple_vuse (stmt));
- gimple_set_vdef (new_stmt, gimple_vdef (stmt));
- if (gimple_vdef (new_stmt)
- && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
- if (!lhs)
- {
- gsi_replace (gsi, new_stmt, false);
- return true;
- }
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- goto done;
}
}
}
}
--- 721,756 ----
tree tem = fold_const_aggregate_ref (srcmem);
if (tem)
srcmem = tem;
! gimple *new_stmt;
! if (is_gimple_reg_type (TREE_TYPE (srcmem)))
{
! new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! if (gimple_in_ssa_p (cfun))
! srcmem = make_ssa_name (TREE_TYPE (srcmem),
! new_stmt);
! else
! srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! gimple_assign_set_lhs (new_stmt, srcmem);
gimple_set_vuse (new_stmt, gimple_vuse (stmt));
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
}
+ if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+ desttype = build_aligned_type (type, dest_align);
+ new_stmt
+ = gimple_build_assign (fold_build2 (MEM_REF, desttype,
+ dest, off0), srcmem);
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+ if (gimple_vdef (new_stmt)
+ && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ if (!lhs)
+ {
+ gsi_replace (gsi, new_stmt, false);
+ return true;
+ }
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ goto done;
}
}
}
Index: gcc/testsuite/gcc.dg/torture/builtin-memcpy.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/builtin-memcpy.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/builtin-memcpy.c (working copy)
***************
*** 0 ****
--- 1,89 ----
+ /* { dg-do run } */
+
+ char src[48], dest[48]; /* Room for a 16-byte copy starting at offset 17. */
+ long long llsrc[2], lldest[2]; /* 16 bytes each so the 16-byte variants stay in bounds. */
+
+ /* Unaligned source/dest variants. */
+ void __attribute__((noinline,noclone))
+ copyuu16 (long long *src, long long *dest)
+ {
+ __builtin_memcpy (dest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu8 (long *src, long *dest)
+ {
+ __builtin_memcpy (dest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu4 (int *src, int *dest)
+ {
+ __builtin_memcpy (dest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu2 (short *src, short *dest)
+ {
+ __builtin_memcpy (dest, src, 2);
+ }
+
+ /* Aligned source, unaligned dest variants. */
+ void __attribute__((noinline,noclone))
+ copyau16 (long long *dest)
+ {
+ __builtin_memcpy (dest, &llsrc, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyau8 (long long *dest)
+ {
+ __builtin_memcpy (dest, &llsrc, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyau4 (long long *dest)
+ {
+ __builtin_memcpy (dest, &llsrc, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyau2 (long long *dest)
+ {
+ __builtin_memcpy (dest, &llsrc, 2);
+ }
+
+ /* Unaligned source, aligned dest variants. */
+ void __attribute__((noinline,noclone))
+ copyua16 (long long *src)
+ {
+ __builtin_memcpy (&lldest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyua8 (long *src)
+ {
+ __builtin_memcpy (&lldest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyua4 (int *src)
+ {
+ __builtin_memcpy (&lldest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyua2 (short *src)
+ {
+ __builtin_memcpy (&lldest, src, 2);
+ }
+
+ int main()
+ {
+ void *usrc = (void *)(((__UINTPTR_TYPE__)src & -16) + 17);
+ void *udest = (void *)(((__UINTPTR_TYPE__)dest & -16) + 17);
+ copyuu16 (udest, usrc);
+ copyuu8 (udest, usrc);
+ copyuu4 (udest, usrc);
+ copyuu2 (udest, usrc);
+ copyau16 (usrc);
+ copyau8 (usrc);
+ copyau4 (usrc);
+ copyau2 (usrc);
+ copyua16 (udest);
+ copyua8 (udest);
+ copyua4 (udest);
+ copyua2 (udest);
+ return 0;
+ }