The following patch does $subject, which was requested in a comment in
PR50417: the current restriction defeats the purpose of having memcpy
as a portable and efficient way to circumvent strict-aliasing violations
(or even as a portable and efficient way to do unaligned loads).
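
For reference, the kind of code this is about is roughly the following
(helper name is purely illustrative):

  /* A portable unaligned load / type-punning idiom; with the patch
     this folds to a single (possibly unaligned) load instead of a
     call to memcpy, also on strict-alignment targets.  */
  static inline unsigned int
  load_u32 (const void *p)
  {
    unsigned int v;
    __builtin_memcpy (&v, p, sizeof v);
    return v;
  }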

Bootstrap / regtest running on x86_64-unknown-linux-gnu (quite pointless
as there is no change on that arch).

I have added a testcase that should exercise most relevant cases so
we can look for fallout on "interesting" targets.

Bootstrap / regtest on strict-alignment platforms would be welcome.

I do expect that with -Os, expanding unaligned-to-unaligned moves
might be a size pessimization compared to a libcall (or whatever the
target's block-move expander does).  But the whole point is to remove
the abstraction penalty, so it isn't an easy stmt-local decision to
make.  Comments on this front are welcome as well.
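
Concretely, the case I have in mind is something like the following
(illustrative only):

  /* An unaligned-to-unaligned 8-byte move.  On a strict-alignment
     target without a movmisalign optab this now expands via
     extract/store_bit_field, which at -Os may end up larger than
     the memcpy libcall it replaces.  */
  void
  copy8 (void *dst, const void *src)
  {
    __builtin_memcpy (dst, src, 8);
  }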

Unless I hear some positives I'll let the patch sit here, as I don't
really care too much about those pesky targets (and targets can opt in
by providing movmisalign optabs anyway, so they don't take the
store/extract_bit_field path).

Thanks,
Richard.

2016-07-12  Richard Biener  <rguent...@suse.de>

        * gimple-fold.c (gimple_fold_builtin_memory_op): Lift alignment
        restrictions from inlining register size memcpy.

        * gcc.dg/torture/builtin-memcpy.c: New testcase.

Index: gcc/gimple-fold.c
===================================================================
*** gcc/gimple-fold.c   (revision 238237)
--- gcc/gimple-fold.c   (working copy)
*************** gimple_fold_builtin_memory_op (gimple_st
*** 705,718 ****
              if (type
                  && TYPE_MODE (type) != BLKmode
                  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
!                     == ilen * 8)
!                 /* If the destination pointer is not aligned we must be able
!                    to emit an unaligned store.  */
!                 && (dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
!                     || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type), dest_align)
!                     || (optab_handler (movmisalign_optab, TYPE_MODE (type))
!                         != CODE_FOR_nothing)))
                {
                  tree srctype = type;
                  tree desttype = type;
                  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
--- 705,718 ----
              if (type
                  && TYPE_MODE (type) != BLKmode
                  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
!                     == ilen * 8))
                {
+                 /* RTL expansion handles misaligned destination / source
+                    MEM_REFs either via target provided movmisalign or
+                    via extract/store_bit_field for targets that set
+                    SLOW_UNALIGNED_ACCESS for the move.  For move
+                    quantities up to MOVE_MAX this should always be
+                    more efficient than a libcall to memcpy.  */
                  tree srctype = type;
                  tree desttype = type;
                  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
*************** gimple_fold_builtin_memory_op (gimple_st
*** 721,767 ****
                  tree tem = fold_const_aggregate_ref (srcmem);
                  if (tem)
                    srcmem = tem;
!                 else if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type))
!                          && SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
!                                                    src_align)
!                          && (optab_handler (movmisalign_optab,
!                                             TYPE_MODE (type))
!                              == CODE_FOR_nothing))
!                   srcmem = NULL_TREE;
!                 if (srcmem)
                    {
!                     gimple *new_stmt;
!                     if (is_gimple_reg_type (TREE_TYPE (srcmem)))
!                       {
!                         new_stmt = gimple_build_assign (NULL_TREE, srcmem);
!                         if (gimple_in_ssa_p (cfun))
!                           srcmem = make_ssa_name (TREE_TYPE (srcmem),
!                                                   new_stmt);
!                         else
!                           srcmem = create_tmp_reg (TREE_TYPE (srcmem));
!                         gimple_assign_set_lhs (new_stmt, srcmem);
!                         gimple_set_vuse (new_stmt, gimple_vuse (stmt));
!                         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
!                       }
!                     if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
!                       desttype = build_aligned_type (type, dest_align);
!                     new_stmt
!                       = gimple_build_assign (fold_build2 (MEM_REF, desttype,
!                                                           dest, off0),
!                                              srcmem);
                      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
-                     gimple_set_vdef (new_stmt, gimple_vdef (stmt));
-                     if (gimple_vdef (new_stmt)
-                         && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
-                       SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
-                     if (!lhs)
-                       {
-                         gsi_replace (gsi, new_stmt, false);
-                         return true;
-                       }
                      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
-                     goto done;
                    }
                }
            }
        }
--- 721,756 ----
                  tree tem = fold_const_aggregate_ref (srcmem);
                  if (tem)
                    srcmem = tem;
!                 gimple *new_stmt;
!                 if (is_gimple_reg_type (TREE_TYPE (srcmem)))
                    {
!                     new_stmt = gimple_build_assign (NULL_TREE, srcmem);
!                     if (gimple_in_ssa_p (cfun))
!                       srcmem = make_ssa_name (TREE_TYPE (srcmem),
!                                               new_stmt);
!                     else
!                       srcmem = create_tmp_reg (TREE_TYPE (srcmem));
!                     gimple_assign_set_lhs (new_stmt, srcmem);
                      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
                    }
+                 if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+                   desttype = build_aligned_type (type, dest_align);
+                 new_stmt
+                   = gimple_build_assign (fold_build2 (MEM_REF, desttype,
+                                                       dest, off0), srcmem);
+                 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+                 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+                 if (gimple_vdef (new_stmt)
+                     && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
+                   SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+                 if (!lhs)
+                   {
+                     gsi_replace (gsi, new_stmt, false);
+                     return true;
+                   }
+                 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+                 goto done;
                }
            }
        }
Index: gcc/testsuite/gcc.dg/torture/builtin-memcpy.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/builtin-memcpy.c       (revision 0)
--- gcc/testsuite/gcc.dg/torture/builtin-memcpy.c       (working copy)
***************
*** 0 ****
--- 1,89 ----
+ /* { dg-do run } */
+ 
+ char src[48], dest[48];
+ long long llsrc[2], lldest[2];
+ 
+ /* Unaligned source/dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyuu16 (long long *src, long long *dest)
+ {
+   __builtin_memcpy (dest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu8 (long *src, long *dest)
+ {
+   __builtin_memcpy (dest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu4 (int *src, int *dest)
+ {
+   __builtin_memcpy (dest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu2 (short *src, short *dest)
+ {
+   __builtin_memcpy (dest, src, 2);
+ }
+ 
+ /* Aligned source, unaligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyau16 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyau8 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyau4 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyau2 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 2);
+ }
+ 
+ /* Unaligned source, aligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyua16 (long long *src)
+ {
+   __builtin_memcpy (&lldest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyua8 (long *src)
+ {
+   __builtin_memcpy (&lldest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyua4 (int *src)
+ {
+   __builtin_memcpy (&lldest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyua2 (short *src)
+ {
+   __builtin_memcpy (&lldest, src, 2);
+ }
+ 
+ int main()
+ {
+   void *usrc = (void *)(((__UINTPTR_TYPE__)src & -16) + 17);
+   void *udest = (void *)(((__UINTPTR_TYPE__)dest & -16) + 17);
+   copyuu16 (udest, usrc);
+   copyuu8 (udest, usrc);
+   copyuu4 (udest, usrc);
+   copyuu2 (udest, usrc);
+   copyau16 (usrc);
+   copyau8 (usrc);
+   copyau4 (usrc);
+   copyau2 (usrc);
+   copyua16 (udest);
+   copyua8 (udest);
+   copyua4 (udest);
+   copyua2 (udest);
+   return 0;
+ }
