The following patch does $subject, which was requested in a comment on PR50417: the current restriction defeats the purpose of having memcpy as a portable and efficient way to circumvent strict-aliasing violations (or even as a portable and efficient way to do unaligned loads).
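For reference, the idiom in question looks roughly like the following (an illustrative sketch only, not part of the patch; the helper name is made up):

/* Portable unaligned / type-punning load via memcpy.  With the patch the
   call is folded to a plain MEM_REF load/store pair instead of a libcall,
   even on strict-alignment targets (for sizes up to MOVE_MAX).  */
static inline unsigned int
load_u32 (const void *p)
{
  unsigned int v;
  __builtin_memcpy (&v, p, sizeof v);
  return v;
}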
Bootstrap / regtest running on x86_64-unknown-linux-gnu (quite pointless
as there is no change on that arch).  I have added a testcase that should
exercise most relevant cases so we can look for fallout on "interesting"
targets.  Bootstrap / regtest on strict-alignment platforms welcome.

I do expect that with -Os expanding unaligned-to-unaligned moves might be
a size pessimization compared to a libcall (or to whatever the target's
block move expander does).  But the whole point is to remove the
abstraction penalty, so it isn't an easy stmt-local decision to make.
Comments on this front are welcome as well.

Unless I hear some positives I'll let the patch sit here, as I don't
really care too much about those pesky targets (and targets can choose
to opt in by providing movmisalign optabs anyway, so they don't go the
store/extract_bit_field path).

Thanks,
Richard.

2016-07-12  Richard Biener  <rguent...@suse.de>

	* gimple-fold.c (gimple_fold_builtin_memory_op): Lift alignment
	restrictions from inlining register-size memcpy.

	* gcc.dg/torture/builtin-memcpy.c: New testcase.

Index: gcc/gimple-fold.c
===================================================================
*** gcc/gimple-fold.c	(revision 238237)
--- gcc/gimple-fold.c	(working copy)
*************** gimple_fold_builtin_memory_op (gimple_st
*** 705,718 ****
  	      if (type
  		  && TYPE_MODE (type) != BLKmode
  		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8)
! 		  /* If the destination pointer is not aligned we must be able
! 		     to emit an unaligned store.  */
! 		  && (dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type), dest_align)
! 		      || (optab_handler (movmisalign_optab, TYPE_MODE (type))
! 			  != CODE_FOR_nothing)))
  		{
  		  tree srctype = type;
  		  tree desttype = type;
  		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
--- 705,718 ----
  	      if (type
  		  && TYPE_MODE (type) != BLKmode
  		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8))
  		{
+ 		  /* RTL expansion handles misaligned destination / source
+ 		     MEM_REFs either via target provided movmisalign or
+ 		     via extract/store_bit_field for targets that set
+ 		     SLOW_UNALIGNED_ACCESS for the move.  For move
+ 		     quantities up to MOVE_MAX this should be always
+ 		     more efficient than a libcall to memcpy.  */
  		  tree srctype = type;
  		  tree desttype = type;
  		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
*************** gimple_fold_builtin_memory_op (gimple_st
*** 721,767 ****
  		      tree tem = fold_const_aggregate_ref (srcmem);
  		      if (tem)
  			srcmem = tem;
! 		      else if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 			       && SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
! 							 src_align)
! 			       && (optab_handler (movmisalign_optab,
! 						  TYPE_MODE (type))
! 				   == CODE_FOR_nothing))
! 			srcmem = NULL_TREE;
! 		      if (srcmem)
  			{
! 			  gimple *new_stmt;
! 			  if (is_gimple_reg_type (TREE_TYPE (srcmem)))
! 			    {
! 			      new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! 			      if (gimple_in_ssa_p (cfun))
! 				srcmem = make_ssa_name (TREE_TYPE (srcmem),
! 							new_stmt);
! 			      else
! 				srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! 			      gimple_assign_set_lhs (new_stmt, srcmem);
! 			      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
! 			      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
! 			    }
! 			  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 			    desttype = build_aligned_type (type, dest_align);
! 			  new_stmt
! 			    = gimple_build_assign (fold_build2 (MEM_REF, desttype,
! 								dest, off0),
! 						   srcmem);
  			  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
- 			  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
- 			  if (gimple_vdef (new_stmt)
- 			      && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
- 			    SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
- 			  if (!lhs)
- 			    {
- 			      gsi_replace (gsi, new_stmt, false);
- 			      return true;
- 			    }
  			  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- 			  goto done;
  			}
  		    }
  		}
  	    }
--- 721,756 ----
  		      tree tem = fold_const_aggregate_ref (srcmem);
  		      if (tem)
  			srcmem = tem;
! 		      gimple *new_stmt;
! 		      if (is_gimple_reg_type (TREE_TYPE (srcmem)))
  			{
! 			  new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! 			  if (gimple_in_ssa_p (cfun))
! 			    srcmem = make_ssa_name (TREE_TYPE (srcmem),
! 						    new_stmt);
! 			  else
! 			    srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! 			  gimple_assign_set_lhs (new_stmt, srcmem);
  			  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
  			  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
  			}
+ 		      if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+ 			desttype = build_aligned_type (type, dest_align);
+ 		      new_stmt
+ 			= gimple_build_assign (fold_build2 (MEM_REF, desttype,
+ 							    dest, off0), srcmem);
+ 		      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ 		      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+ 		      if (gimple_vdef (new_stmt)
+ 			  && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
+ 			SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ 		      if (!lhs)
+ 			{
+ 			  gsi_replace (gsi, new_stmt, false);
+ 			  return true;
+ 			}
+ 		      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ 		      goto done;
  		    }
  		}
  	    }
Index: gcc/testsuite/gcc.dg/torture/builtin-memcpy.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/builtin-memcpy.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/builtin-memcpy.c	(working copy)
***************
*** 0 ****
--- 1,89 ----
+ /* { dg-do run } */
+ 
+ char src[32], dest[32];
+ long long llsrc, lldest;
+ 
+ /* Unaligned source/dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyuu16 (long long *src, long long *dest)
+ {
+   __builtin_memcpy (dest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu8 (long *src, long *dest)
+ {
+   __builtin_memcpy (dest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu4 (int *src, int *dest)
+ {
+   __builtin_memcpy (dest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu2 (short *src, short *dest)
+ {
+   __builtin_memcpy (dest, src, 2);
+ }
+ 
+ /* Aligned source, unaligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyau16 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyau8 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyau4 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyau2 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 2);
+ }
+ 
+ /* Unaligned source, aligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyua16 (long long *src)
+ {
+   __builtin_memcpy (&lldest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyua8 (long *src)
+ {
+   __builtin_memcpy (&lldest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyua4 (int *src)
+ {
+   __builtin_memcpy (&lldest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyua2 (short *src)
+ {
+   __builtin_memcpy (&lldest, src, 2);
+ }
+ 
+ int main()
+ {
+   void *usrc = (void *)(((__UINTPTR_TYPE__)src & -16) + 17);
+   void *udest = (void *)(((__UINTPTR_TYPE__)dest & -16) + 17);
+   copyuu16 (udest, usrc);
+   copyuu8 (udest, usrc);
+   copyuu4 (udest, usrc);
+   copyuu2 (udest, usrc);
+   copyau16 (usrc);
+   copyau8 (usrc);
+   copyau4 (usrc);
+   copyau2 (usrc);
+   copyua16 (udest);
+   copyua8 (udest);
+   copyua4 (udest);
+   copyua2 (udest);
+   return 0;
+ }