We hardly ever emit code using machine instructions for aligned memory accesses for block move and clear operations. The reason for this appears to be that suboptimal alignment is often passed by the caller, and then we only try to find a better alignment by checking pseudo register pointer alignment information, which from observation is most often only set for stack frame references.
This code originates from before Tree SSA days and we can do better nowadays by looking up the original tree node associated with a MEM RTL. Implement this approach, factoring out repeated code from `alpha_expand_block_move' and `alpha_expand_block_clear' to a new function. In some cases, however, tree information is not available while pointer alignment is, such as with the case concerned with PR target/115459, where we have: (gdb) pr orig_src (mem:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ]) (const_int 8368 [0x20b0])) [8 S18 A8]) (gdb) pr orig_dst (mem/j/c:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ]) (const_int 8208 [0x2010])) [8 MEM[(struct gnat__debug_pools__print_info_stdout__internal__L_18__B1182b__S1183b___PAD *)_339].F[1 ...]{lb: 1 sz: 1}+0 S18 A128]) (gdb) showing no tree information and the alignment of 8 only for `orig_src', while indeed REGNO_POINTER_ALIGN returns 128 for pseudo 65. So retain the old approach as well and return the largest alignment determined and its associated offset. Add test cases accordingly and remove XFAILs from memclr-a2-o1-c9-ptr.c now that aligned code does get produced for it. gcc/ * config/alpha/alpha.cc (alpha_get_mem_rtx_alignment_and_offset): New function. (alpha_expand_block_move, alpha_expand_block_clear): Use it for alignment retrieval. gcc/testsuite/ * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: Remove XFAILs. * gcc.target/alpha/memcpy-di-aligned.c: New file. * gcc.target/alpha/memcpy-di-unaligned.c: New file. * gcc.target/alpha/memcpy-di-unaligned-dst.c: New file. * gcc.target/alpha/memcpy-di-unaligned-src.c: New file. 
--- gcc/config/alpha/alpha.cc | 158 +++++++++------ gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c | 10 gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c | 16 + gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c | 16 + gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c | 15 + gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c | 51 ++++ 6 files changed, 205 insertions(+), 61 deletions(-) gcc-alpha-mem-object-alignment.diff Index: gcc/gcc/config/alpha/alpha.cc =================================================================== --- gcc.orig/gcc/config/alpha/alpha.cc +++ gcc/gcc/config/alpha/alpha.cc @@ -3771,6 +3771,78 @@ alpha_expand_unaligned_store_words (rtx emit_move_insn (st_addr_1, st_tmp_1); } +/* Get the base alignment and offset of EXPR in A and O respectively. + Check for any pseudo register pointer alignment and for any tree + node information and return the largest alignment determined and + its associated offset. */ + +static void +alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) +{ + HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0; + int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr); + + gcc_assert (MEM_P (expr)); + + rtx addr = XEXP (expr, 0); + switch (GET_CODE (addr)) + { + case REG: + reg_align = REGNO_POINTER_ALIGN (REGNO (addr)); + break; + + case PLUS: + if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1))) + { + reg_offset = INTVAL (XEXP (addr, 1)); + reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0))); + } + break; + + default: + break; + } + + tree mem = MEM_EXPR (expr); + if (mem != NULL_TREE) + switch (TREE_CODE (mem)) + { + case MEM_REF: + tree_offset = mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + break; + + case COMPONENT_REF: + { + tree byte_offset = component_ref_field_offset (mem); + tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); + poly_int64 offset; + if (!byte_offset + || 
!poly_int_tree_p (byte_offset, &offset) + || !tree_fits_shwi_p (bit_offset)) + break; + tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + } + tree_align = get_object_alignment (get_base_address (mem)); + break; + + default: + break; + } + + if (reg_align > mem_align) + { + mem_offset = reg_offset; + mem_align = reg_align; + } + if (tree_align > mem_align) + { + mem_offset = tree_offset; + mem_align = tree_align; + } + o = mem_offset; + a = mem_align; +} /* Expand string/block move operations. @@ -3799,27 +3871,19 @@ alpha_expand_block_move (rtx operands[]) else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) return 0; - /* Look for additional alignment information from recorded register info. */ + /* Look for stricter alignment. */ + HOST_WIDE_INT c; + int a; - tmp = XEXP (orig_src, 0); - if (REG_P (tmp)) - src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) + alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c); + if (a > src_align) { - unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); - unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); - - if (a > src_align) - { - if (a >= 64 && c % 8 == 0) - src_align = 64; - else if (a >= 32 && c % 4 == 0) - src_align = 32; - else if (a >= 16 && c % 2 == 0) - src_align = 16; - } + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align) { @@ -3828,25 +3892,15 @@ alpha_expand_block_move (rtx operands[]) } } - tmp = XEXP (orig_dst, 0); - if (REG_P (tmp)) - dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) + alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c); + if (a > dst_align) { - unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 
1)); - unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); - - if (a > dst_align) - { - if (a >= 64 && c % 8 == 0) - dst_align = 64; - else if (a >= 32 && c % 4 == 0) - dst_align = 32; - else if (a >= 16 && c % 2 == 0) - dst_align = 16; - } + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align) { @@ -4048,7 +4102,6 @@ alpha_expand_block_clear (rtx operands[] HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; HOST_WIDE_INT alignofs = 0; rtx orig_dst = operands[0]; - rtx tmp; int i, words, ofs = 0; if (orig_bytes <= 0) @@ -4057,25 +4110,18 @@ alpha_expand_block_clear (rtx operands[] return 0; /* Look for stricter alignment. */ - tmp = XEXP (orig_dst, 0); - if (REG_P (tmp)) - align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) - { - HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); - int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + HOST_WIDE_INT c; + int a; - if (a > align) - { - if (a >= 64) - align = a, alignofs = 8 - c % 8; - else if (a >= 32) - align = a, alignofs = 4 - c % 4; - else if (a >= 16) - align = a, alignofs = 2 - c % 2; - } + alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c); + if (a > align) + { + if (a >= 64) + align = a, alignofs = -c & 7; + else if (a >= 32) + align = a, alignofs = -c & 3; + else if (a >= 16) + align = a, alignofs = -c & 1; if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align) { Index: gcc/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c =================================================================== --- gcc.orig/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c +++ gcc/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c @@ -43,8 +43,8 @@ memclr_a2_o1_c9 (u_t *u) that is with a byte store at offset 1 and with two unaligned load/store pairs at offsets 2 
and 9 each. */ -/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */ Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long aligned_src_di[9] = { [0 ... 8] = 0xe6e7e8e9eaebeced }; +unsigned long aligned_dst_di[9] = { [0 ... 
8] = 0xdcdbdad9d8d7d6d5 }; + +void +memcpy_aligned_data_di (void) +{ + __builtin_memcpy (aligned_dst_di + 1, aligned_src_di + 1, 56); +} + +/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */ Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 }; + +void +memcpy_unaligned_dst_di (void *dst) +{ + __builtin_memcpy (dst, unaligned_src_di + 1, 56); +} + +/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s" 8 } } */ +/* { dg-final { scan-assembler-not "\\sstq\\s" } } */ Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long unaligned_dst_di[9] = { [0 ... 
8] = 0xc4c5c6c7c8c9cacb }; + +void +memcpy_unaligned_src_di (const void *src) +{ + __builtin_memcpy (unaligned_dst_di + 1, src, 56); +} + +/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldq|stq_u)\\s" } } */ Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c =================================================================== --- /dev/null +++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-sources memcpy-di-aligned.c } */ +/* { dg-additional-sources memcpy-di-unaligned-src.c } */ +/* { dg-additional-sources memcpy-di-unaligned-dst.c } */ +/* { dg-options "" } */ + +void memcpy_aligned_data_di (void); +void memcpy_unaligned_dst_di (void *); +void memcpy_unaligned_src_di (const void *); + +extern unsigned long aligned_src_di[]; +extern unsigned long aligned_dst_di[]; +extern unsigned long unaligned_src_di[]; +extern unsigned long unaligned_dst_di[]; + +int +main (void) +{ + unsigned long v; + int i; + + for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808) + unaligned_src_di[i] = v; + asm ("" : : : "memory"); + memcpy_unaligned_dst_di (aligned_src_di + 1); + asm ("" : : : "memory"); + memcpy_aligned_data_di (); + asm ("" : : : "memory"); + memcpy_unaligned_src_di (aligned_dst_di + 1); + asm ("" : : : "memory"); + for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808) + if (unaligned_dst_di[i] != v) + return 1; + if (unaligned_src_di[0] != 0xfefdfcfbfaf9f8f7) + return 1; + if (unaligned_src_di[8] != 0xfefdfcfbfaf9f8f7) + return 1; + if (aligned_src_di[0] != 0xe6e7e8e9eaebeced) + return 1; + if (aligned_src_di[8] != 0xe6e7e8e9eaebeced) + return 1; + if (aligned_dst_di[0] != 0xdcdbdad9d8d7d6d5) + return 1; + if (aligned_dst_di[8] != 0xdcdbdad9d8d7d6d5) + return 1; + if (unaligned_dst_di[0] != 0xc4c5c6c7c8c9cacb) + return 1; + if 
(unaligned_dst_di[8] != 0xc4c5c6c7c8c9cacb) + return 1; + return 0; +}