We hardly ever emit code using machine instructions for aligned memory 
accesses for block move and clear operations.  The reason for this 
appears to be that suboptimal alignment is often passed by the caller 
and we then only try to find a better alignment by checking pseudo 
register pointer alignment information, which from observation is most 
often only set for stack frame references.

This code originates from before Tree SSA days and we can do better 
nowadays, by looking up the original tree node associated with a MEM 
RTL, so implement this approach, factoring out repeating code from 
`alpha_expand_block_move' and `alpha_expand_block_clear' to a new 
function.

In some cases, however, tree information is not available while pointer 
alignment is, such as in the case of PR target/115459,
where we have:

(gdb) pr orig_src
(mem:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
        (const_int 8368 [0x20b0])) [8  S18 A8])
(gdb) pr orig_dst
(mem/j/c:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
        (const_int 8208 [0x2010])) [8 MEM[(struct 
gnat__debug_pools__print_info_stdout__internal__L_18__B1182b__S1183b___PAD 
*)_339].F[1 ...]{lb: 1 sz: 1}+0 S18 A128])
(gdb) 

showing no tree information and the alignment of 8 only for `orig_src', 
while indeed REGNO_POINTER_ALIGN returns 128 for pseudo 65.  So retain 
the old approach and return the largest alignment determined and its 
associated offset.

Add test cases accordingly and remove XFAILs from memclr-a2-o1-c9-ptr.c 
now that aligned code does get produced for it.

        gcc/
        * config/alpha/alpha.cc 
        (alpha_get_mem_rtx_alignment_and_offset): New function.
        (alpha_expand_block_move, alpha_expand_block_clear): Use it for 
        alignment retrieval.

        gcc/testsuite/
        * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: Remove XFAILs.
        * gcc.target/alpha/memcpy-di-aligned.c: New file.
        * gcc.target/alpha/memcpy-di-unaligned.c: New file.
        * gcc.target/alpha/memcpy-di-unaligned-dst.c: New file.
        * gcc.target/alpha/memcpy-di-unaligned-src.c: New file.
---
 gcc/config/alpha/alpha.cc                                |  158 +++++++++------
 gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c     |   10 
 gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c       |   16 +
 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c |   16 +
 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c |   15 +
 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c     |   51 ++++
 6 files changed, 205 insertions(+), 61 deletions(-)

gcc-alpha-mem-object-alignment.diff
Index: gcc/gcc/config/alpha/alpha.cc
===================================================================
--- gcc.orig/gcc/config/alpha/alpha.cc
+++ gcc/gcc/config/alpha/alpha.cc
@@ -3771,6 +3771,78 @@ alpha_expand_unaligned_store_words (rtx
   emit_move_insn (st_addr_1, st_tmp_1);
 }
 
+/* Get the base alignment (in bits) and byte offset of MEM rtx EXPR in A
+   and O respectively.  Consider the MEM's own alignment, any pseudo
+   register pointer alignment and any tree node information; return the
+   largest alignment found and its associated offset (0 for the MEM's).  */
+
+static void
+alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
+{
+  /* Check EXPR is a MEM before MEM_ALIGN is applied to it.  */
+  gcc_assert (MEM_P (expr));
+
+  HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0;
+  int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr);
+  rtx addr = XEXP (expr, 0);
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      reg_align = REGNO_POINTER_ALIGN (REGNO (addr));
+      break;
+
+    case PLUS:
+      if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
+       {
+         reg_offset = INTVAL (XEXP (addr, 1));
+         reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0)));
+       }
+      break;
+
+    default:
+      break;
+    }
+
+  tree mem = MEM_EXPR (expr);
+  if (mem != NULL_TREE)
+    switch (TREE_CODE (mem))
+      {
+      case MEM_REF:
+       tree_offset = mem_ref_offset (mem).force_shwi ();
+       tree_align = get_object_alignment (get_base_address (mem));
+       break;
+
+      case COMPONENT_REF:
+       {
+         tree byte_offset = component_ref_field_offset (mem);
+         tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
+         poly_int64 offset;
+         if (!byte_offset
+             || !poly_int_tree_p (byte_offset, &offset)
+             || !tree_fits_shwi_p (bit_offset))
+           break;
+         tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+       }
+       tree_align = get_object_alignment (get_base_address (mem));
+       break;
+
+      default:
+       break;
+      }
+
+  if (reg_align > mem_align)
+    {
+      mem_offset = reg_offset;
+      mem_align = reg_align;
+    }
+  if (tree_align > mem_align)
+    {
+      mem_offset = tree_offset;
+      mem_align = tree_align;
+    }
+  o = mem_offset;
+  a = mem_align;
+}
 
 /* Expand string/block move operations.
 
@@ -3799,27 +3871,19 @@ alpha_expand_block_move (rtx operands[])
   else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
     return 0;
 
-  /* Look for additional alignment information from recorded register info.  */
+  /* Look for stricter alignment.  */
+  HOST_WIDE_INT c;
+  int a;
 
-  tmp = XEXP (orig_src, 0);
-  if (REG_P (tmp))
-    src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
+  alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c);
+  if (a > src_align)
     {
-      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
-
-      if (a > src_align)
-       {
-          if (a >= 64 && c % 8 == 0)
-           src_align = 64;
-          else if (a >= 32 && c % 4 == 0)
-           src_align = 32;
-          else if (a >= 16 && c % 2 == 0)
-           src_align = 16;
-       }
+      if (a >= 64 && c % 8 == 0)
+       src_align = 64;
+      else if (a >= 32 && c % 4 == 0)
+       src_align = 32;
+      else if (a >= 16 && c % 2 == 0)
+       src_align = 16;
 
       if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align)
        {
@@ -3828,25 +3892,15 @@ alpha_expand_block_move (rtx operands[])
        }
     }
 
-  tmp = XEXP (orig_dst, 0);
-  if (REG_P (tmp))
-    dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
+  alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
+  if (a > dst_align)
     {
-      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
-
-      if (a > dst_align)
-       {
-          if (a >= 64 && c % 8 == 0)
-           dst_align = 64;
-          else if (a >= 32 && c % 4 == 0)
-           dst_align = 32;
-          else if (a >= 16 && c % 2 == 0)
-           dst_align = 16;
-       }
+      if (a >= 64 && c % 8 == 0)
+       dst_align = 64;
+      else if (a >= 32 && c % 4 == 0)
+       dst_align = 32;
+      else if (a >= 16 && c % 2 == 0)
+       dst_align = 16;
 
       if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align)
        {
@@ -4048,7 +4102,6 @@ alpha_expand_block_clear (rtx operands[]
   HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
   HOST_WIDE_INT alignofs = 0;
   rtx orig_dst = operands[0];
-  rtx tmp;
   int i, words, ofs = 0;
 
   if (orig_bytes <= 0)
@@ -4057,25 +4110,18 @@ alpha_expand_block_clear (rtx operands[]
     return 0;
 
   /* Look for stricter alignment.  */
-  tmp = XEXP (orig_dst, 0);
-  if (REG_P (tmp))
-    align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
-  else if (GET_CODE (tmp) == PLUS
-          && REG_P (XEXP (tmp, 0))
-          && CONST_INT_P (XEXP (tmp, 1)))
-    {
-      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
+  HOST_WIDE_INT c;
+  int a;
 
-      if (a > align)
-       {
-          if (a >= 64)
-           align = a, alignofs = 8 - c % 8;
-          else if (a >= 32)
-           align = a, alignofs = 4 - c % 4;
-          else if (a >= 16)
-           align = a, alignofs = 2 - c % 2;
-       }
+  alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
+  if (a > align)
+    {
+      if (a >= 64)
+       align = a, alignofs = -c & 7;
+      else if (a >= 32)
+       align = a, alignofs = -c & 3;
+      else if (a >= 16)
+       align = a, alignofs = -c & 1;
 
       if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align)
        {
Index: gcc/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
===================================================================
--- gcc.orig/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
+++ gcc/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c
@@ -43,8 +43,8 @@ memclr_a2_o1_c9 (u_t *u)
    that is with a byte store at offset 1 and with two unaligned load/store
    pairs at offsets 2 and 9 each.  */
 
-/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { 
xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times 
"\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } 
} */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times 
"\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long aligned_src_di[9] = { [0 ... 8] = 0xe6e7e8e9eaebeced };
+unsigned long aligned_dst_di[9] = { [0 ... 8] = 0xdcdbdad9d8d7d6d5 };
+
+void
+memcpy_aligned_data_di (void)
+{
+  __builtin_memcpy (aligned_dst_di + 1, aligned_src_di + 1, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 };
+
+void
+memcpy_unaligned_dst_di (void *dst)
+{
+  __builtin_memcpy (dst, unaligned_src_di + 1, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-not "\\sstq\\s" } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned long unaligned_dst_di[9] = { [0 ... 8] = 0xc4c5c6c7c8c9cacb };
+
+void
+memcpy_unaligned_src_di (const void *src)
+{
+  __builtin_memcpy (unaligned_dst_di + 1, src, 56);
+}
+
+/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq|stq_u)\\s" } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-sources memcpy-di-aligned.c } */
+/* { dg-additional-sources memcpy-di-unaligned-src.c } */
+/* { dg-additional-sources memcpy-di-unaligned-dst.c } */
+/* { dg-options "" } */
+
+void memcpy_aligned_data_di (void);
+void memcpy_unaligned_dst_di (void *);
+void memcpy_unaligned_src_di (const void *);
+
+extern unsigned long aligned_src_di[];
+extern unsigned long aligned_dst_di[];
+extern unsigned long unaligned_src_di[];
+extern unsigned long unaligned_dst_di[];
+
+int
+main (void)
+{
+  unsigned long v;
+  int i;
+
+  for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808)
+    unaligned_src_di[i] = v;
+  asm ("" : : : "memory");
+  memcpy_unaligned_dst_di (aligned_src_di + 1);
+  asm ("" : : : "memory");
+  memcpy_aligned_data_di ();
+  asm ("" : : : "memory");
+  memcpy_unaligned_src_di (aligned_dst_di + 1);
+  asm ("" : : : "memory");
+  for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808)
+    if (unaligned_dst_di[i] != v)
+      return 1;
+  if (unaligned_src_di[0] != 0xfefdfcfbfaf9f8f7)
+      return 1;
+  if (unaligned_src_di[8] != 0xfefdfcfbfaf9f8f7)
+      return 1;
+  if (aligned_src_di[0] != 0xe6e7e8e9eaebeced)
+      return 1;
+  if (aligned_src_di[8] != 0xe6e7e8e9eaebeced)
+      return 1;
+  if (aligned_dst_di[0] != 0xdcdbdad9d8d7d6d5)
+      return 1;
+  if (aligned_dst_di[8] != 0xdcdbdad9d8d7d6d5)
+      return 1;
+  if (unaligned_dst_di[0] != 0xc4c5c6c7c8c9cacb)
+      return 1;
+  if (unaligned_dst_di[8] != 0xc4c5c6c7c8c9cacb)
+      return 1;
+  return 0;
+}

Reply via email to