Hi all,

This patch addresses feedback from:
https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00933.html
and https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00985.html

I think the correct way is this approach. We want to return true not only when body is NULL, but also when we know that prev (from which body was extracted) is a memory op and the multiply-accumulate instruction is a DImode one. With this patch we use the FOR_EACH_SUBRTX construct to find the MEM rtx, removing the need for is_mem_p.
Some comments are added and style issues resolved.
Also, do not consider output dependencies as true dependencies between the load and multiply-accumulate.

Bootstrapped and tested on aarch64-linux.

Ok for trunk?

Thanks,
Kyrill

2014-10-13  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/aarch64/aarch64.h (ADJUST_INSN_LENGTH): Wrap definition in
    do while (0).
    * config/aarch64/aarch64.c (is_mem_p): Delete.
    (is_memory_op): Rename to...
    (has_memory_op): ... This.  Use FOR_EACH_SUBRTX.
    (dep_between_memop_and_curr): Assert that the input is a SET.
    (aarch64_madd_needs_nop): Add comment.  Do not call
    dep_between_memop_and_curr on NULL body.
    (aarch64_final_prescan_insn): Add comment.
    Include rtl-iter.h.
commit a544e0e96cd85417ded7c69a4c33d8fd72005f4c
Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
Date:   Tue Oct 14 14:47:56 2014 +0100

    [AArch64] Cleanup logic around aarch64_final_prescan

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index db5ff59..84aa67e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -64,6 +64,7 @@
 #include "config/arm/aarch-cost-tables.h"
 #include "dumpfile.h"
 #include "builtins.h"
+#include "rtl-iter.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -7595,17 +7596,19 @@ aarch64_mangle_type (const_tree type)
   return NULL;
 }
 
-static int
-is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
-{
-  return MEM_P (*x);
-}
+
+/* Return true if the rtx_insn contains a MEM RTX somewhere
+   in it.  */
 
 static bool
-is_memory_op (rtx_insn *mem_insn)
+has_memory_op (rtx_insn *mem_insn)
 {
-   rtx pattern = PATTERN (mem_insn);
-   return for_each_rtx (&pattern, is_mem_p, NULL);
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
+    if (MEM_P (*iter))
+      return true;
+
+  return false;
 }
 
 /* Find the first rtx_insn before insn that will generate an assembly
@@ -7655,14 +7658,13 @@ dep_between_memop_and_curr (rtx memop)
   rtx load_reg;
   int opno;
 
-  if (!memop)
-    return false;
+  gcc_assert (GET_CODE (memop) == SET);
 
   if (!REG_P (SET_DEST (memop)))
     return false;
 
   load_reg = SET_DEST (memop);
-  for (opno = 0; opno < recog_data.n_operands; opno++)
+  for (opno = 1; opno < recog_data.n_operands; opno++)
     {
       rtx operand = recog_data.operand[opno];
       if (REG_P (operand)
@@ -7673,6 +7675,12 @@ dep_between_memop_and_curr (rtx memop)
   return false;
 }
 
+
+/* When working around the Cortex-A53 erratum 835769,
+   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
+   instruction and has a preceding memory instruction such that a NOP
+   should be inserted between them.  */
+
 bool
 aarch64_madd_needs_nop (rtx_insn* insn)
 {
@@ -7691,24 +7699,26 @@ aarch64_madd_needs_nop (rtx_insn* insn)
     return false;
 
   prev = aarch64_prev_real_insn (insn);
-  if (!prev)
+  if (!prev || !has_memory_op (prev))
     return false;
 
   body = single_set (prev);
 
   /* If the previous insn is a memory op and there is no dependency between
-     it and the madd, emit a nop between them.  If we know the previous insn is
-     a memory op but body is NULL, emit the nop to be safe, it's probably a
-     load/store pair insn.  */
-  if (is_memory_op (prev)
-      && GET_MODE (recog_data.operand[0]) == DImode
-      && (!dep_between_memop_and_curr (body)))
+     it and the DImode madd, emit a NOP between them.  If body is NULL then we
+     have a complex memory operation, probably a load/store pair.
+     Be conservative for now and emit a NOP.  */
+  if (GET_MODE (recog_data.operand[0]) == DImode
+      && (!body || !dep_between_memop_and_curr (body)))
     return true;
 
   return false;
 
 }
 
+
+/* Implement FINAL_PRESCAN_INSN.  */
+
 void
 aarch64_final_prescan_insn (rtx_insn *insn)
 {
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e9e5fd8..71d9212 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -489,8 +489,11 @@ enum target_cpus
 /* If inserting NOP before a mult-accumulate insn remember to adjust the
    length so that conditional branching code is updated appropriately.  */
 #define ADJUST_INSN_LENGTH(insn, length)	\
-  if (aarch64_madd_needs_nop (insn))		\
-    length += 4;
+  do						\
+    {						\
+       if (aarch64_madd_needs_nop (insn))	\
+         length += 4;				\
+    } while (0)
 
 #define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS)	\
     aarch64_final_prescan_insn (INSN);			\

Reply via email to