This patch prevent tree unroller from completely unrolling inner loops if that
results in excessive strided-loads in outer loop.

Thanks,
Kugan

gcc/ChangeLog:

2017-09-12  Kugan Vivekanandarajah  <kug...@linaro.org>

    * config/aarch64/aarch64.c (count_mem_load_streams): New.
    (aarch64_ok_to_unroll): New.
    * doc/tm.texi (ok_to_unroll): Define new target hook.
    * doc/tm.texi.in (ok_to_unroll): Likewise.
    * target.def (ok_to_unroll): Likewise.
    * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
      ok_to_unroll while unrolling.
From 5de245bbf6ba1768e8206a61feb0f42c106a1d94 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org>
Date: Fri, 18 Aug 2017 16:41:13 +1000
Subject: [PATCH 3/5] tree unroller limit strided loads

---
 gcc/config/aarch64/aarch64.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
 gcc/doc/tm.texi              |  4 +++
 gcc/doc/tm.texi.in           |  2 ++
 gcc/target.def               |  8 +++++
 gcc/tree-ssa-loop-ivcanon.c  |  8 +++++
 5 files changed, 92 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7d1ee70..e88bb6c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -64,6 +64,7 @@
 #include "sched-int.h"
 #include "target-globals.h"
 #include "common/common-target.h"
+#include "tree-scalar-evolution.h"
 #include "selftest.h"
 #include "selftest-rtl.h"
 
@@ -15122,6 +15123,72 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
     }
 }
 
+/* Count the strided loads in the LOOP with respect to OUT_LOOP.
+   If the strided loads are larger (compared to MAX_STRIDED_LOADS),
+   we dont need to compute all of them.  */
+
+static unsigned
+count_mem_load_streams (struct loop *out_loop,
+			struct loop *loop,
+			unsigned max_strided_loads)
+{
+  basic_block *bbs = get_loop_body (loop);
+  unsigned nbbs = loop->num_nodes;
+  gimple_stmt_iterator gsi;
+  unsigned count = 0;
+
+  for (unsigned i = 0; i < nbbs; i++)
+    {
+      bool ok;
+      basic_block bb = bbs[i];
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (!is_gimple_assign (stmt)
+	      || !gimple_vuse (stmt))
+	    continue;
+	  tree op = gimple_assign_rhs1 (stmt);
+	  if (!INDIRECT_REF_P (op)
+	      && TREE_CODE (op) != MEM_REF
+	      && TREE_CODE (op) != TARGET_MEM_REF)
+	    continue;
+	  op = TREE_OPERAND (op, 0);
+	  tree ev = analyze_scalar_evolution (out_loop, op);
+	  ev = instantiate_parameters (loop, ev);
+	  if (no_evolution_in_loop_p (ev, out_loop->num, &ok) && !ok)
+	    count++;
+	  if (count >= max_strided_loads)
+	    return count;
+	}
+    }
+  return count;
+}
+
+/* Target hook that prevents complete loop unrolling if this would make
+   the outer loop's prefetch strems more than hardware can handle.  */
+
+static bool
+aarch64_ok_to_unroll (struct loop *loop, unsigned HOST_WIDE_INT nunroll)
+{
+  struct loop *loop_father;
+  unsigned loads;
+  unsigned outter_loads;
+
+  if (aarch64_tune_params.prefetch->hw_prefetchers_avail == -1)
+    return true;
+
+  if ((loop_father = loop_outer (loop)))
+    {
+      unsigned max_strided_loads = aarch64_tune_params.prefetch->hw_prefetchers_avail;
+      loads = count_mem_load_streams (loop_father, loop, max_strided_loads);
+      outter_loads = count_mem_load_streams (loop_father, loop_father, max_strided_loads);
+      if ((outter_loads + (nunroll - 1) * loads) > max_strided_loads)
+	return false;
+    }
+  return true;
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -15550,6 +15617,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
 
+#undef TARGET_OK_TO_UNROLL
+#define TARGET_OK_TO_UNROLL aarch64_ok_to_unroll
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 795e492..45cea4c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11617,6 +11617,10 @@ is required only when the target has special constraints like maximum
 number of memory accesses.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_OK_TO_UNROLL (struct loop *@var{loop_info}, unsigned HOST_WIDE_INT @var{nunroll})
+This hook should return false if target prefers loop should not be unrolled
+@end deftypefn
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 98f2e6b..64dfa51 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8155,6 +8155,8 @@ build_type_attribute_variant (@var{mdecl},
 
 @hook TARGET_LOOP_UNROLL_ADJUST
 
+@hook TARGET_OK_TO_UNROLL
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/target.def b/gcc/target.def
index bbd9c01..2f62328 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5120,6 +5120,14 @@ hardware divmod insn but defines target-specific divmod libfuncs.",
  void, (rtx libfunc, machine_mode mode, rtx op0, rtx op1, rtx *quot, rtx *rem),
  NULL)
 
+/* Target function to check complete unrolling of loop is  profitable for loop.  */
+DEFHOOK
+(ok_to_unroll,
+ "This hook should return false if target prefers loop should not be unrolled",
+ bool,
+ (struct loop *loop_info, unsigned HOST_WIDE_INT nunroll),
+ NULL)
+
 /* Return the class for a secondary reload, and fill in extra information.  */
 DEFHOOK
 (secondary_reload,
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index efb199a..c2016458 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-cfgcleanup.h"
 #include "builtins.h"
+#include "target.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -855,6 +856,13 @@ try_unroll_loop_completely (struct loop *loop,
 		     loop->num);
 	  return false;
 	}
+
+      if (targetm.ok_to_unroll
+	  && !targetm.ok_to_unroll (loop, n_unroll))
+	{
+	  return false;
+	}
+
       if (!n_unroll)
         dump_printf_loc (report_flags, locus,
                          "loop turned into non-loop; it never loops.\n");
-- 
2.7.4

Reply via email to