gcc/ChangeLog:

2015-07-16  Aditya Kumar  <aditya...@samsung.com>
                Sebastian Pop  <s....@samsung.com>

        * common.opt (floop-fuse): New.
        * doc/invoke.texi (floop-fuse): Documented.
        * graphite-optimize-isl.c (optimize_isl): Use
        ISL_SCHEDULE_FUSE_MAX when using flag_loop_fuse.
        * graphite-poly.c (apply_poly_transforms): Call optimize_isl when
        using flag_loop_fuse.
        * graphite.c (gate_graphite_transforms): Enable graphite with
        flag_loop_fuse.

gcc/testsuite/ChangeLog:

2015-07-16  Aditya Kumar  <aditya...@samsung.com>
                Sebastian Pop  <s....@samsung.com>

        * gcc.dg/graphite/fuse-1.c: New test.
        * gcc.dg/graphite/fuse-2.c: New test.
---
 gcc/common.opt                         |  4 ++++
 gcc/doc/invoke.texi                    | 23 +++++++++++++++++++-
 gcc/graphite-optimize-isl.c            |  5 ++++-
 gcc/graphite-poly.c                    |  2 +-
 gcc/graphite.c                         |  3 ++-
 gcc/testsuite/gcc.dg/graphite/fuse-1.c | 32 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/graphite/fuse-2.c | 38 ++++++++++++++++++++++++++++++++++
 7 files changed, 103 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c

diff --git a/gcc/common.opt b/gcc/common.opt
index dd49ae3..200ecc1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1365,6 +1365,10 @@ floop-nest-optimize
 Common Report Var(flag_loop_optimize_isl) Optimization
 Enable the ISL based loop nest optimizer
 
+floop-fuse
+Common Report Var(flag_loop_fuse) Optimization
+Enable loop fusion
+
 fstrict-volatile-bitfields
 Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
 Force bitfield accesses to match their type width
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b99ab1c..7cc8bb9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -409,7 +409,7 @@ Objective-C and Objective-C++ Dialects}.
 -fivopts -fkeep-inline-functions -fkeep-static-consts @gol
 -flive-range-shrinkage @gol
 -floop-block -floop-interchange -floop-strip-mine @gol
--floop-unroll-and-jam -floop-nest-optimize @gol
+-floop-unroll-and-jam -floop-nest-optimize -floop-fuse @gol
 -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
 -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol
 -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
@@ -8796,6 +8796,27 @@ optimizer based on the Pluto optimization algorithms.  It calculates a loop
 structure optimized for data-locality and parallelism.  This option
 is experimental.
 
+@item -floop-fuse
+@opindex floop-fuse
+Enable loop fusion.  This option is experimental.
+
+For example, given two loops like:
+@smallexample
+DO I = 1, N
+  A(I) = A(I) + B(I)
+ENDDO
+DO I = 1, N
+  A(I) = A(I) + C(I)
+ENDDO
+@end smallexample
+@noindent
+loop fusion transforms them as if they were written:
+@smallexample
+DO I = 1, N
+  A(I) = A(I) + B(I) + C(I)
+ENDDO
+@end smallexample
+
 @item -floop-unroll-and-jam
 @opindex floop-unroll-and-jam
 Enable unroll and jam for the ISL based loop nest optimizer.  The unroll 
diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index 624cc87..c016461 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -599,7 +599,10 @@ optimize_isl (scop_p scop)
 
   isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
   isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
-  isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
+  if (flag_loop_fuse)
+    isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MAX);
+  else
+    isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
   isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
 
 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 4407dc5..4808fbe 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop)
 
   /* This pass needs to be run at the final stage, as it does not
      update the lst.  */
-  if (flag_loop_optimize_isl || flag_loop_unroll_jam)
+  if (flag_loop_optimize_isl || flag_loop_unroll_jam || flag_loop_fuse)
     transform_done |= optimize_isl (scop);
 
   return transform_done;
diff --git a/gcc/graphite.c b/gcc/graphite.c
index ba8029a..51af1a2a 100644
--- a/gcc/graphite.c
+++ b/gcc/graphite.c
@@ -342,7 +342,8 @@ gate_graphite_transforms (void)
       || flag_graphite_identity
       || flag_loop_parallelize_all
       || flag_loop_optimize_isl
-      || flag_loop_unroll_jam)
+      || flag_loop_unroll_jam
+      || flag_loop_fuse)
     flag_graphite = 1;
 
   return flag_graphite != 0;
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
new file mode 100644
index 0000000..f368f47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
@@ -0,0 +1,32 @@
+/* Check that the two loops are fused and that we manage to fold the two xor
+   operations.  */
+/* { dg-options "-O2 -floop-fuse -fdump-tree-forwprop-all" } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */
+/* { dg-do run } */
+
+#define MAX 100
+int A[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < MAX; i++)
+    A[i] = i;
+  for(int i=0; i<MAX; i++)
+    A[i] ^= 4;
+  for(int i=0; i<MAX; i++)
+    A[i] ^= 8;
+
+  for (i = 0; i < MAX; i++)
+    if (A[i] != (i ^ 12))
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
new file mode 100644
index 0000000..e1a1cb3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
@@ -0,0 +1,38 @@
+/* Check that the three loops are fused.  */
+/* { dg-options "-O2 -floop-fuse" } */
+/* { dg-do run } */
+
+/* FIXME: Add a graphite dump mechanism to print the number of loops generated
+   by ISL and pattern match it.  */
+
+#define MAX 100
+int A[MAX], B[MAX], C[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+  int i;
+
+  /* The next three loops should be fused.  */
+  for (i = 0; i < MAX; i++)
+    {
+      A[i] = i;
+      B[i] = i + 2;
+      C[i] = i + 1;
+    }
+  for(int i=0; i<MAX; i++)
+    A[i] += B[i];
+  for(int i=0; i<MAX; i++)
+    A[i] += C[i];
+
+  for (i = 0; i < MAX; i++)
+    if (A[i] != 3*i+3)
+      abort ();
+
+  return 0;
+}
-- 
2.1.0.243.g30d45f7

Reply via email to