On 07/17/2015 12:35 AM, Sebastian Pop wrote:
gcc/ChangeLog:

2015-07-16  Aditya Kumar  <aditya...@samsung.com>
                 Sebastian Pop  <s....@samsung.com>

         * common.opt (floop-fuse): New.
         * doc/invoke.texi (floop-fuse): Documented.
         * graphite-optimize-isl.c (optimize_isl): Use
         ISL_SCHEDULE_FUSE_MAX when using flag_loop_fuse.
         * graphite-poly.c (apply_poly_transforms): Call optimize_isl when
         using flag_loop_fuse.
         * graphite.c (gate_graphite_transforms): Enable graphite with
         flag_loop_fuse.

LGTM.

Tobias

gcc/testsuite/ChangeLog:

2015-07-16  Aditya Kumar  <aditya...@samsung.com>
                 Sebastian Pop  <s....@samsung.com>

         * gcc.dg/graphite/fuse-1.c: New test.
         * gcc.dg/graphite/fuse-2.c: New test.
---
  gcc/common.opt                         |  4 ++++
  gcc/doc/invoke.texi                    | 23 +++++++++++++++++++-
  gcc/graphite-optimize-isl.c            |  5 ++++-
  gcc/graphite-poly.c                    |  2 +-
  gcc/graphite.c                         |  3 ++-
  gcc/testsuite/gcc.dg/graphite/fuse-1.c | 32 ++++++++++++++++++++++++++++
  gcc/testsuite/gcc.dg/graphite/fuse-2.c | 38 ++++++++++++++++++++++++++++++++++
  7 files changed, 103 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c
  create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c

diff --git a/gcc/common.opt b/gcc/common.opt
index dd49ae3..200ecc1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1365,6 +1365,10 @@ floop-nest-optimize
  Common Report Var(flag_loop_optimize_isl) Optimization
  Enable the ISL based loop nest optimizer

+floop-fuse
+Common Report Var(flag_loop_fuse) Optimization
+Enable loop fusion
+
  fstrict-volatile-bitfields
  Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
  Force bitfield accesses to match their type width
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b99ab1c..7cc8bb9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -409,7 +409,7 @@ Objective-C and Objective-C++ Dialects}.
  -fivopts -fkeep-inline-functions -fkeep-static-consts @gol
  -flive-range-shrinkage @gol
  -floop-block -floop-interchange -floop-strip-mine @gol
--floop-unroll-and-jam -floop-nest-optimize @gol
+-floop-unroll-and-jam -floop-nest-optimize -floop-fuse @gol
  -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
  -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol
  -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
@@ -8796,6 +8796,27 @@ optimizer based on the Pluto optimization algorithms.  It calculates a loop
  structure optimized for data-locality and parallelism.  This option
  is experimental.

+@item -floop-fuse
+@opindex floop-fuse
+Enable loop fusion.  This option is experimental.
+
+For example, given two loops like:
+@smallexample
+DO I = 1, N
+  A(I) = A(I) + B(I)
+ENDDO
+DO I = 1, N
+  A(I) = A(I) + C(I)
+ENDDO
+@end smallexample
+@noindent
+loop fusion transforms the loops as if they were written:
+@smallexample
+DO I = 1, N
+  A(I) = A(I) + B(I) + C(I)
+ENDDO
+@end smallexample
+
  @item -floop-unroll-and-jam
  @opindex floop-unroll-and-jam
  Enable unroll and jam for the ISL based loop nest optimizer.  The unroll
diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index 624cc87..c016461 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -599,7 +599,10 @@ optimize_isl (scop_p scop)

    isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
    isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
-  isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
+  if (flag_loop_fuse)
+    isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MAX);
+  else
+    isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
    isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);

  #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 4407dc5..4808fbe 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop)

    /* This pass needs to be run at the final stage, as it does not
       update the lst.  */
-  if (flag_loop_optimize_isl || flag_loop_unroll_jam)
+  if (flag_loop_optimize_isl || flag_loop_unroll_jam || flag_loop_fuse)
      transform_done |= optimize_isl (scop);

    return transform_done;
diff --git a/gcc/graphite.c b/gcc/graphite.c
index ba8029a..51af1a2a 100644
--- a/gcc/graphite.c
+++ b/gcc/graphite.c
@@ -342,7 +342,8 @@ gate_graphite_transforms (void)
        || flag_graphite_identity
        || flag_loop_parallelize_all
        || flag_loop_optimize_isl
-      || flag_loop_unroll_jam)
+      || flag_loop_unroll_jam
+      || flag_loop_fuse)
      flag_graphite = 1;

    return flag_graphite != 0;
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
new file mode 100644
index 0000000..f368f47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
@@ -0,0 +1,32 @@
+/* Check that the two loops are fused and that we manage to fold the two xor
+   operations.  */
+/* { dg-options "-O2 -floop-fuse -fdump-tree-forwprop-all" } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */
+/* { dg-do run } */
+
+#define MAX 100
+int A[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < MAX; i++)
+    A[i] = i;
+  for(int i=0; i<MAX; i++)
+    A[i] ^= 4;
+  for(int i=0; i<MAX; i++)
+    A[i] ^= 8;
+
+  for (i = 0; i < MAX; i++)
+    if (A[i] != (i ^ 12))
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
new file mode 100644
index 0000000..e1a1cb3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
@@ -0,0 +1,38 @@
+/* Check that the three loops are fused.  */
+/* { dg-options "-O2 -floop-fuse" } */
+/* { dg-do run } */
+
+/* FIXME: Add a graphite dump mechanism to print the number of loops generated
+   by ISL and pattern match it.  */
+
+#define MAX 100
+int A[MAX], B[MAX], C[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+  int i;
+
+  /* The next three loops should be fused.  */
+  for (i = 0; i < MAX; i++)
+    {
+      A[i] = i;
+      B[i] = i + 2;
+      C[i] = i + 1;
+    }
+  for(int i=0; i<MAX; i++)
+    A[i] += B[i];
+  for(int i=0; i<MAX; i++)
+    A[i] += C[i];
+
+  for (i = 0; i < MAX; i++)
+    if (A[i] != 3*i+3)
+      abort ();
+
+  return 0;
+}


Reply via email to