On 07/17/2015 12:35 AM, Sebastian Pop wrote:
gcc/ChangeLog:
2015-07-16 Aditya Kumar <aditya...@samsung.com>
Sebastian Pop <s....@samsung.com>
* common.opt (floop-fuse): New.
* doc/invoke.texi (floop-fuse): Documented.
* graphite-optimize-isl.c (optimize_isl): Use
ISL_SCHEDULE_FUSE_MAX when using flag_loop_fuse.
* graphite-poly.c (apply_poly_transforms): Call optimize_isl when
using flag_loop_fuse.
* graphite.c (gate_graphite_transforms): Enable graphite with
flag_loop_fuse.
LGTM.
Tobias
gcc/testsuite/ChangeLog:
2015-07-16 Aditya Kumar <aditya...@samsung.com>
Sebastian Pop <s....@samsung.com>
* gcc.dg/graphite/fuse-1.c: New test.
* gcc.dg/graphite/fuse-2.c: New test.
---
gcc/common.opt | 4 ++++
gcc/doc/invoke.texi | 23 +++++++++++++++++++-
gcc/graphite-optimize-isl.c | 5 ++++-
gcc/graphite-poly.c | 2 +-
gcc/graphite.c | 3 ++-
gcc/testsuite/gcc.dg/graphite/fuse-1.c | 32 ++++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/graphite/fuse-2.c | 38 ++++++++++++++++++++++++++++++++++
7 files changed, 103 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c
create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c
diff --git a/gcc/common.opt b/gcc/common.opt
index dd49ae3..200ecc1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1365,6 +1365,10 @@ floop-nest-optimize
Common Report Var(flag_loop_optimize_isl) Optimization
Enable the ISL based loop nest optimizer
+floop-fuse
+Common Report Var(flag_loop_fuse) Optimization
+Enable loop fusion
+
fstrict-volatile-bitfields
Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b99ab1c..7cc8bb9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -409,7 +409,7 @@ Objective-C and Objective-C++ Dialects}.
-fivopts -fkeep-inline-functions -fkeep-static-consts @gol
-flive-range-shrinkage @gol
-floop-block -floop-interchange -floop-strip-mine @gol
--floop-unroll-and-jam -floop-nest-optimize @gol
+-floop-unroll-and-jam -floop-nest-optimize -floop-fuse @gol
-floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
-flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol
-fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
@@ -8796,6 +8796,27 @@ optimizer based on the Pluto optimization algorithms. It calculates a loop
structure optimized for data-locality and parallelism. This option
is experimental.
+@item -floop-fuse
+@opindex floop-fuse
+Enable loop fusion. This option is experimental.
+
+For example, given loops like:
+@smallexample
+DO I = 1, N
+ A(I) = A(I) + B(I)
+ENDDO
+DO I = 1, N
+ A(I) = A(I) + C(I)
+ENDDO
+@end smallexample
+@noindent
+loop fusion transforms the loops as if they were written:
+@smallexample
+DO I = 1, N
+ A(I) = A(I) + B(I) + C(I)
+ENDDO
+@end smallexample
+
@item -floop-unroll-and-jam
@opindex floop-unroll-and-jam
Enable unroll and jam for the ISL based loop nest optimizer. The unroll
diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index 624cc87..c016461 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -599,7 +599,10 @@ optimize_isl (scop_p scop)
isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
- isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
+ if (flag_loop_fuse)
+ isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MAX);
+ else
+ isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
#ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 4407dc5..4808fbe 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop)
/* This pass needs to be run at the final stage, as it does not
update the lst. */
- if (flag_loop_optimize_isl || flag_loop_unroll_jam)
+ if (flag_loop_optimize_isl || flag_loop_unroll_jam || flag_loop_fuse)
transform_done |= optimize_isl (scop);
return transform_done;
diff --git a/gcc/graphite.c b/gcc/graphite.c
index ba8029a..51af1a2a 100644
--- a/gcc/graphite.c
+++ b/gcc/graphite.c
@@ -342,7 +342,8 @@ gate_graphite_transforms (void)
|| flag_graphite_identity
|| flag_loop_parallelize_all
|| flag_loop_optimize_isl
- || flag_loop_unroll_jam)
+ || flag_loop_unroll_jam
+ || flag_loop_fuse)
flag_graphite = 1;
return flag_graphite != 0;
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
new file mode 100644
index 0000000..f368f47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
@@ -0,0 +1,32 @@
+/* Check that the two loops are fused and that we manage to fold the two xor
+ operations. */
+/* { dg-options "-O2 -floop-fuse -fdump-tree-forwprop-all" } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */
+/* { dg-do run } */
+
+#define MAX 100
+int A[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+ int i;
+
+ for (i = 0; i < MAX; i++)
+ A[i] = i;
+ for(int i=0; i<MAX; i++)
+ A[i] ^= 4;
+ for(int i=0; i<MAX; i++)
+ A[i] ^= 8;
+
+ for (i = 0; i < MAX; i++)
+ if (A[i] != (i ^ 12))
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
new file mode 100644
index 0000000..e1a1cb3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
@@ -0,0 +1,38 @@
+/* Check that the three loops are fused. */
+/* { dg-options "-O2 -floop-fuse" } */
+/* { dg-do run } */
+
+/* FIXME: Add a graphite dump mechanism to print the number of loops generated
+ by ISL and pattern match it. */
+
+#define MAX 100
+int A[MAX], B[MAX], C[MAX];
+
+extern void abort ();
+
+void fuse() {
+}
+
+int
+main (void)
+{
+ int i;
+
+ /* The next three loops should be fused. */
+ for (i = 0; i < MAX; i++)
+ {
+ A[i] = i;
+ B[i] = i + 2;
+ C[i] = i + 1;
+ }
+ for(int i=0; i<MAX; i++)
+ A[i] += B[i];
+ for(int i=0; i<MAX; i++)
+ A[i] += C[i];
+
+ for (i = 0; i < MAX; i++)
+ if (A[i] != 3*i+3)
+ abort ();
+
+ return 0;
+}