gcc/ChangeLog: 2015-07-16 Aditya Kumar <aditya...@samsung.com> Sebastian Pop <s....@samsung.com>
* common.opt (floop-fuse): New. * doc/invoke.texi (floop-fuse): Documented. * graphite-optimize-isl.c (optimize_isl): Use ISL_SCHEDULE_FUSE_MAX when using flag_loop_fuse. * graphite-poly.c (apply_poly_transforms): Call optimize_isl when using flag_loop_fuse. * graphite.c (gate_graphite_transforms): Enable graphite with flag_loop_fuse. gcc/testsuite/ChangeLog: 2015-07-16 Aditya Kumar <aditya...@samsung.com> Sebastian Pop <s....@samsung.com> * gcc.dg/graphite/fuse-1.c: New test. * gcc.dg/graphite/fuse-2.c: New test. --- gcc/common.opt | 4 ++++ gcc/doc/invoke.texi | 23 +++++++++++++++++++- gcc/graphite-optimize-isl.c | 5 ++++- gcc/graphite-poly.c | 2 +- gcc/graphite.c | 3 ++- gcc/testsuite/gcc.dg/graphite/fuse-1.c | 32 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/graphite/fuse-2.c | 38 ++++++++++++++++++++++++++++++++++ 7 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c diff --git a/gcc/common.opt b/gcc/common.opt index dd49ae3..200ecc1 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1365,6 +1365,10 @@ floop-nest-optimize Common Report Var(flag_loop_optimize_isl) Optimization Enable the ISL based loop nest optimizer +floop-fuse +Common Report Var(flag_loop_fuse) Optimization +Enable loop fusion + fstrict-volatile-bitfields Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization Force bitfield accesses to match their type width diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b99ab1c..7cc8bb9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -409,7 +409,7 @@ Objective-C and Objective-C++ Dialects}. 
-fivopts -fkeep-inline-functions -fkeep-static-consts @gol -flive-range-shrinkage @gol -floop-block -floop-interchange -floop-strip-mine @gol --floop-unroll-and-jam -floop-nest-optimize @gol +-floop-unroll-and-jam -floop-nest-optimize -floop-fuse @gol -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol @@ -8796,6 +8796,27 @@ optimizer based on the Pluto optimization algorithms. It calculates a loop structure optimized for data-locality and parallelism. This option is experimental. +@item -floop-fuse +@opindex floop-fuse +Enable loop fusion. This option is experimental. + +For example, given two loops like: +@smallexample +DO I = 1, N + A(I) = A(I) + B(I) +ENDDO +DO I = 1, N + A(I) = A(I) + C(I) +ENDDO +@end smallexample +@noindent +loop fusion transforms the loops as if they were written: +@smallexample +DO I = 1, N + A(I) = A(I) + B(I) + C(I) +ENDDO +@end smallexample + @item -floop-unroll-and-jam @opindex floop-unroll-and-jam Enable unroll and jam for the ISL based loop nest optimizer.
The unroll diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c index 624cc87..c016461 100644 --- a/gcc/graphite-optimize-isl.c +++ b/gcc/graphite-optimize-isl.c @@ -599,7 +599,10 @@ optimize_isl (scop_p scop) isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND); isl_options_set_schedule_maximize_band_depth (scop->ctx, 1); - isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN); + if (flag_loop_fuse) + isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MAX); + else + isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN); isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE); #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c index 4407dc5..4808fbe 100644 --- a/gcc/graphite-poly.c +++ b/gcc/graphite-poly.c @@ -272,7 +272,7 @@ apply_poly_transforms (scop_p scop) /* This pass needs to be run at the final stage, as it does not update the lst. */ - if (flag_loop_optimize_isl || flag_loop_unroll_jam) + if (flag_loop_optimize_isl || flag_loop_unroll_jam || flag_loop_fuse) transform_done |= optimize_isl (scop); return transform_done; diff --git a/gcc/graphite.c b/gcc/graphite.c index ba8029a..51af1a2a 100644 --- a/gcc/graphite.c +++ b/gcc/graphite.c @@ -342,7 +342,8 @@ gate_graphite_transforms (void) || flag_graphite_identity || flag_loop_parallelize_all || flag_loop_optimize_isl - || flag_loop_unroll_jam) + || flag_loop_unroll_jam + || flag_loop_fuse) flag_graphite = 1; return flag_graphite != 0; diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c new file mode 100644 index 0000000..f368f47 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c @@ -0,0 +1,32 @@ +/* Check that the two loops are fused and that we manage to fold the two xor + operations. 
*/ +/* { dg-options "-O2 -floop-fuse -fdump-tree-forwprop-all" } */ +/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */ +/* { dg-do run } */ + +#define MAX 100 +int A[MAX]; + +extern void abort (); + +void fuse() { +} + +int +main (void) +{ + int i; + + for (i = 0; i < MAX; i++) + A[i] = i; + for(int i=0; i<MAX; i++) + A[i] ^= 4; + for(int i=0; i<MAX; i++) + A[i] ^= 8; + + for (i = 0; i < MAX; i++) + if (A[i] != (i ^ 12)) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c new file mode 100644 index 0000000..e1a1cb3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c @@ -0,0 +1,38 @@ +/* Check that the three loops are fused. */ +/* { dg-options "-O2 -floop-fuse" } */ +/* { dg-do run } */ + +/* FIXME: Add a graphite dump mechanism to print the number of loops generated + by ISL and pattern match it. */ + +#define MAX 100 +int A[MAX], B[MAX], C[MAX]; + +extern void abort (); + +void fuse() { +} + +int +main (void) +{ + int i; + + /* The next three loops should be fused. */ + for (i = 0; i < MAX; i++) + { + A[i] = i; + B[i] = i + 2; + C[i] = i + 1; + } + for(int i=0; i<MAX; i++) + A[i] += B[i]; + for(int i=0; i<MAX; i++) + A[i] += C[i]; + + for (i = 0; i < MAX; i++) + if (A[i] != 3*i+3) + abort (); + + return 0; +} -- 2.1.0.243.g30d45f7