On Wed, Apr 7, 2021 at 6:43 PM Richard Biener <rguent...@suse.de> wrote: > > When we apply store motion and DSE manually to the bwaves kernel > in gfortran.dg/pr81303.f loop interchange no longer happens because > the perfect nest considered covers outer loops we cannot analyze > strides for. The following compensates for this by shrinking the > nest in this analysis which was already possible but on a too coarse > granularity. It shares the shrinked nest with the rest of the DRs > so the complexity overhead should be negligible. > > Bootstrapped & tested on x86_64-unknown-linux-gnu, SPEC FP CPU 2017 > build and there's still only the single loop interchange in bwaves. > > Queued for stage1.
And applied as g:6ff66d1ea48960fe96bb51a750c01135e65fe452 > 2021-04-07 Richard Biener <rguent...@suse.de> > > PR tree-optimization/99956 > * gimple-loop-interchange.cc (compute_access_stride): > Try instantiating the access in a shallower loop nest > if instantiating failed. > (compute_access_strides): Pass adjustable loop_nest > to compute_access_stride. > > * gfortran.dg/pr99956.f: New testcase. > --- > gcc/gimple-loop-interchange.cc | 68 +++++++++++++++++------------ > gcc/testsuite/gfortran.dg/pr99956.f | 45 +++++++++++++++++++ > 2 files changed, 85 insertions(+), 28 deletions(-) > create mode 100644 gcc/testsuite/gfortran.dg/pr99956.f > > diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc > index f45b9364644..80f749b6071 100644 > --- a/gcc/gimple-loop-interchange.cc > +++ b/gcc/gimple-loop-interchange.cc > @@ -1280,12 +1280,15 @@ tree_loop_interchange::move_code_to_inner_loop (class > loop *outer, > arr[i][j - 1][k] = 0; */ > > static void > -compute_access_stride (class loop *loop_nest, class loop *loop, > +compute_access_stride (class loop *&loop_nest, class loop *loop, > data_reference_p dr) > { > vec<tree> *strides = new vec<tree> (); > - basic_block bb = gimple_bb (DR_STMT (dr)); > + dr->aux = strides; > > + basic_block bb = gimple_bb (DR_STMT (dr)); > + if (!flow_bb_inside_loop_p (loop_nest, bb)) > + return; > while (!flow_bb_inside_loop_p (loop, bb)) > { > strides->safe_push (build_int_cst (sizetype, 0)); > @@ -1313,39 +1316,47 @@ compute_access_stride (class loop *loop_nest, class > loop *loop, > } > /* Otherwise punt. */ > else > - { > - dr->aux = strides; > - return; > - } > + return; > } > tree scev_base = build_fold_addr_expr (ref); > tree scev = analyze_scalar_evolution (loop, scev_base); > - scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); > - if (! chrec_contains_undetermined (scev)) > + if (chrec_contains_undetermined (scev)) > + return; > + > + tree orig_scev = scev; > + do > + { > + scev = instantiate_scev (loop_preheader_edge (loop_nest), > + loop, orig_scev); > + if (! chrec_contains_undetermined (scev)) > + break; > + > + /* If we couldn't instantiate for the desired nest, shrink it. */ > + if (loop_nest == loop) > + return; > + loop_nest = loop_nest->inner; > + } while (1); > + > + tree sl = scev; > + class loop *expected = loop; > + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > { > - tree sl = scev; > - class loop *expected = loop; > - while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > + class loop *sl_loop = get_chrec_loop (sl); > + while (sl_loop != expected) > { > - class loop *sl_loop = get_chrec_loop (sl); > - while (sl_loop != expected) > - { > - strides->safe_push (size_int (0)); > - expected = loop_outer (expected); > - } > - strides->safe_push (CHREC_RIGHT (sl)); > - sl = CHREC_LEFT (sl); > + strides->safe_push (size_int (0)); > expected = loop_outer (expected); > } > - if (! tree_contains_chrecs (sl, NULL)) > - while (expected != loop_outer (loop_nest)) > - { > - strides->safe_push (size_int (0)); > - expected = loop_outer (expected); > - } > + strides->safe_push (CHREC_RIGHT (sl)); > + sl = CHREC_LEFT (sl); > + expected = loop_outer (expected); > } > - > - dr->aux = strides; > + if (! tree_contains_chrecs (sl, NULL)) > + while (expected != loop_outer (loop_nest)) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > } > > /* Given loop nest LOOP_NEST with innermost LOOP, the function computes > @@ -1363,9 +1374,10 @@ compute_access_strides (class loop *loop_nest, class > loop *loop, > data_reference_p dr; > vec<tree> *stride; > > + class loop *interesting_loop_nest = loop_nest; > for (i = 0; datarefs.iterate (i, &dr); ++i) > { > - compute_access_stride (loop_nest, loop, dr); > + compute_access_stride (interesting_loop_nest, loop, dr); > stride = DR_ACCESS_STRIDE (dr); > if (stride->length () < num_loops) > { > diff --git a/gcc/testsuite/gfortran.dg/pr99956.f > b/gcc/testsuite/gfortran.dg/pr99956.f > new file mode 100644 > index 00000000000..b5c0be3912d > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/pr99956.f > @@ -0,0 +1,45 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -floop-interchange > -fdump-tree-linterchange-details" } > + > + subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm, > + $ nb,nx,ny,nz) > + implicit none > + integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1 > + > + real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz),tem > + > + real*8 a(nb,nb,nx,ny,nz), > + 1 axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz), > + 2 axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz) > + > + > + do k=1,nz > + km1=mod(k+nz-2,nz)+1 > + kp1=mod(k,nz)+1 > + do j=1,ny > + jm1=mod(j+ny-2,ny)+1 > + jp1=mod(j,ny)+1 > + do i=1,nx > + im1=mod(i+nx-2,nx)+1 > + ip1=mod(i,nx)+1 > + do l=1,nb > + tem=0.0 > + do m=1,nb > + tem=tem+ > + 1 a(l,m,i,j,k)*x(m,i,j,k)+ > + 2 axp(l,m,i,j,k)*x(m,ip1,j,k)+ > + 3 ayp(l,m,i,j,k)*x(m,i,jp1,k)+ > + 4 azp(l,m,i,j,k)*x(m,i,j,kp1)+ > + 5 axm(l,m,i,j,k)*x(m,im1,j,k)+ > + 6 aym(l,m,i,j,k)*x(m,i,jm1,k)+ > + 7 azm(l,m,i,j,k)*x(m,i,j,km1) > + enddo > + y(l,i,j,k)=tem > + enddo > + enddo > + enddo > + enddo > + return > + end > + > +! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } } > -- > 2.26.2