------- Additional Comments From jakub at gcc dot gnu dot org  2005-04-29 12:17 
-------
From the *.t02.original dump, I'd say it is pretty much obvious what's wrong.
compute_inner_temp_size computes inner temporary size as:
D.905 = (int8) v[(int8) i.28 + -1].s;
but computes this before the 3 loops it then uses it in.
But that obviously depends on i.28 counter, which goes from 1 to 5 inclusive
in each of the loops.
But as the D.905 assignment is evaluated before all 3 of the loops and not
inside them, as it IMHO should be, it uses whatever value i.28 had before those
3 loops.  In the forall_3.f90 case there happens to be another loop before
that statement, so i.28 will be 6, so this will actually set
D.905 to (int8) v[(int8) 5].s, which is the first array element after the end
of the array.  It happens to be often 0 with -O2, which results in 0 bytes
being allocated for the temporary array, but it magically works with -O0,
because some fairly big number happens to be in that memory location.

    i.28 = 1;
    count.31 = 5;
    while (1)
      {
        if (count.31 <= 0) goto L.7; else (void) 0;
        temp.29[mi.30] = v[(int8) i.28 + -1].valid;
        mi.30 = mi.30 + 1;
        i.28 = i.28 + 1;
        count.31 = count.31 - 1;
      }
    L.7:;
    count1.32 = 0;
    D.905 = (int8) v[(int8) i.28 + -1].s;
    num.34 = 0;
    i.28 = 1;
    count.35 = 5;
    while (1)
      {
        if (count.35 <= 0) goto L.8; else (void) 0;
        num.34 = num.34 + D.905;
        i.28 = i.28 + 1;
        count.35 = count.35 - 1;
      }
    L.8:;
    temp.36 = (int4[0:] *) _gfortran_internal_malloc64 (num.34 * 4);
    mi.30 = 0;
    i.28 = 1;
    count.38 = 5;
    while (1)
      {
        if (count.38 <= 0) goto L.10; else (void) 0;
        if (temp.29[mi.30])
          {
            {
              int8 D.913;
              int4[0:] * D.912;
              int8 D.911;
              int4[0:] * D.910;

              count2.33 = 0;
              D.910 = v[(int8) (6 - i.28) + -1].p.data;
              D.911 = v[(int8) (6 - i.28) + -1].p.offset;
              D.912 = v[(int8) i.28 + -1].p.data;
              D.913 = v[(int8) i.28 + -1].p.offset;
              {
                int8 D.915;
                int8 S.37;

                D.915 = v[(int8) (6 - i.28) + -1].p.dim[0].stride;
                S.37 = 1;
                while (1)
                  {
                    if (S.37 > (int8) v[(int8) i.28 + -1].s) goto L.9; else (void) 0;
                    (*temp.36)[count1.32 + count2.33] = (*D.910)[NON_LVALUE_EXPR <S.37> * D.915 + D.911];
                    count2.33 = count2.33 + 1;
                    S.37 = S.37 + 1;
                  }
                L.9:;
              }
              count1.32 = count1.32 + D.905;
            }
          }
        else
          {
            (void) 0;
          }
        i.28 = i.28 + 1;
        mi.30 = mi.30 + 1;
        count.38 = count.38 - 1;
      }
    L.10:;
    count1.32 = 0;
    mi.30 = 0;
    i.28 = 1;
    count.40 = 5;
    while (1)
      {
        if (count.40 <= 0) goto L.12; else (void) 0;
        if (temp.29[mi.30])
          {
            {
              int8 D.920;
              int4[0:] * D.919;

              count2.33 = 0;
              D.919 = v[(int8) i.28 + -1].p.data;
              D.920 = v[(int8) i.28 + -1].p.offset;
              {
                int8 D.922;
                int8 S.39;

                D.922 = v[(int8) i.28 + -1].p.dim[0].stride;
                S.39 = 1;
                while (1)
                  {
                    if (S.39 > (int8) v[(int8) i.28 + -1].s) goto L.11; else (void) 0;
                    (*D.919)[NON_LVALUE_EXPR <S.39> * D.922 + D.920] = (*temp.36)[count1.32 + count2.33];
                    count2.33 = count2.33 + 1;
                    S.39 = S.39 + 1;
                  }
                L.11:;
              }
              count1.32 = count1.32 + D.905;
            }
          }
        else
          {
            (void) 0;
          }
        i.28 = i.28 + 1;
        mi.30 = mi.30 + 1;
        count.40 = count.40 - 1;
      }
    L.12:;
    _gfortran_internal_free ((void *) temp.36);
  }

At least in this particular case, D.905 could be computed inside of the
first loop that uses it and not be computed in the subsequent loops at all
(simply use one counter that goes from 0 all the way up instead of using
two counters), but I'm not sure if that is possible for all FORALL loops.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15080

Reply via email to