Hi,

To see what kind of code GCC generates for FOR_EACH_EDGE, I wrote a
simple dummy function.

int
dummy (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == NULL)
      return 1;

  return 0;
}

Vanilla mainline
----------------

The result is rather disappointing (even with checking disabled).  The
final SSA form looks like this.

<bb 0>:
  i$container_31 = &bb_30->succs;
  D.15496_68 = *i$container_31;
  vec__114 = &D.15496_68->base;

  # ivtmp.226_141 = PHI <0(0), ivtmp.226_152(7)>;
<L4>:;
  // Check if bb->succs is NULL.
  if (D.15496_68 != 0B) goto <L6>; else goto <L12>;

<L6>:;
  // Check ???
  if (vec__114 != 0B) goto <L10>; else goto <L12>;

<L10>:;
  D.15509_116 = vec__114->num;

  # D.15509_3 = PHI <D.15509_116(3), 0(1), 0(2)>;
<L12>:;
  // Have we reached the end of the vector?
  if (D.15509_3 != ivtmp.226_141) goto <L15>; else goto <L30>;

<L15>:;
  // Check if bb->succs is NULL (again).
  if (D.15496_68 != 0B) goto <L17>; else goto <L24>;

<L17>:;

  # vec__151 = PHI <vec__114(6), 0B(5)>;
<L24>:;
  e_99 = vec__151->vec[ivtmp.226_141];
  D.15229_81 = e_99->dest;
  ivtmp.226_152 = ivtmp.226_141 + 1;
  // If edge is NULL, return.
  if (D.15229_81 == 0B) goto <L30>; else goto <L4>;

  # D.15230_1 = PHI <1(7), 0(4)>;
<L30>:;
  return D.15230_1;

Assuming bb->succs is nonzero and has at least one element, the normal
mode of operation goes like so.

bb0 -> L4 -> L6 -> L10 -> L12 -> L15 -> L17 -> L24 -+
       ^                                            |
       +---------------------------------------------

So we have seven basic blocks for a seemingly simple traversal of an
array.  Ugh.

FOR_EACH_EDGE with VEC_iterate (part 1)
---------------------------------------

Having learned VEC_iterate recently from Nathan, I then tried

#define FOR_EACH_EDGE(EDGE,ITER,EDGE_VEC)                       \
  for ((ITER) = ei_start ((EDGE_VEC));                          \
       VEC_iterate (edge, (EDGE_VEC), (ITER).index, (EDGE));    \
       (ITER).index++)

This turned out to be good except that this version of FOR_EACH_EDGE
does not use the container field in the edge iterator.  (Note that if
I don't use the container field, I am evaluating EDGE_VEC multiple
times.)  Note that with SRA, the above loop is equivalent to

  for (i = 0; VEC_iterate (edge, vec, i, e); i++)

Here is what I get.

<bb 0>:
  D.15279_41 = bb_5->succs;
  // Check if bb->preds is NULL.
  if (D.15279_41 != 0B) goto <L17>; else goto <L13>;

  # ei$index_60 = PHI <ei$index_31(2), 0(6)>;
<L3>:;
  ei$index_31 = ei$index_60 + 1;
  // Have we reached the end of the vector?
  if (ei$index_31 >= D.15431_54) goto <L13>; else goto <L9>;

Invalid sum of incoming frequencies 9750, should be 9269
<L9>:;
  e_38 = vec__33->vec[ei$index_31];
  D.15274_29 = e_38->dest;
  // If edge is NULL, return.
  if (D.15274_29 == 0B) goto <L13>; else goto <L3>;

Invalid sum of incoming frequencies 3963, should be 4444
  # D.15275_1 = PHI <1(2), 0(1), 1(6), 0(0), 0(4), 0(5)>;
<L13>:;
  return D.15275_1;

<L17>:;
  vec__33 = &D.15279_41->base;
  // Check ???
  if (vec__33 == 0B) goto <L13>; else goto <L18>;

<L18>:;
  D.15431_54 = vec__33->num;
  // If we have an empty vector, return.
  if (D.15431_54 == 0) goto <L13>; else goto <L19>;

<L19>:;
  e_45 = vec__33->vec[0];
  D.15274_57 = e_45->dest;
  // If edge is NULL, return.
  if (D.15274_57 == 0B) goto <L13>; else goto <L3>;

For normal mode of operation, we have

bb0 -> L3 -> L17 -> L18 -> L19 -> L3 -> L9
                                  ^     |
                                  +-----+

Note that the loop is as small as two basic blocks.  L18 and L19 are
just a peeled version of L3 and L9.  Again, note that I am not using
the container field of the edge iterator.

FOR_EACH_EDGE with VEC_iterate (part 2)
---------------------------------------

Trying to use the container field of the edge vector turns out to
generate worse code, namely two induction variables.  For

#define FOR_EACH_EDGE(EDGE,ITER,EDGE_VEC)                               \
  for ((ITER) = ei_start ((EDGE_VEC));                                  \
       VEC_iterate (edge, *((ITER).container), (ITER).index, (EDGE));   \
       (ITER).index++)

we get

<bb 0>:
  i$container_6 = &bb_5->succs;
  D.15290_23 = *i$container_6;
  if (D.15290_23 != 0B) goto <L17>; else goto <L13>;

  # ivtmp.198_58 = PHI <ivtmp.198_7(2), ivtmp.198_31(7)>;
  # ivtmp.195_3 = PHI <ivtmp.195_53(2), 1(7)>;
<L3>:;
  if (ivtmp.195_3 == D.15442_34) goto <L13>; else goto <L9>;

Invalid sum of incoming frequencies 9750, should be 9269
<L9>:;
  D.15498_43 = (struct edge_def * *) ivtmp.198_58;
  D.15499_42 = D.15498_43 + 12B;
  vec__24 = D.15499_42;
  e_39 = *vec__24;
  D.15284_30 = e_39->dest;
  ivtmp.195_53 = ivtmp.195_3 + 1;
  ivtmp.198_7 = ivtmp.198_58 + 4B;
  if (D.15284_30 == 0B) goto <L13>; else goto <L3>;

Invalid sum of incoming frequencies 3963, should be 4444
  # D.15285_1 = PHI <1(2), 0(1), 1(6), 0(0), 0(4), 0(5)>;
<L13>:;
  return D.15285_1;

<L17>:;
  vec__44 = &D.15290_23->base;
  if (vec__44 == 0B) goto <L13>; else goto <L18>;

<L18>:;
  D.15442_34 = vec__44->num;
  if (D.15442_34 == 0) goto <L13>; else goto <L19>;

<L19>:;
  e_57 = vec__44->vec[0];
  D.15284_47 = e_57->dest;
  if (D.15284_47 == 0B) goto <L13>; else goto <L30>;

<L30>:;
  ivtmp.198_31 = vec__44;
  goto <bb 1> (<L3>);

For normal mode of operation, we have

bb0 -> L17 -> L18 -> L19 -> L30 -> L3 -> L9
                                   ^     |
                                   +-----+

This is similar to the previous case, but notice that we have two
induction variables, namely ivtmp.195_53 and ivtmp.198_7.  My guess is
that a bunch of * and & are confusing the optimizers, but that's just
my guess.

Can we somehow have the best of the two worlds?  I want to use the
container field (as in the last illustration), but I also want to use
VEC_iterate.

Note that the second case above speeds up GCC by 0.5% or so.

Kazu Hirata

Reply via email to