http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55802



             Bug #: 55802

           Summary: Various missed optimizations for a simple function in

                    GCC itself

    Classification: Unclassified

           Product: gcc

           Version: 4.8.0

            Status: UNCONFIRMED

          Severity: normal

          Priority: P3

         Component: tree-optimization

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: ste...@gcc.gnu.org





Consider this test case:



-------------- 8< -------------- 

typedef struct bitmap_head_def {

  unsigned int indx;

} bitmap_head;

typedef bitmap_head *bitmap;

extern int bitmap_ior_into (bitmap, bitmap);



struct basic_block_def;

typedef struct basic_block_def *basic_block;

struct edge_def;

typedef struct edge_def *edge;



struct basic_block_def {

  int index;

};



struct edge_def {

  basic_block src;

  basic_block dest;

  int flags;

};



struct dataflow

{

  void *block_info;

  unsigned int block_info_size;

};



struct df_d

{

  struct dataflow *problems_by_index[(7 + 1)];

};



extern struct df_d *df;



struct df_live_bb_info

{

  bitmap_head in, out;

};



static __inline__ struct df_live_bb_info *

df_live_get_bb_info (unsigned int index)

{

  if (index < (df->problems_by_index[2])->block_info_size)

    return &((struct df_live_bb_info *)

(df->problems_by_index[2])->block_info)[index];

  else

    return ((void *)0);

}



unsigned char df_live_confluence_n (edge e);

unsigned char

df_live_confluence_n (edge e)

{

  bitmap op1 = &df_live_get_bb_info (e->dest->index)->in;

  bitmap op2 = &df_live_get_bb_info (e->src->index)->out;



  if (e->flags & 0x0010)

    return 0;



  return bitmap_ior_into (op1, op2);

}

-------------- 8< -------------- 



Compiled at -O2 on powerpc64, the ".optimized" dump at trunk r194705

looks like this:



;; Function df_live_confluence_n (df_live_confluence_n, funcdef_no=1,

decl_uid=2030, cgraph_uid=1)



df_live_confluence_n (struct edge_def * e)

{

  struct df_d * df.0;

  struct bitmap_head * op2;

  struct bitmap_head * op1;

  unsigned char _1;

  struct basic_block_def * _5;

  int _6;

  unsigned int _7;

  struct basic_block_def * _9;

  int _10;

  unsigned int _11;

  int _13;

  int _14;

  int _16;

  unsigned char _17;

  struct dataflow * _19;

  unsigned int _20;

  void * _21;

  long unsigned int _22;

  long unsigned int _23;

  struct df_live_bb_info * _24;

  struct df_live_bb_info * _25;

  void * _26;

  long unsigned int _27;

  long unsigned int _28;

  struct df_live_bb_info * _29;

  struct df_live_bb_info * _30;



  <bb 2>:

  _5 = e_4(D)->dest;

  _6 = _5->index;

  _7 = (unsigned int) _6;

  df.0_18 = df;

  _19 = df.0_18->problems_by_index[2];

  _20 = _19->block_info_size;

  if (_7 < _20)

    goto <bb 3>;

  else

    goto <bb 4>;



  <bb 3>:

  _21 = _19->block_info;

  _22 = (long unsigned int) _7;

  _23 = _22 * 8;

  _24 = _21 + _23;



  <bb 4>:

  # _25 = PHI <0B(2), _24(3)>

  _9 = e_4(D)->src;

  _10 = _9->index;

  _11 = (unsigned int) _10;

  if (_11 < _20)

    goto <bb 5>;

  else

    goto <bb 6>;



  <bb 5>:

  _26 = _19->block_info;

  _27 = (long unsigned int) _11;

  _28 = _27 * 8;

  _29 = _26 + _28;



  <bb 6>:

  # _30 = PHI <0B(4), _29(5)>

  _13 = e_4(D)->flags;

  _14 = _13 & 16;

  if (_14 != 0)

    goto <bb 8>;

  else

    goto <bb 7>;



  <bb 7>:

  op2_12 = &_30->out;

  op1_8 = &_25->in;

  _16 = bitmap_ior_into (op1_8, op2_12);

  _17 = (unsigned char) _16;



  <bb 8>:

  # _1 = PHI <0(6), _17(7)>

  return _1;



}





There are several issues here.



First, the initializations of op1 and op2 are partially dead if the

"e->flags" test is true. But this optimization is not performed.

Even if the test is modified to assert that the basic block index is

always smaller than the block_info_size (linearizing the code to load

the bitmap addresses) the partially dead code is still not moved down

below the condition.



Second, the load from block_info is performed twice, even with profile

information available. The load should be speculated.



This all leads to very branchy code...

Reply via email to