http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56522



--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> 2013-03-06 
15:06:41 UTC ---

Created attachment 29598

  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29598

assign.c



With -O3 -march=corei7 -fomit-frame-pointer -funroll-loops -ffast-math

the difference in the *.optimized dump from r196262 to r196263 is just:

@@ -176,7 +176,6 @@ Assignment (long int[101] * x)

   short int[101][101] * pretmp_418;

   long int _429;

   long int _431;

-  unsigned long _432;

   long unsigned int patt_438;

   unsigned int _440;

   long unsigned int patt_441;

@@ -293,8 +292,7 @@ Assignment (long int[101] * x)

   _108 = _130 >> 3;

   _89 = -_108;

   _72 = (short unsigned int) _89;

-  _432 = _89 & 1;

-  prolog_loop_niters.59_193 = (short unsigned int) _432;

+  prolog_loop_niters.59_193 = _72 & 1;

   if (prolog_loop_niters.59_193 == 0)

     goto <bb 19>;

   else

@@ -307,7 +305,7 @@ Assignment (long int[101] * x)

   <bb 19>:

   # j_288 = PHI <1(18), 0(17)>

   # c_287 = PHI <c_141(18), 9223372036854775807(17)>

-  prolog_loop_adjusted_niters.60_357 = _89 & 1;

+  prolog_loop_adjusted_niters.60_357 = (sizetype) prolog_loop_niters.59_193;

   niters.61_359 = 101 - prolog_loop_niters.59_193;

   base_off.68_53 = prolog_loop_adjusted_niters.60_357 * 8;

   vect_p.69_48 = pretmp_386 + base_off.68_53;



From the bug report, it isn't clear whether you were measuring -m32 or -m64

performance, but I guess the *.optimized dump change could just increase

register pressure and pessimize register allocation (RA) for the loop, or something similar.

Reply via email to