------- Comment #2 from burnus at gcc dot gnu dot org 2006-11-29 10:38 -------
If one uses -mfpmath=387 or -mfpmath=sse,387, the speed also increases dramatically.
Results with the test case below on an Athlon64:

icc -O3 test.c; time ./a.out
d=100002.216410, r=100000.000026
real 0m2.549s; user 0m2.548s; sys 0m0.000s

gcc -ftree-vectorize -O3 -msse3 -ffast-math -lm test.c
d=100002.216410, r=100000.000026
real 0m5.444s; user 0m5.444s; sys 0m0.000s

gcc -ftree-vectorize -O3 -msse3 -mfpmath=sse,387 -ffast-math -lm test.c
d=100002.216410, r=100000.000026
real 0m1.363s; user 0m1.192s; sys 0m0.000s

----------------
#include <math.h>
#include <stdio.h>

int main()
{
  double r,d;
  d = 0.0;
  for(r=0.0; r < 100000.0; r += 0.001)
    d = fmod(d,5.0)+r;
  printf("d=%f, r=%f\n",d,r);
  return 0;
}

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29852