https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84172
Bug ID: 84172
Summary: option "-O3" create slower code
Product: gcc
Version: 5.3.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: aotto1...@t-online.de
Target Milestone: ---

Here is my test:

#:~/test> make test
gcc-5 -march=native -mtune=native -g -static -O0 -o test.0 main.c
gcc-5 -march=native -mtune=native -g -static -O1 -o test.1 main.c
gcc-5 -march=native -mtune=native -g -static -O2 -o test.2 main.c
gcc-5 -march=native -mtune=native -g -static -O3 -o test.3 main.c
for t in test.0 test.1 test.2 test.3; do ./$t; done
./test.0 → T1 = 673.300964 ms → HI = 0x1, LO = 0
./test.0 → T2 = 506.130981 ms → HI = 0x1, LO = 0
./test.1 → T1 = 136.671005 ms → HI = 0x1, LO = 0
./test.1 → T2 = 139.194000 ms → HI = 0x1, LO = 0
./test.2 → T1 = 139.225998 ms → HI = 0x1, LO = 0
./test.2 → T2 = 139.294998 ms → HI = 0x1, LO = 0
./test.3 → T1 = 217.908997 ms → HI = 0x1, LO = 0
./test.3 → T2 = 231.663010 ms → HI = 0x1, LO = 0

#:~/test> gcc-5 -v
Using built-in specs.
COLLECT_GCC=/usr/bin/gcc-5 COLLECT_LTO_WRAPPER=/usr/lib64/gcc/x86_64-suse-linux/5/lto-wrapper Target: x86_64-suse-linux Configured with: ../configure --prefix=/usr --infodir=/usr/share/info --mandir=/usr/share/man --libdir=/usr/lib64 --libexecdir=/usr/lib64 --enable-languages=c,c++,fortran,ada,go --enable-checking=release --with-gxx-include-dir=/usr/include/c++/5 --enable-ssp --disable-libssp --disable-libvtv --enable-libmpx --disable-plugin --with-bugurl=http://bugs.opensuse.org/ --with-pkgversion='SUSE Linux' --disable-libgcj --with-slibdir=/lib64 --with-system-zlib --enable-__cxa_atexit --enable-libstdcxx-allocator=new --disable-libstdcxx-pch --with-default-libstdcxx-abi=gcc4-compatible --enable-version-specific-runtime-libs --enable-linker-build-id --enable-linux-futex --program-suffix=-5 --without-system-libunwind --enable-multilib --with-arch-32=x86-64 --with-tune=generic --build=x86_64-suse-linux --host=x86_64-suse-linux Thread model: posix gcc version 5.3.1 20160301 [gcc-5-branch revision 233849] (SUSE Linux) dev1usr@linux02:~/test> Selected "main.c:40 [main]" #my code ========================================================================== #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <time.h> #define SIZE 100000000 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define HI 1 # define LO 0 #else # define HI 0 # define LO 1 #endif int main(int argc, char *argv[]) { int i; clock_t t1, t2; // T1 if (1) { uint64_t a = 0xffffffffffffffff; uint64_t b = 0xfffffffffffffffe; unsigned __int128 r; t1 = clock(); for (i=0; i<SIZE; i++) { r = ((unsigned __int128) a * (unsigned __int128) b); if (i%2==0) { a += (uint64_t) (r>>64); b -= (uint64_t) (r>>0); } else { a -= (uint64_t) (r>>64); b += (uint64_t) (r>>0); } } t2 = clock(); float diff = ((float)(t2 - t1) / (float)CLOCKS_PER_SEC ) * 1000; printf("%-20s → T1 = %f ms → HI = %#x, LO = %#x\n", argv[0], diff, a, b); } // T2 if (1) { typedef union valU { unsigned __int128 ui128 ; uint64_t ui64[2] ; } 
valU_t; uint64_t a = 0xffffffffffffffff; uint64_t b = 0xfffffffffffffffe; valU_t r; t1 = clock(); for (i=0; i<SIZE; i++) { r.ui128 = ((unsigned __int128) a * (unsigned __int128) b); if (i%2==0) { a += r.ui64[HI]; b -= r.ui64[LO]; } else { a -= r.ui64[HI]; b += r.ui64[LO]; } } t2 = clock(); float diff = ((float)(t2 - t1) / (float)CLOCKS_PER_SEC ) * 1000; printf("%-20s → T2 = %f ms → HI = %#x, LO = %#x\n", argv[0], diff, a, b); } exit(0); =======================================================================