https://gcc.gnu.org/bugzilla/show_bug.cgi?id=27077
--- Comment #8 from Helmut Schellong <var at schellong dot biz> ---

The test case is a special measurement program.
See the assembler code below (strlen vs. repnz scasb, timed with clock_gettime, built with gcc6 and gcc5).

The exact gcc version is not critical: every gcc that emits an Intel string instruction inline produces slow code (on many CPUs).
Practical experience (real-world use) is what matters.

CPU: Intel(R) Core(TM)2 Duo CPU E8600 @ 3.33GHz (3333.40-MHz K8-class CPU)
  Origin = "GenuineIntel"  Id = 0x1067a  Family = 0x6  Model = 0x17  Stepping = 10
  Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
  Features2=0xc08e3fd<SSE3,DTES64,MON,DS_CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,XSAVE,OSXSAVE>
  AMD Features=0x20100800<SYSCALL,NX,LM>
  AMD Features2=0x1<LAHF>
  VT-x: HLT,PAUSE
  TSC: P-state invariant, performance statistics

gcc6 -v
Using built-in specs.
COLLECT_GCC=gcc6
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc6/gcc/x86_64-portbld-freebsd10.1/6.2.0/lto-wrapper
Target: x86_64-portbld-freebsd10.1
Configured with: ...
Thread model: posix
gcc version 6.2.0 (FreeBSD Ports Collection)

gcc5 -v
Using built-in specs.
COLLECT_GCC=gcc5
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc5/gcc/x86_64-portbld-freebsd10.0/5.0.0/lto-wrapper
Target: x86_64-portbld-freebsd10.0
Configured with: ...
Thread model: posix gcc version 5.0.0 20140921 (experimental) (FreeBSD Ports Collection) gcc6 -O1 -static -s -opt -fno-builtin-strlen -DSTRLENF pt.c ptf.o ============================================================================== lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov ebx, DWORD PTR CntA[rip] # is, CntA .p2align 2 .L10: mov edi, OFFSET FLAT:DA #, call strlen # mov QWORD PTR [rsp+112], rax # L, _80 sub ebx, 1 # is, test ebx, ebx # is jg .L10 #, lea rsi, [rsp+64] # tmp273, mov edi, 1 #, call clock_gettime # imul rax, QWORD PTR [rsp+64], 1000000000 # tmp216, cb[0].tv_sec, add rax, QWORD PTR [rsp+72] # tmp218, cb[0].tv_nsec imul rdx, QWORD PTR [rsp+32], 1000000000 # tmp220, ca[0].tv_sec, add rdx, QWORD PTR [rsp+40] # tmp222, ca[0].tv_nsec sub rax, rdx # tmp224, tmp222 pxor xmm7, xmm7 # _94 cvtsi2sdq xmm7, rax # _94, tmp224 movsd QWORD PTR [rsp+8], xmm7 # %sfp, _94 mov rsi, QWORD PTR [rsp+112] # L.7_105, L mov edi, OFFSET FLAT:.LC3 #, mov eax, 0 #, call printf # lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov ebx, DWORD PTR CntA[rip] # is, CntA .p2align 2 .L11: mov edi, OFFSET FLAT:DA #, call strlen # mov QWORD PTR [rsp+112], rax # L, _112 mov edi, OFFSET FLAT:DA #, call strlen # mov QWORD PTR [rsp+112], rax # L, _115 sub ebx, 1 # is, test ebx, ebx # is jg .L11 #, lea rsi, [rsp+64] # tmp275, mov edi, 1 #, call clock_gettime # ============================================================================== gcc6 -O1 -static -s -opt -DSTRLENF pt.c ptf.o ============================================================================== lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov edx, DWORD PTR CntA[rip] # is, CntA mov rsi, -1 # tmp225, mov eax, 0 # tmp226, .p2align 2 .L10: mov edi, OFFSET FLAT:DA # tmp224, mov rcx, rsi # tmp222, tmp225 repnz scasb not rcx # tmp223 sub rcx, 1 # _80, mov QWORD PTR [rsp+112], rcx # L, _80 sub edx, 1 # is, test edx, edx # is jg .L10 #, lea rsi, [rsp+64] # tmp299, mov edi, 1 #, call clock_gettime # 
imul rax, QWORD PTR [rsp+64], 1000000000 # tmp228, cb[0].tv_sec, add rax, QWORD PTR [rsp+72] # tmp230, cb[0].tv_nsec imul rdx, QWORD PTR [rsp+32], 1000000000 # tmp232, ca[0].tv_sec, add rdx, QWORD PTR [rsp+40] # tmp234, ca[0].tv_nsec sub rax, rdx # tmp236, tmp234 pxor xmm7, xmm7 # _94 cvtsi2sdq xmm7, rax # _94, tmp236 movsd QWORD PTR [rsp+8], xmm7 # %sfp, _94 mov rsi, QWORD PTR [rsp+112] # L.7_105, L mov edi, OFFSET FLAT:.LC3 #, mov eax, 0 #, call printf # lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov edx, DWORD PTR CntA[rip] # is, CntA mov rsi, -1 # tmp242, mov eax, 0 # tmp243, .p2align 2 .L11: mov edi, OFFSET FLAT:DA # tmp241, mov rcx, rsi # tmp239, tmp242 repnz scasb not rcx # tmp240 sub rcx, 1 # _112, mov QWORD PTR [rsp+112], rcx # L, _112 mov edi, OFFSET FLAT:DA # tmp247, mov rcx, rsi # tmp245, tmp242 repnz scasb not rcx # tmp246 sub rcx, 1 # _115, mov QWORD PTR [rsp+112], rcx # L, _115 sub edx, 1 # is, test edx, edx # is jg .L11 #, lea rsi, [rsp+64] # tmp303, mov edi, 1 #, call clock_gettime # ============================================================================== gcc5 -O1 -static -s -opt -DSTRLENF pt.c ptf.o ============================================================================== lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov esi, DWORD PTR CntA[rip] # is, CntA mov r8, -1 # tmp286, mov eax, 0 # tmp287, .p2align 2 .L10: mov edi, OFFSET FLAT:DA # tmp221, mov rcx, r8 # tmp219, tmp286 repnz scasb not rcx # tmp220 mov rdx, rcx # tmp220, tmp220 sub rdx, 1 # D.3715, mov QWORD PTR [rsp+112], rdx # L, D.3715 sub esi, 1 # is, test esi, esi # is jg .L10 #, lea rsi, [rsp+64] # tmp296, mov edi, 1 #, call clock_gettime # imul rax, QWORD PTR [rsp+64], 1000000000 # D.3716, cb[0].tv_sec, add rax, QWORD PTR [rsp+72] # D.3716, cb[0].tv_nsec mov rdx, rax # D.3716, D.3716 imul rax, QWORD PTR [rsp+32], -1000000000 # D.3716, ca[0].tv_sec, sub rax, QWORD PTR [rsp+40] # D.3716, ca[0].tv_nsec add rax, rdx # D.3716, D.3716 pxor xmm7, xmm7 # 
D.3718 cvtsi2sdq xmm7, rax # D.3718, D.3716 movsd QWORD PTR [rsp+8], xmm7 # %sfp, D.3718 mov rsi, QWORD PTR [rsp+112] # D.3716, L mov edi, OFFSET FLAT:.LC3 #, mov eax, 0 #, call printf # lea rsi, [rsp+32] #, mov edi, 1 #, call clock_gettime # mov esi, DWORD PTR CntA[rip] # is, CntA mov r8, -1 # tmp284, mov eax, 0 # tmp285, .p2align 2 .L11: mov edi, OFFSET FLAT:DA # tmp237, mov rcx, r8 # tmp235, tmp284 repnz scasb not rcx # tmp236 mov rdx, rcx # tmp236, tmp236 sub rdx, 1 # D.3715, mov QWORD PTR [rsp+112], rdx # L, D.3715 mov edi, OFFSET FLAT:DA # tmp243, mov rcx, r8 # tmp241, tmp284 repnz scasb not rcx # tmp242 mov rdx, rcx # tmp242, tmp242 sub rdx, 1 # D.3715, mov QWORD PTR [rsp+112], rdx # L, D.3715 sub esi, 1 # is, test esi, esi # is jg .L11 #, lea rsi, [rsp+64] # tmp303, mov edi, 1 #, call clock_gettime # ==============================================================================