https://gcc.gnu.org/bugzilla/show_bug.cgi?id=27077

--- Comment #8 from Helmut Schellong <var at schellong dot biz> ---
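The test case is a dedicated timing program; see the assembler output below.
It compares a call to the library strlen against the inlined `repnz scasb` sequence,
timing each loop with clock_gettime, and was built with both gcc6 and gcc5.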

The exact gcc version is not critical.
Any gcc that inlines an Intel string instruction (such as `repnz scasb`)
emits slow code on many CPUs.
What matters is practical experience, i.e. behaviour in real-world use.

CPU: Intel(R) Core(TM)2 Duo CPU     E8600  @ 3.33GHz (3333.40-MHz K8-class CPU)
Origin = "GenuineIntel"  Id = 0x1067a  Family = 0x6  Model = 0x17  Stepping = 10
Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
Features2=0xc08e3fd<SSE3,DTES64,MON,DS_CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,XSAVE,OSXSAVE>
  AMD Features=0x20100800<SYSCALL,NX,LM>
  AMD Features2=0x1<LAHF>
  VT-x: HLT,PAUSE
  TSC: P-state invariant, performance statistics

gcc6 -v
Using built-in specs.
COLLECT_GCC=gcc6
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc6/gcc/x86_64-portbld-freebsd10.1/6.2.0/lto-wrapper
Target: x86_64-portbld-freebsd10.1
Configured with: ...
Thread model: posix
gcc version 6.2.0 (FreeBSD Ports Collection) 

gcc5 -v
Using built-in specs.
COLLECT_GCC=gcc5
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc5/gcc/x86_64-portbld-freebsd10.0/5.0.0/lto-wrapper
Target: x86_64-portbld-freebsd10.0
Configured with: ...
Thread model: posix
gcc version 5.0.0 20140921 (experimental) (FreeBSD Ports Collection) 



gcc6 -O1 -static -s -opt -fno-builtin-strlen -DSTRLENF pt.c ptf.o
==============================================================================
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     ebx, DWORD PTR CntA[rip]        # is, CntA
        .p2align 2
.L10:
        mov     edi, OFFSET FLAT:DA     #,
        call    strlen  #
        mov     QWORD PTR [rsp+112], rax        # L, _80
        sub     ebx, 1  # is,
        test    ebx, ebx        # is
        jg      .L10    #,
        lea     rsi, [rsp+64]   # tmp273,
        mov     edi, 1  #,
        call    clock_gettime   #
        imul    rax, QWORD PTR [rsp+64], 1000000000     # tmp216, cb[0].tv_sec,
        add     rax, QWORD PTR [rsp+72] # tmp218, cb[0].tv_nsec
        imul    rdx, QWORD PTR [rsp+32], 1000000000     # tmp220, ca[0].tv_sec,
        add     rdx, QWORD PTR [rsp+40] # tmp222, ca[0].tv_nsec
        sub     rax, rdx        # tmp224, tmp222
        pxor    xmm7, xmm7      # _94
        cvtsi2sdq       xmm7, rax       # _94, tmp224
        movsd   QWORD PTR [rsp+8], xmm7 # %sfp, _94
        mov     rsi, QWORD PTR [rsp+112]        # L.7_105, L
        mov     edi, OFFSET FLAT:.LC3   #,
        mov     eax, 0  #,
        call    printf  #
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     ebx, DWORD PTR CntA[rip]        # is, CntA
        .p2align 2
.L11:
        mov     edi, OFFSET FLAT:DA     #,
        call    strlen  #
        mov     QWORD PTR [rsp+112], rax        # L, _112
        mov     edi, OFFSET FLAT:DA     #,
        call    strlen  #
        mov     QWORD PTR [rsp+112], rax        # L, _115
        sub     ebx, 1  # is,
        test    ebx, ebx        # is
        jg      .L11    #,
        lea     rsi, [rsp+64]   # tmp275,
        mov     edi, 1  #,
        call    clock_gettime   #
==============================================================================

gcc6 -O1 -static -s -opt -DSTRLENF pt.c ptf.o
==============================================================================
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     edx, DWORD PTR CntA[rip]        # is, CntA
        mov     rsi, -1 # tmp225,
        mov     eax, 0  # tmp226,
        .p2align 2
.L10:
        mov     edi, OFFSET FLAT:DA     # tmp224,
        mov     rcx, rsi        # tmp222, tmp225
        repnz scasb
        not     rcx     # tmp223
        sub     rcx, 1  # _80,
        mov     QWORD PTR [rsp+112], rcx        # L, _80
        sub     edx, 1  # is,
        test    edx, edx        # is
        jg      .L10    #,
        lea     rsi, [rsp+64]   # tmp299,
        mov     edi, 1  #,
        call    clock_gettime   #
        imul    rax, QWORD PTR [rsp+64], 1000000000     # tmp228, cb[0].tv_sec,
        add     rax, QWORD PTR [rsp+72] # tmp230, cb[0].tv_nsec
        imul    rdx, QWORD PTR [rsp+32], 1000000000     # tmp232, ca[0].tv_sec,
        add     rdx, QWORD PTR [rsp+40] # tmp234, ca[0].tv_nsec
        sub     rax, rdx        # tmp236, tmp234
        pxor    xmm7, xmm7      # _94
        cvtsi2sdq       xmm7, rax       # _94, tmp236
        movsd   QWORD PTR [rsp+8], xmm7 # %sfp, _94
        mov     rsi, QWORD PTR [rsp+112]        # L.7_105, L
        mov     edi, OFFSET FLAT:.LC3   #,
        mov     eax, 0  #,
        call    printf  #
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     edx, DWORD PTR CntA[rip]        # is, CntA
        mov     rsi, -1 # tmp242,
        mov     eax, 0  # tmp243,
        .p2align 2
.L11:
        mov     edi, OFFSET FLAT:DA     # tmp241,
        mov     rcx, rsi        # tmp239, tmp242
        repnz scasb
        not     rcx     # tmp240
        sub     rcx, 1  # _112,
        mov     QWORD PTR [rsp+112], rcx        # L, _112
        mov     edi, OFFSET FLAT:DA     # tmp247,
        mov     rcx, rsi        # tmp245, tmp242
        repnz scasb
        not     rcx     # tmp246
        sub     rcx, 1  # _115,
        mov     QWORD PTR [rsp+112], rcx        # L, _115
        sub     edx, 1  # is,
        test    edx, edx        # is
        jg      .L11    #,
        lea     rsi, [rsp+64]   # tmp303,
        mov     edi, 1  #,
        call    clock_gettime   #
==============================================================================

gcc5 -O1 -static -s -opt -DSTRLENF pt.c ptf.o
==============================================================================
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     esi, DWORD PTR CntA[rip]        # is, CntA
        mov     r8, -1  # tmp286,
        mov     eax, 0  # tmp287,
        .p2align 2
.L10:
        mov     edi, OFFSET FLAT:DA     # tmp221,
        mov     rcx, r8 # tmp219, tmp286
        repnz scasb
        not     rcx     # tmp220
        mov     rdx, rcx        # tmp220, tmp220
        sub     rdx, 1  # D.3715,
        mov     QWORD PTR [rsp+112], rdx        # L, D.3715
        sub     esi, 1  # is,
        test    esi, esi        # is
        jg      .L10    #,
        lea     rsi, [rsp+64]   # tmp296,
        mov     edi, 1  #,
        call    clock_gettime   #
        imul    rax, QWORD PTR [rsp+64], 1000000000     # D.3716, cb[0].tv_sec,
        add     rax, QWORD PTR [rsp+72] # D.3716, cb[0].tv_nsec
        mov     rdx, rax        # D.3716, D.3716
        imul    rax, QWORD PTR [rsp+32], -1000000000    # D.3716, ca[0].tv_sec,
        sub     rax, QWORD PTR [rsp+40] # D.3716, ca[0].tv_nsec
        add     rax, rdx        # D.3716, D.3716
        pxor    xmm7, xmm7      # D.3718
        cvtsi2sdq       xmm7, rax       # D.3718, D.3716
        movsd   QWORD PTR [rsp+8], xmm7 # %sfp, D.3718
        mov     rsi, QWORD PTR [rsp+112]        # D.3716, L
        mov     edi, OFFSET FLAT:.LC3   #,
        mov     eax, 0  #,
        call    printf  #
        lea     rsi, [rsp+32]   #,
        mov     edi, 1  #,
        call    clock_gettime   #
        mov     esi, DWORD PTR CntA[rip]        # is, CntA
        mov     r8, -1  # tmp284,
        mov     eax, 0  # tmp285,
        .p2align 2
.L11:
        mov     edi, OFFSET FLAT:DA     # tmp237,
        mov     rcx, r8 # tmp235, tmp284
        repnz scasb
        not     rcx     # tmp236
        mov     rdx, rcx        # tmp236, tmp236
        sub     rdx, 1  # D.3715,
        mov     QWORD PTR [rsp+112], rdx        # L, D.3715
        mov     edi, OFFSET FLAT:DA     # tmp243,
        mov     rcx, r8 # tmp241, tmp284
        repnz scasb
        not     rcx     # tmp242
        mov     rdx, rcx        # tmp242, tmp242
        sub     rdx, 1  # D.3715,
        mov     QWORD PTR [rsp+112], rdx        # L, D.3715
        sub     esi, 1  # is,
        test    esi, esi        # is
        jg      .L11    #,
        lea     rsi, [rsp+64]   # tmp303,
        mov     edi, 1  #,
        call    clock_gettime   #
==============================================================================

Reply via email to