https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88569

            Bug ID: 88569
           Summary: Track relations between variable values
           Product: gcc
           Version: 8.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bugzi...@poradnik-webmastera.com
  Target Milestone: ---

This example comes from code that can be compiled for various CPUs and has
dedicated sections for AVX and SSE2. I left the original ifdefs in as comments.
When the 1st loop (for AVX) ends, the following relation holds: (cnt - n <= 3).
Similarly, after the 2nd loop this holds: (cnt - n <= 1). Consequently the 2nd
and 3rd loops each execute at most once. With this knowledge it is possible to
optimize the code of bar() into baz(). This eliminates two condition checks
(after the 2nd and 3rd loops) and one increment (in the 3rd loop). It would be
nice if gcc could perform such a transformation automatically.

[code]
void foo(int n);

// Calls foo(n) for increasing n, starting at 0 and staying below cnt, using
// three successive loops with strides 4, 2 and 1. n carries over from one
// loop to the next.
void bar(int cnt)
{
    int n = 0;
//#ifdef __AVX__
    // Stride-4 loop; on exit n >= cnt - 3, i.e. cnt - n <= 3.
    for (; n < cnt - 3; n += 4)
        foo(n);
//#endif
//#ifdef __SSE2__
    // Stride-2 loop; on exit n >= cnt - 1, i.e. cnt - n <= 1.
    for (; n < cnt - 1; n += 2)
        foo(n);
//#endif
    // Stride-1 tail loop for whatever remains below cnt.
    for (; n < cnt; n += 1)
        foo(n);
}

// Hand-written variant of bar(): the stride-4 loop is kept, while the
// stride-2 and stride-1 loops are replaced by single `if` tests.
void baz(int cnt)
{
    int n = 0;
    // Stride-4 loop; on exit n >= cnt - 3, i.e. cnt - n <= 3.
    for (; n < cnt - 3; n += 4)
        foo(n);
    // With cnt - n <= 3, at most one stride-2 step fits, so `if` suffices.
    if (n < cnt - 1)
    {
        foo(n);
        n += 2;
    }
    // With cnt - n <= 1, at most one call remains; n is not advanced after it.
    if (n < cnt)
        foo(n);
}
[/code]

[asm]
# bar(int): compiler output for the three-loop C function bar(cnt).
# Intel operand order (dst, src). Register roles as used below:
#   edi  = cnt on entry, later the argument passed to foo
#   r12d = cnt (kept across calls)
#   ebx  = n
#   ebp  = current bound: cnt - 3 for the stride-4 loop, then cnt - 1
#   r13d = n + 2 inside the stride-2 section
bar(int):
        push    r13
        push    r12
        mov     r12d, edi               # r12d = cnt
        push    rbp
        lea     ebp, [rdi-3]            # ebp = cnt - 3
        push    rbx
        xor     ebx, ebx                # n = 0
        sub     rsp, 8                  # 8 bytes of padding, presumably for 16-byte alignment at the calls
        test    ebp, ebp
        jle     .L5                     # cnt - 3 <= 0: skip the stride-4 loop
.L2:                                    # stride-4 loop body
        mov     edi, ebx
        add     ebx, 4                  # n += 4 (done before the call; edi holds the old n)
        call    foo(int)
        cmp     ebx, ebp
        jl      .L2                     # repeat while n < cnt - 3
        # n is recomputed from cnt instead of reusing the loop counter:
        lea     eax, [r12-4]
        shr     eax, 2                  # logical shift: (cnt - 4) >> 2
        lea     ebx, [4+rax*4]          # n = 4 + 4 * ((cnt - 4) >> 2)
.L5:                                    # stride-2 section
        lea     ebp, [r12-1]            # ebp = cnt - 1
        cmp     ebp, ebx
        jle     .L3                     # cnt - 1 <= n: skip the stride-2 section
        mov     edi, ebx
        lea     r13d, [rbx+2]           # r13d = n + 2
        call    foo(int)                # foo(n)
        cmp     ebp, r13d
        jle     .L8                     # cnt - 1 <= n + 2: no second stride-2 call
        mov     edi, r13d
        call    foo(int)                # foo(n + 2)
.L8:
        lea     edi, [r12-2]
        sub     edi, ebx                # edi = cnt - 2 - n
        mov     ebx, edi
        and     ebx, -2                 # clear bit 0
        add     ebx, r13d               # n = ((cnt - 2 - n_old) & -2) + n_old + 2
.L3:                                    # stride-1 section
        cmp     r12d, ebx
        jle     .L14                    # cnt <= n: nothing left, return
        mov     edi, ebx
        call    foo(int)                # foo(n)
        lea     edi, [rbx+1]            # edi = n + 1, argument for a possible further call
        cmp     r12d, edi
        jg      .L17                    # cnt > n + 1: one more call via the tail-call path
.L14:                                   # epilogue ending in a plain return
        add     rsp, 8
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        ret
.L17:                                   # same epilogue, then jump to foo instead of call + ret
        add     rsp, 8
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        jmp     foo(int)                # tail call: foo(n + 1)
# baz(int): compiler output for the hand-optimized C function baz(cnt).
# Intel operand order (dst, src). Register roles as used below:
#   edi  = cnt on entry, later the argument passed to foo
#   r12d = cnt (kept across calls)
#   ebx  = n
#   ebp  = cnt - 3, bound of the stride-4 loop
baz(int):
        push    r12
        mov     r12d, edi               # r12d = cnt
        push    rbp
        lea     ebp, [rdi-3]            # ebp = cnt - 3
        push    rbx
        xor     ebx, ebx                # n = 0
        test    ebp, ebp
        jle     .L19                    # cnt - 3 <= 0: skip the stride-4 loop
.L20:                                   # stride-4 loop body
        mov     edi, ebx
        add     ebx, 4                  # n += 4 (done before the call; edi holds the old n)
        call    foo(int)
        cmp     ebx, ebp
        jl      .L20                    # repeat while n < cnt - 3
        # n is recomputed from cnt instead of reusing the loop counter:
        lea     eax, [r12-4]
        shr     eax, 2                  # logical shift: (cnt - 4) >> 2
        lea     ebx, [4+rax*4]          # n = 4 + 4 * ((cnt - 4) >> 2)
.L19:
        lea     eax, [r12-1]            # eax = cnt - 1
        cmp     eax, ebx
        jg      .L27                    # cnt - 1 > n: take the single stride-2 step
        cmp     ebx, r12d
        jl      .L28                    # n < cnt: one final call
.L25:                                   # epilogue ending in a plain return
        pop     rbx
        pop     rbp
        pop     r12
        ret
.L27:                                   # the `if (n < cnt - 1)` body
        mov     edi, ebx
        add     ebx, 2                  # n += 2 (done before the call; edi holds the old n)
        call    foo(int)                # foo(n)
        cmp     ebx, r12d
        jge     .L25                    # n >= cnt: done
.L28:                                   # the final `if (n < cnt)` body
        mov     edi, ebx
        pop     rbx
        pop     rbp
        pop     r12
        jmp     foo(int)                # tail call: foo(n)
[/asm]

Reply via email to