On July 17, 2021 8:54:38 PM GMT+02:00, Stefan Kanthak <stefan.kant...@nexgo.de> 
wrote:
>Hi,
>
>GCC 10.2.0 (and GCC 8.3; other versions and targets except i386 and
>amd64 not tested) generate rather bad code for the following ternary
>expression:
>
>--- repro.c ---
>#define NULL (char *) 0
>
>char *dummy(char *string, long count) {
>    return count == 0 ? NULL : string + 1;
>}
>--- EOF ---
>
>$ gcc -m64 -o- -O3 -S repro.c
>
>dummy:
>        addq    $1, %rdi
>        movl    $0, %eax
>        testq   %rsi, %rsi
>        cmovne  %rdi, %rax
>        ret
>
>JFTR: why does GCC NOT generate the shorter "XOR %eax, %eax" here?
>
>$ gcc -m64 -O3 -c dummy.c
>$ objdump -D dummy.o
>
>0000000000000000 <dummy>:
>   0: 48 83 c7 01           add    $0x1,%rdi
>   4: b8 00 00 00 00        mov    $0x0,%eax
>   9: 48 85 f6              test   %rsi,%rsi
>   c: 48 0f 45 c7           cmovne %rdi,%rax
>  10: c3                    retq   
>
>
>i386 and AMD64 use the ILP32 and LP64 data model where a "long" and
>a "pointer" have the same size, and 0L and the null pointer have the
>same binary representation, so the contents of RSI should be used to
>load RAX with 0 conditionally:
>
>dummy:
>        leaq    1(%rdi), %rax
>        testq   %rsi, %rsi
>        cmoveq  %rsi, %rax
>        ret
>
>$ gcc -m32 -o- -O3 -S dummy.c
>
>_dummy:
>        movl   8(%esp), %edx
>        movl   4(%esp), %eax
>        addl   $1, %eax
>        testl  %edx, %edx
>        movl   $0, %edx
>        cmove  %edx, %eax    # OUCH: if this executes, EDX was 0 before,
>        ret                  #       so the MOV is really a NOP!
>
>
>$ gcc -m32 -O3 -c dummy.c
>$ objdump -D dummy.o
>
>00000000 <_dummy>:
>   0:   8b 54 24 08             mov    0x8(%esp),%edx
>   4:   8b 44 24 04             mov    0x4(%esp),%eax
>   8:   83 c0 01                add    $0x1,%eax
>   b:   85 d2                   test   %edx,%edx
>   d:   ba 00 00 00 00          mov    $0x0,%edx
>  12:   0f 44 c2                cmove  %edx,%eax
>  15:   c3                      ret    
>
>Here's what GCC should generate instead:
>
>00000000 <_dummy>:
>   0:   8b 44 24 04             mov    0x4(%esp),%eax
>   4:   8b 4c 24 08             mov    0x8(%esp),%ecx
>   8:   40                      inc    %eax
>   9:   f7 d9                   neg    %ecx
>   b:   19 c9                   sbb    %ecx,%ecx
>   d:   21 c8                   and    %ecx,%eax
>   f:   c3                      ret    
>
>
>For (pre)historic processors which don't support CMOVcc the
>following code is generated:
>
>$ gcc -m32 -mtune=i386 -o- -S dummy.c
>
>_dummy:
>        movl    8(%esp), %eax
>        testl   %eax, %eax
>        je      L3
>        movl    4(%esp), %eax
>        incl    %eax
>        ret
>        .p2align 2
>L3:                        # OUCH: EAX is already 0 here!
>        xorl    %eax, %eax
>        ret
>
>00000000 <dummy>:
>   0:   8b 44 24 08             mov    0x8(%esp),%eax
>   4:   85 c0                   test   %eax,%eax
>   6:   74 08                   je     10 <dummy+0x10>
>   8:   8b 44 24 04             mov    0x4(%esp),%eax
>   c:   40                      inc    %eax
>   d:   c3                      ret
>   e:   66 90                   xchg   %ax,%ax
>  10:   31 c0                   xor    %eax,%eax
>  12:   c3                      ret
>
>
>not amused

Patches welcome.  You might want to file a Bugzilla report, which has a higher 
chance of still being found later on.

Richard. 

>Stefan Kanthak

Reply via email to