On July 17, 2021 8:54:38 PM GMT+02:00, Stefan Kanthak <stefan.kant...@nexgo.de> wrote: >Hi, > >GCC 10.2.0 (and GCC 8.3; other versions and targets except i386 and >amd64 not tested) generates rather bad code for the following ternary >expression: > >--- repro.c --- >#define NULL (char *) 0 > >char *dummy(char *string, long count) { > return count == 0 ? NULL : string + 1; >} >--- EOF --- > >$ gcc -m64 -o- -O3 -S repro.c > >dummy: > addq $1, %rdi > movl $0, %eax > testq %rsi, %rsi > cmovne %rdi, %rax > ret > >JFTR: why does GCC NOT generate the shorter "XOR %eax, %eax" here? > >$ gcc -m64 -O3 -c dummy.c >$ objdump -D dummy.o > >0000000000000000 <dummy>: > 0: 48 83 c7 01 add $0x1,%rdi > 4: b8 00 00 00 00 mov $0x0,%eax > 9: 48 85 f6 test %rsi,%rsi > c: 48 0f 45 c7 cmovne %rdi,%rax > 10: c3 retq > > >i386 and AMD64 use the ILP32 and LP64 data models, respectively, where a "long" and >a "pointer" have the same size, and 0L and the null pointer have the >same binary representation, so the contents of RSI should be used to >load RAX with 0 conditionally: > >dummy: > leaq 1(%rdi), %rax > testq %rsi, %rsi > cmoveq %rsi, %rax > ret > >$ gcc -m32 -o- -O3 -S dummy.c > >_dummy: > movl 8(%esp), %edx > movl 4(%esp), %eax > addl $1, %eax > testl %edx, %edx > movl $0, %edx > cmove %edx, %eax # OUCH: if this executes, EDX was 0 before, > ret # so the MOV is really a NOP! 
> > >$ gcc -m32 -O3 -c dummy.c >$ objdump -D dummy.o > >00000000 <_dummy>: > 0: 8b 54 24 08 mov 0x8(%esp),%edx > 4: 8b 44 24 04 mov 0x4(%esp),%eax > 8: 83 c0 01 add $0x1,%eax > b: 85 d2 test %edx,%edx > d: ba 00 00 00 00 mov $0x0,%edx > 12: 0f 44 c2 cmove %edx,%eax > 15: c3 ret > >Here's what GCC should generate instead: > >00000000 <_dummy>: > 0: 8b 44 24 04 mov 0x4(%esp),%eax > 4: 8b 4c 24 08 mov 0x8(%esp),%ecx > 8: 40 inc %eax > 9: f7 d9 neg %ecx > b: 19 c9 sbb %ecx,%ecx > d: 21 c8 and %ecx,%eax > f: c3 ret > > >For (pre)historic processors which don't support CMOVcc the >following code is generated: > >$ gcc -m32 -mtune=i386 -o- -S dummy.c > >_dummy: > movl 8(%esp), %eax > testl %eax, %eax > je L3 > movl 4(%esp), %eax > incl %eax > ret > .p2align 2 >L3: # OUCH: EAX is already 0 here! > xorl %eax, %eax > ret > >00000000 <dummy>: > 0: 8b 44 24 08 mov 0x8(%esp),%eax > 4: 85 c0 test %eax,%eax > 6: 74 08 je 10 <dummy+0x10> > 8: 8b 44 24 04 mov 0x4(%esp),%eax > c: 40 inc %eax > d: c3 ret > e: 66 90 xchg %ax,%ax > 10: 31 c0 xor %eax,%eax > 12: c3 ret > > >not amused
Patches welcome. You might want to file a Bugzilla report, which has a higher chance of being found after a while. Richard. >Stefan Kanthak