The following source implements the __absv?i2() functions (see <https://gcc.gnu.org/onlinedocs/gccint/Integer-library-routines.html>) for 32-bit, 64-bit and 128-bit integers in 3 different ways:
--- ub_or_!ub.c ---
// Copyleft 2014-2020, Stefan Kanthak

#ifdef __amd64__
__int128_t __absuti2(__int128_t argument) {
    if (argument < 0) argument = -argument;
    if (argument < 0) __builtin_trap();
    return argument;
}

__int128_t __absvti2(__int128_t argument) {
    const __int128_t sign = 0 - (argument < 0);
    argument += sign;
    argument ^= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}

__int128_t __abswti2(__int128_t argument) {
    const __int128_t sign = argument >> 127;
    argument ^= sign;
    argument -= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}
#endif // __amd64__

long long __absudi2(long long argument) {
    if (argument < 0) argument = -argument;
    if (argument < 0) __builtin_trap();
    return argument;
}

long long __absvdi2(long long argument) {
    const long long sign = 0 - (argument < 0);
    argument ^= sign;
    argument -= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}

long long __abswdi2(long long argument) {
    const long long sign = argument >> 63;
    argument += sign;
    argument ^= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}

int __absusi2(int argument) {
    if (argument < 0) argument = -argument;
    if (argument < 0) __builtin_trap();
    return argument;
}

int __absvsi2(int argument) {
    const int sign = 0 - (argument < 0);
    argument ^= sign;
    argument -= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}

int __abswsi2(int argument) {
    const int sign = argument >> 31;
    argument += sign;
    argument ^= sign;
    if (argument < 0) __builtin_trap();
    return argument;
}
--- EOF ---

Compile it with GCC 10.2, printing the assembly:

    gcc -o- -O3 -S -Wall -Wextra ub_or_!ub.c

NOTE: older versions of GCC generate BAD code for the expression
      0 - (argument < 0)

(Output rearranged in 3 columns to ease comparison)

__absuti2:                      __absvti2:                      __abswti2:
    movq    %rsi, %rax              movq    %rdi, %rax              movq    %rsi, %rax
    movq    %rdi, %r8               movq    %rsi, %rdi              movq    %rdi, %r8
    movq    %rsi, %rcx              movq    %rsi, %rdx              movq    %rsi, %rcx
    sarq    $63, %rax               sarq    $63, %rdi               sarq    $63, %rax
    movq    %rax, %rsi              movslq  %edi, %rcx              movq    %rax, %rsi
    xorq    %rax, %r8               movq    %rcx, %rsi              xorq    %rax, %r8
    xorq    %rsi, %rcx              sarq    $63, %rcx               xorq    %rsi, %rcx
    movq    %r8, %rax               addq    %rsi, %rax              movq    %r8, %rax
    movq    %rcx, %rdx              movq    %rcx, %rdi              movq    %rcx, %rdx
    subq    %rsi, %rax              adcq    %rcx, %rdx              subq    %rsi, %rax
    sbbq    %rsi, %rdx              xorq    %rsi, %rax              sbbq    %rsi, %rdx
    ret                             xorq    %rdi, %rdx              testq   %rdx, %rdx
                                    jns     .L2                     jns     .L5
                                    ud2                             ud2
                                .L2:                            .L5:
                                    ret                             ret

__absudi2:                      __absvdi2:                      __abswdi2:
    movq    %rdi, %rax              movq    %rdi, %rax              movq    %rdi, %rdx
    cqto                            cqto                            sarq    $63, %rdx
    xorq    %rdx, %rax              movslq  %edx, %rdx              leaq    (%rdi,%rdx), %rax
    subq    %rdx, %rax              xorq    %rdx, %rax              xorq    %rdx, %rax
    ret                             subq    %rdx, %rax              jns     .L10
                                    jns     .L8                     ud2
                                    ud2                         .L10:
                                .L8:                                ret
                                    ret

__absusi2:                      __absvsi2:                      __abswsi2:
    movl    %edi, %eax              movl    %edi, %eax              movl    %edi, %edx
    cltd                            movl    %edi, %edx              sarl    $31, %edx
    xorl    %edx, %eax              shrl    $31, %eax               leal    (%rdi,%rdx), %eax
    subl    %edx, %eax              movl    %eax, %edi              xorl    %edx, %eax
    ret                             negl    %edi                    jns     .L15
                                    xorl    %edx, %edi              ud2
                                    addl    %edi, %eax          .L15:
                                    jns     .L13                    ret
                                    ud2
                                .L13:
                                    ret

1. The 3 __absu?i2() functions demonstrate that GCC (ab)uses the undefined
   behaviour of unary minus/negation for INT_MIN to "optimise" the test for
   overflow detection following the negation away -- WITHOUT warning the
   user, despite the -Wall and -Wextra options!

2. The 3 __absv?i2() and the 3 __absw?i2() functions demonstrate that GCC
   FAILS to recognise the two common and well-known patterns for abs() --
   although it uses this pattern itself -- and does NOT remove the test for
   overflow detection.
   Is this inconsistent behaviour intended?

3. The MOVSLQ instruction in the __absvdi2() function is nonsense: the
   preceding CQTO instruction just extended the (sign of the) argument
   from RAX into RDX.

4. The code generated for the __absvsi2() function is clumsy and quite BAD:
   GCC should generate the same code as for the __absvdi2() function, using
   the 32-bit registers instead of the 64-bit registers.

5.
The register allocation in the __abswsi2() and __abswdi2() functions is BAD;
the LEA instruction should be replaced with a shorter ADD, and the SAR with a
shorter CLTD/CQTO:

__abswsi2:                      __abswsi2:                      __abswdi2:
    movl    %edi, %eax              movl    %edi, %eax              movq    %rdi, %rax
    sarl    $31, %edi               cltd                            cqto
    addl    %edi, %eax              addl    %edx, %eax              addq    %rdx, %rax
    xorl    %edi, %eax              xorl    %edx, %eax              xorq    %rdx, %rax
    jns     .L15                    jns     .L15                    jns     .L10
    ud2                             ud2                             ud2
.L15:                           .L15:                           .L10:
    ret                             ret                             ret

6. The register allocation in the __abs?ti2() functions is also REALLY bad,
   leading to 4 superfluous MOV instructions; additionally the MOVSLQ and
   TESTQ as well as the second SARQ are superfluous:

__absvti2:
    movq    %rsi, %rax
    cqto
    movq    %rdx, %rax
    addq    %rdx, %rdi
    adcq    %rdx, %rsi
    xorq    %rdi, %rax
    xorq    %rsi, %rdx
    jns     .L2
    ud2
.L2:
    ret

Conclusion: there's MUCH room for improvement in the code generator and the
(peephole) optimiser!

Stefan