https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100247
Bug ID: 100247
Summary: x86-64 bad register allocation for unsigned type
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: ammarfaizi2 at gmail dot com
Target Milestone: ---
GCC 12.0.0 allocates an unnecessary register when the accumulator has an
unsigned type. Below is the reproduction code:
Compile with: `gcc -Wall -Wextra -O3 -fno-tree-vectorize -fno-unroll-loops`
--------------------------------------
#include <stddef.h>
/*
 * Signed variant of the reproducer.
 *
 * Adds arr1[i] and arr2[i] into a single accumulator for every i in
 * [0, num) and returns the total. Returns 0 when num is 0, because the
 * loop body never executes.
 *
 * NOTE(review): signed overflow is undefined in C while unsigned
 * arithmetic wraps; that may be relevant to why the two variants are
 * compiled differently, but this report does not establish it.
 */
long add_arrays(long *arr1, long *arr2, size_t num) {
size_t i = 0;
long sum = 0;
for (i = 0; i < num; ++i) {
/* Two separate additions into `sum`, in this order. */
sum += arr1[i];
sum += arr2[i];
}
return sum;
}
/*
 * Unsigned variant of the reproducer; this is the function whose GCC 12
 * output uses the extra register.
 *
 * The body is the same as the signed variant except that the element
 * and accumulator type is unsigned long. Adds arr1[i] and arr2[i] into a
 * single accumulator for every i in [0, num) and returns the total.
 * Returns 0 when num is 0, because the loop body never executes.
 */
unsigned long unsigned_add_arrays(unsigned long *arr1, unsigned long *arr2,
size_t num) {
size_t i = 0;
unsigned long sum = 0;
for (i = 0; i < num; ++i) {
/* Two separate additions into `sum`, in this order. */
sum += arr1[i];
sum += arr2[i];
}
return sum;
}
/*
 * Two-level stringification: XSTR lets its argument be macro-expanded
 * first, then PSTR turns the expanded text into a string literal.
 */
#define PSTR(P) #P
#define XSTR(P) PSTR(P)
/*
 * Embeds the compiler's major.minor.patchlevel in the object, so each
 * assembly listing carries the version of the GCC that produced it.
 */
const char gcc_ver[] =
"It is GCC "
XSTR(__GNUC__) "."
XSTR(__GNUC_MINOR__) "."
XSTR(__GNUC_PATCHLEVEL__);
--------------------------------------
GCC 12.0.0 result:
# long add_arrays(long *arr1, long *arr2, size_t num) -- GCC 12.0.0 output
# AT&T syntax. Register roles as used below:
#   %rdi = arr1, %rsi = arr2, %rdx = num
#   %rax = sum (also the return value), %rcx = i
add_arrays:
xorl %eax, %eax # sum = 0 (a 32-bit write clears all of %rax)
testq %rdx, %rdx # num == 0 ?
je .L4 # yes: return with sum = 0
xorl %ecx, %ecx # i = 0
.L3:
addq (%rdi,%rcx,8), %rax # sum += arr1[i]
addq (%rsi,%rcx,8), %rax # sum += arr2[i]
addq $1, %rcx # ++i
cmpq %rcx, %rdx # compare i with num
jne .L3 # loop while i != num
ret # Why do we even need this ret? The .L4 path below is also just a ret.
.L4:
ret # early exit taken when num == 0
# unsigned long unsigned_add_arrays(unsigned long *arr1,
#                                   unsigned long *arr2, size_t num)
# -- GCC 12.0.0 output, AT&T syntax. Register roles as used below:
#   %rdi = arr1, %rsi = arr2, %rdx = num
#   %r8  = sum, %rax = i, %rcx = per-iteration temporary
# Here %rax holds the loop index rather than the sum, so the sum has to
# be copied into %rax at .L7 before returning.
unsigned_add_arrays:
xorl %r8d, %r8d # sum = 0. Using %r8 is unnecessary
testq %rdx, %rdx # num == 0 ?
je .L7 # yes: skip the loop
xorl %eax, %eax # i = 0
.L9:
movq (%rsi,%rax,8), %rcx # tmp = arr2[i]
addq (%rdi,%rax,8), %rcx # tmp += arr1[i]
addq $1, %rax # ++i
addq %rcx, %r8 # sum += tmp; %r8 is used as the `sum` variable
cmpq %rax, %rdx # compare i with num
jne .L9 # loop while i != num
.L7:
movq %r8, %rax # copy sum from %r8 into the return register %rax
ret
# Version string from the reproducer's gcc_ver array; it identifies the
# compiler that produced this listing.
gcc_ver:
.string "It is GCC 12.0.0"
--------------------------------------
GCC 4.6.4 produces a better result here:
# long add_arrays(long *arr1, long *arr2, size_t num) -- GCC 4.6.4 output
# AT&T syntax. Register roles as used below:
#   %rdi = arr1, %rsi = arr2, %rdx = num
#   %rax = sum (also the return value), %rcx = i
add_arrays:
xorl %eax, %eax # sum = 0 (a 32-bit write clears all of %rax)
testq %rdx, %rdx # num == 0 ?
je .L2 # yes: return with sum = 0
xorl %ecx, %ecx # i = 0
.L3:
addq (%rdi,%rcx,8), %rax # sum += arr1[i]
addq (%rsi,%rcx,8), %rax # sum += arr2[i]
addq $1, %rcx # ++i
cmpq %rdx, %rcx # compare i with num
jne .L3 # loop while i != num
.L2:
# Single exit shared by the num == 0 path and the loop fall-through.
# NOTE(review): `rep` + `ret` is usually explained as a branch-predictor
# workaround for older AMD CPUs -- confirm against the AMD optimization guide.
rep
ret
# unsigned long unsigned_add_arrays(unsigned long *arr1,
#                                   unsigned long *arr2, size_t num)
# -- GCC 4.6.4 output, AT&T syntax. Register roles as used below:
#   %rdi = arr1, %rsi = arr2, %rdx = num
#   %rax = sum (also the return value), %rcx = i
# The sum is accumulated directly in %rax, so no extra register or final
# move is needed.
unsigned_add_arrays:
xorl %eax, %eax # sum = 0 (a 32-bit write clears all of %rax)
testq %rdx, %rdx # num == 0 ?
je .L8 # yes: return with sum = 0
xorl %ecx, %ecx # i = 0
.L9:
addq (%rdi,%rcx,8), %rax # sum += arr1[i]
addq (%rsi,%rcx,8), %rax # sum += arr2[i]
addq $1, %rcx # ++i
cmpq %rdx, %rcx # compare i with num
jne .L9 # loop while i != num
.L8:
# Single exit shared by the num == 0 path and the loop fall-through.
# NOTE(review): `rep` + `ret` is usually explained as a branch-predictor
# workaround for older AMD CPUs -- confirm against the AMD optimization guide.
rep
ret
# Version string from the reproducer's gcc_ver array; it identifies the
# compiler that produced this listing.
gcc_ver:
.string "It is GCC 4.6.4"
--------------------------------------
Godbolt link: https://godbolt.org/z/9Pj5Ph1Gn