https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124441
Bug ID: 124441
Summary: The `volatile` qualifier blocks temporary expression
replacement (TER) in the RTL `expand` pass.
Product: gcc
Version: 15.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: xingyushuai at hygon dot cn
Target Milestone: ---
Code:
--------------
// Reproducer: decodes the current tag word, loads the next tag byte for both
// possible outcomes, stores the selected one back through `tag`, and advances
// the input pointer `*ip_p` accordingly.
//
// ip_p : in/out pointer to the current input position; updated before return.
// tag  : in/out current tag word; overwritten with the selected next tag byte.
// Returns the low two bits of the incoming `*tag`.
unsigned int AdvanceToNextTagX86Optimized(const unsigned char** ip_p, unsigned
int* tag) {
// Bind by reference so the final assignment to `ip` writes back to `*ip_p`.
const unsigned char*& ip = *ip_p;
// Upper bits of the tag word (everything above the low two bits).
unsigned int literal_len = *tag >> 2;
unsigned int tag_type = *tag;
bool is_literal;
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// Mask tag_type to its low two bits and capture the resulting zero flag
// directly into is_literal; same result as the portable #else branch below.
asm("and $3, %k[tag_type]\n\t"
: [tag_type] "+r"(tag_type), "=@ccz"(is_literal)
:: "cc");
#else
tag_type &= 3;
is_literal = (tag_type == 0);
#endif
// Both candidate bytes are loaded unconditionally through volatile-qualified
// pointers; these volatile accesses are the subject of this report.
unsigned int tag_literal =
static_cast<const volatile unsigned char*>(ip)[1 + literal_len];
unsigned int tag_copy = static_cast<const volatile unsigned
char*>(ip)[tag_type];
// Select the next tag and the next input position based on is_literal.
*tag = is_literal ? tag_literal : tag_copy;
const unsigned char* ip_copy = ip + 1 + tag_type;
const unsigned char* ip_literal = ip + 2 + literal_len;
ip = is_literal ? ip_literal : ip_copy;
#if defined(__GNUC__) && defined(__x86_64__)
// Empty asm that takes tag_copy as a register input operand.
// NOTE(review): presumably meant to keep the tag_copy load materialized
// ahead of the select -- confirm with the reporter.
asm("" ::"r"(tag_copy));
#endif
return tag_type;
}
--------------
The above code was compiled using gcc 15.2.0 (compiler option -O3), and the
generated assembly code is as follows:
mov eax, DWORD PTR [rsi]
mov rdx, QWORD PTR [rdi]
mov r8, rsi
mov r10d, eax
shr r10d, 2
lea ecx, [r10+1]
and $3, eax
sete r11b
mov r9d, eax
add rcx, rdx
movzx esi, BYTE PTR [rcx]
lea rcx, [rdx+r9]
movzx ecx, BYTE PTR [rcx]
test r11b, r11b
je .L2
lea r9d, [r10+2]
mov DWORD PTR [r8], esi
add rdx, r9
mov QWORD PTR [rdi], rdx
ret
.L2:
lea rdx, [rdx+1+r9]
mov esi, ecx
mov DWORD PTR [r8], esi
mov QWORD PTR [rdi], rdx
ret
However, if the `volatile` qualifiers are removed from the source code, the
generated assembly becomes:
mov ecx, DWORD PTR [rsi]
mov rdx, QWORD PTR [rdi]
mov eax, ecx
and $3, eax
mov r9d, eax
movzx r8d, BYTE PTR [rdx+r9]
jne .L2
shr ecx, 2
lea r9d, [rcx+1]
add ecx, 2
movzx r9d, BYTE PTR [rdx+r9]
add rdx, rcx
mov DWORD PTR [rsi], r9d
mov QWORD PTR [rdi], rdx
ret
.L2:
lea rdx, [rdx+1+r9]
mov r9d, r8d
mov DWORD PTR [rsi], r9d
mov QWORD PTR [rdi], rdx
ret
The dump from the `expand` pass shows that the presence of the `volatile`
qualifier prevents the temporary variables from being replaced.
However, in my tests, when I modified the GCC source code to allow the
temporary variables to be replaced even with `volatile` present, the program
still ran correctly and was 4.5% faster than when the temporary variables
were not replaced.
So, why does adding the `volatile` qualifier prevent temporary variables from
being replaced?
You might refer to the following link:
https://godbolt.org/