https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124441

            Bug ID: 124441
           Summary: The `volatile` keyword will block the optimization of
                    replacing temporary variables in the `expand/TER` RTL
                    pass.
           Product: gcc
           Version: 15.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: xingyushuai at hygon dot cn
  Target Milestone: ---

Code:
--------------
// Splits the incoming *tag into its low two bits (tag_type) and the remaining
// upper bits (literal_len = *tag >> 2), loads the next tag byte from the input,
// and advances the input cursor.
//
// Parameters:
//   ip_p - in/out: address of the input cursor. On return *ip_p has been moved
//          forward by 2 + literal_len bytes when tag_type == 0, otherwise by
//          1 + tag_type bytes.
//   tag  - in/out: on entry the current tag word. On return it holds the byte
//          read from the old cursor at offset 1 + literal_len when
//          tag_type == 0, otherwise the byte read at offset tag_type.
// Returns: tag_type, i.e. the low two bits of the incoming *tag.
//
// Both candidate bytes are read before the selection is made, and both reads
// go through a `const volatile unsigned char*`.
unsigned int  AdvanceToNextTagX86Optimized(const unsigned char** ip_p, unsigned
int* tag) {
  // Alias for the caller's cursor; the assignment to `ip` below updates *ip_p.
  const unsigned char*& ip = *ip_p;

  unsigned int literal_len = *tag >> 2;
  unsigned int tag_type = *tag;
  bool is_literal;
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
  // Mask tag_type down to its low two bits with a single `and`, and capture
  // the resulting zero flag directly into is_literal through the "=@ccz" flag
  // output operand, so the value and the comparison come from one instruction.
  asm("and $3, %k[tag_type]\n\t"
      : [tag_type] "+r"(tag_type), "=@ccz"(is_literal)
      :: "cc");
#else
  // Portable equivalent of the asm above.
  tag_type &= 3;
  is_literal = (tag_type == 0);
#endif

  // Read both candidate next-tag bytes up front. The volatile qualifier on
  // these two loads is the construct this report is about.
  // NOTE(review): presumably the volatile casts are there to keep both loads
  // unconditional ahead of the select below -- confirm the intent.
  unsigned int tag_literal =
      static_cast<const volatile unsigned char*>(ip)[1 + literal_len];
  unsigned int tag_copy = static_cast<const volatile unsigned
char*>(ip)[tag_type];
  *tag = is_literal ? tag_literal : tag_copy;
  // Compute both candidate cursor positions, then pick one with the same
  // predicate used for the tag byte.
  const unsigned char* ip_copy = ip + 1 + tag_type;
  const unsigned char* ip_literal = ip + 2 + literal_len;
  ip = is_literal ? ip_literal : ip_copy;
#if defined(__GNUC__) && defined(__x86_64__)
  // Empty asm template that takes tag_copy as a register input operand.
  // NOTE(review): presumably this keeps tag_copy materialized in a register so
  // its load is not made conditional -- confirm the intent.
  asm("" ::"r"(tag_copy));
#endif
  return tag_type;
}
--------------
The above code was compiled using gcc 15.2.0 (compiler option -O3), and the
generated assembly code is as follows:

        mov     eax, DWORD PTR [rsi]
        mov     rdx, QWORD PTR [rdi]
        mov     r8, rsi
        mov     r10d, eax
        shr     r10d, 2
        lea     ecx, [r10+1]
        and $3, eax

        sete    r11b
        mov     r9d, eax
        add     rcx, rdx
        movzx   esi, BYTE PTR [rcx]
        lea     rcx, [rdx+r9]
        movzx   ecx, BYTE PTR [rcx]
        test    r11b, r11b
        je      .L2
        lea     r9d, [r10+2]
        mov     DWORD PTR [r8], esi
        add     rdx, r9
        mov     QWORD PTR [rdi], rdx
        ret
.L2:
        lea     rdx, [rdx+1+r9]
        mov     esi, ecx
        mov     DWORD PTR [r8], esi
        mov     QWORD PTR [rdi], rdx
        ret

However, if the `volatile` keyword is removed from the source code, the
generated assembly becomes:

        mov     ecx, DWORD PTR [rsi]
        mov     rdx, QWORD PTR [rdi]
        mov     eax, ecx
        and $3, eax

        mov     r9d, eax
        movzx   r8d, BYTE PTR [rdx+r9]
        jne     .L2
        shr     ecx, 2
        lea     r9d, [rcx+1]
        add     ecx, 2
        movzx   r9d, BYTE PTR [rdx+r9]
        add     rdx, rcx
        mov     DWORD PTR [rsi], r9d
        mov     QWORD PTR [rdi], rdx
        ret
.L2:
        lea     rdx, [rdx+1+r9]
        mov     r9d, r8d
        mov     DWORD PTR [rsi], r9d
        mov     QWORD PTR [rdi], rdx
        ret

The dump from the `expand` pass shows that the presence of the `volatile`
keyword prevents temporary variables from being replaced. However, in my
tests, when I modified the GCC source code to allow the temporary variables to
be replaced even with `volatile` present, the program still ran correctly and
was 4.5% faster than when the temporary variables were not replaced.

So, why does adding the `volatile` keyword prevent temporary variables from
being replaced?

You might refer to the following link:
https://godbolt.org/

Reply via email to