https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118888
Bug ID: 118888 Summary: GCC only optimizes 1 bit-manipulation function out of many despite them having the same implementation. Product: gcc Version: 14.2.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: evanhyd2003 at gmail dot com Target Milestone: --- Created attachment 60504 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=60504&action=edit Compile this source code with -S and -O3 to observe its assembly output. gcc version 14.2.0 (GCC) Target: x86_64-w64-mingw32 COLLECT_GCC=C:\Users\evanh\works\coding\w64devkit\bin\gcc.exe COLLECT_LTO_WRAPPER=C:/Users/evanh/works/coding/w64devkit/bin/../libexec/gcc/x86_64-w64-mingw32/14.2.0/lto-wrapper.exe Configured with: /gcc-14.2.0/configure --prefix=/w64devkit --with-sysroot=/w64devkit/x86_64-w64-mingw32 --with-native-system-header-dir=/include --target=x86_64-w64-mingw32 --host=x86_64-w64-mingw32 --enable-static --disable-shared --with-pic --with-gmp-include=/deps/include --with-gmp-lib=/deps/lib --with-mpc-include=/deps/include --with-mpc-lib=/deps/lib --with-mpfr-include=/deps/include --with-mpfr-lib=/deps/lib --enable-languages=c,c++,fortran --enable-libgomp --enable-threads=posix --enable-version-specific-runtime-libs --disable-dependency-tracking --disable-lto --disable-multilib --disable-nls --disable-win32-registry --enable-mingw-wildcard CFLAGS_FOR_TARGET=-Os CXXFLAGS_FOR_TARGET=-Os LDFLAGS_FOR_TARGET=-s CFLAGS=-Os CXXFLAGS=-Os LDFLAGS=-s Thread model: posix Supported LTO compression algorithms: zlib Command to gcc that triggers the bug: g++ -S .\bug.cpp -O3 Expected behavior: All versions of setSquare() should get optimized to assembly code similar to setSquare3()'s. Actual behavior: Only setSquare3()'s assembly code got optimized, despite it having the same implementation as the other setSquare() versions. In addition, the code optimization breaks when you shuffle the function order. 
The correct optimization only applies to 1 function at a time. .file "bug.cpp" .text .p2align 4 .globl _Z10setSquare1yjjjj .def _Z10setSquare1yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare1yjjjj _Z10setSquare1yjjjj: .LFB31: .seh_endprologue movl $1, %eax movq %rax, %r11 movq %rcx, %r10 movl %r9d, %ecx salq %cl, %r11 movl 40(%rsp), %ecx movq %r11, %r9 movq %rax, %r11 btsq %rcx, %r9 movl %r8d, %ecx salq %cl, %r11 movl %edx, %ecx orq %r11, %r9 salq %cl, %rax orq %r10, %r9 orq %r9, %rax ret .seh_endproc .p2align 4 .globl _Z10setSquare2yjjjj .def _Z10setSquare2yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare2yjjjj _Z10setSquare2yjjjj: .LFB32: .seh_endprologue movl $1, %eax movq %rax, %r11 movq %rcx, %r10 movl %r9d, %ecx salq %cl, %r11 movl 40(%rsp), %ecx movq %r11, %r9 btsq %rcx, %r9 movl %r8d, %ecx orq %r10, %r9 movq %rax, %r10 salq %cl, %r10 movl %edx, %ecx orq %r10, %r9 salq %cl, %rax orq %r9, %rax ret .seh_endproc .p2align 4 .globl _Z10setSquare3yjjjj .def _Z10setSquare3yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare3yjjjj _Z10setSquare3yjjjj: .LFB33: .seh_endprologue movl 40(%rsp), %eax btsq %rax, %rcx btsq %r9, %rcx btsq %r8, %rcx movq %rcx, %rax btsq %rdx, %rax ret .seh_endproc .p2align 4 .globl _Z10setSquare4yjjjj .def _Z10setSquare4yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare4yjjjj _Z10setSquare4yjjjj: .LFB34: .seh_endprologue movl $1, %r10d movq %r10, %rax movq %rcx, %r11 movl %r9d, %ecx salq %cl, %rax movl 40(%rsp), %ecx btsq %rcx, %rax movl %r8d, %ecx orq %r11, %rax movq %r10, %r11 salq %cl, %r11 movl %edx, %ecx orq %r11, %rax salq %cl, %r10 orq %r10, %rax ret .seh_endproc .p2align 4 .globl _Z10setSquare5yjjjj .def _Z10setSquare5yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare5yjjjj _Z10setSquare5yjjjj: .LFB41: .seh_endprologue movl $1, %eax movq %rax, %r11 movq %rcx, %r10 movl %r9d, %ecx salq %cl, %r11 movl 40(%rsp), %ecx movq %r11, %r9 movq %rax, %r11 btsq %rcx, %r9 movl %r8d, %ecx salq %cl, %r11 movl %edx, %ecx 
orq %r11, %r9 salq %cl, %rax orq %r10, %r9 orq %r9, %rax ret .seh_endproc .p2align 4 .globl _Z10setSquare6yjjjj .def _Z10setSquare6yjjjj; .scl 2; .type 32; .endef .seh_proc _Z10setSquare6yjjjj _Z10setSquare6yjjjj: .LFB39: .seh_endprologue movl $1, %eax movq %rax, %r11 movq %rcx, %r10 movl %r9d, %ecx salq %cl, %r11 movl 40(%rsp), %ecx movq %r11, %r9 movq %rax, %r11 btsq %rcx, %r9 movl %r8d, %ecx salq %cl, %r11 movl %edx, %ecx orq %r11, %r9 salq %cl, %rax orq %r10, %r9 orq %r9, %rax ret .seh_endproc .ident "GCC: (GNU) 14.2.0" Compiler Explorer: https://godbolt.org/z/GnbKzd33s