https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118888

            Bug ID: 118888
           Summary: GCC only optimizes 1 bit-manipulation function out of
                    many despite all having the same implementation.
           Product: gcc
           Version: 14.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: evanhyd2003 at gmail dot com
  Target Milestone: ---

Created attachment 60504
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=60504&action=edit
Compile this source code with -S and -O3 to observe its assembly output.

gcc version 14.2.0 (GCC)

Target: x86_64-w64-mingw32

COLLECT_GCC=C:\Users\evanh\works\coding\w64devkit\bin\gcc.exe
COLLECT_LTO_WRAPPER=C:/Users/evanh/works/coding/w64devkit/bin/../libexec/gcc/x86_64-w64-mingw32/14.2.0/lto-wrapper.exe
Configured with: /gcc-14.2.0/configure --prefix=/w64devkit
--with-sysroot=/w64devkit/x86_64-w64-mingw32
--with-native-system-header-dir=/include --target=x86_64-w64-mingw32
--host=x86_64-w64-mingw32 --enable-static --disable-shared --with-pic
--with-gmp-include=/deps/include --with-gmp-lib=/deps/lib
--with-mpc-include=/deps/include --with-mpc-lib=/deps/lib
--with-mpfr-include=/deps/include --with-mpfr-lib=/deps/lib
--enable-languages=c,c++,fortran --enable-libgomp --enable-threads=posix
--enable-version-specific-runtime-libs --disable-dependency-tracking
--disable-lto --disable-multilib --disable-nls --disable-win32-registry
--enable-mingw-wildcard CFLAGS_FOR_TARGET=-Os CXXFLAGS_FOR_TARGET=-Os
LDFLAGS_FOR_TARGET=-s CFLAGS=-Os CXXFLAGS=-Os LDFLAGS=-s
Thread model: posix
Supported LTO compression algorithms: zlib

Command to gcc that triggers the bug: g++ -S .\bug.cpp -O3

Expected behavior:
    All versions of setSquare() should get optimized to assembly code similar
to setSquare3()'s.

Actual behavior:
    Only setSquare3()'s assembly code got optimized, despite it having the same
implementation as the other setSquare() versions. In addition, the optimization
breaks when you shuffle the function order: the correct optimization is only
applied to 1 function at a time.

        .file   "bug.cpp"
        .text
        .p2align 4
        .globl  _Z10setSquare1yjjjj
        .def    _Z10setSquare1yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare1yjjjj
        # setSquare1(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 | 1<<arg2 | 1<<arg3 | 1<<arg4 | 1<<arg5
        #        (shift/bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, r9, r10, r11, flags (all volatile); leaf, no stack use
        # Non-optimized form: only one of the four bits is set with btsq,
        # the other three are built with explicit "1 << n" shifts and ORs.
_Z10setSquare1yjjjj:
.LFB31:
        .seh_endprologue
        movl    $1, %eax                # rax = 1 (shift seed)
        movq    %rax, %r11              # r11 = 1
        movq    %rcx, %r10              # r10 = arg1; frees cl for shift counts
        movl    %r9d, %ecx              # cl = arg4
        salq    %cl, %r11               # r11 = 1 << arg4
        movl    40(%rsp), %ecx          # ecx = arg5 (stack-passed)
        movq    %r11, %r9               # r9 = 1 << arg4 (accumulator)
        movq    %rax, %r11              # r11 = 1 again
        btsq    %rcx, %r9               # r9 |= 1 << arg5
        movl    %r8d, %ecx              # cl = arg3
        salq    %cl, %r11               # r11 = 1 << arg3
        movl    %edx, %ecx              # cl = arg2
        orq     %r11, %r9               # r9 |= 1 << arg3
        salq    %cl, %rax               # rax = 1 << arg2
        orq     %r10, %r9               # r9 |= arg1
        orq     %r9, %rax               # rax = combined result
        ret
        .seh_endproc
        .p2align 4
        .globl  _Z10setSquare2yjjjj
        .def    _Z10setSquare2yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare2yjjjj
        # setSquare2(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 | 1<<arg2 | 1<<arg3 | 1<<arg4 | 1<<arg5
        #        (shift/bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, r9, r10, r11, flags (all volatile); leaf, no stack use
        # Non-optimized form: one btsq plus three explicit shift-and-OR steps.
_Z10setSquare2yjjjj:
.LFB32:
        .seh_endprologue
        movl    $1, %eax                # rax = 1 (shift seed)
        movq    %rax, %r11              # r11 = 1
        movq    %rcx, %r10              # r10 = arg1; frees cl for shift counts
        movl    %r9d, %ecx              # cl = arg4
        salq    %cl, %r11               # r11 = 1 << arg4
        movl    40(%rsp), %ecx          # ecx = arg5 (stack-passed)
        movq    %r11, %r9               # r9 = 1 << arg4 (accumulator)
        btsq    %rcx, %r9               # r9 |= 1 << arg5
        movl    %r8d, %ecx              # cl = arg3
        orq     %r10, %r9               # r9 |= arg1
        movq    %rax, %r10              # r10 = 1 (arg1 already merged)
        salq    %cl, %r10               # r10 = 1 << arg3
        movl    %edx, %ecx              # cl = arg2
        orq     %r10, %r9               # r9 |= 1 << arg3
        salq    %cl, %rax               # rax = 1 << arg2
        orq     %r9, %rax               # rax = combined result
        ret
        .seh_endproc
        .p2align 4
        .globl  _Z10setSquare3yjjjj
        .def    _Z10setSquare3yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare3yjjjj
        # setSquare3(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 with bits arg2, arg3, arg4 and arg5 set
        #        (bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, flags; leaf, no stack use
        # This is the optimized form the report expects for every variant:
        # each "x |= 1 << n" is a single btsq, with no shifts or temporaries.
_Z10setSquare3yjjjj:
.LFB33:
        .seh_endprologue
        movl    40(%rsp), %eax          # eax = arg5 (stack-passed, zero-extended)
        btsq    %rax, %rcx              # arg1 |= 1 << arg5
        btsq    %r9, %rcx               # arg1 |= 1 << arg4 (register-form bts
                                        #   uses only the low 6 bits of the index)
        btsq    %r8, %rcx               # arg1 |= 1 << arg3
        movq    %rcx, %rax              # move accumulator to the return register
        btsq    %rdx, %rax              # result |= 1 << arg2
        ret
        .seh_endproc
        .p2align 4
        .globl  _Z10setSquare4yjjjj
        .def    _Z10setSquare4yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare4yjjjj
        # setSquare4(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 | 1<<arg2 | 1<<arg3 | 1<<arg4 | 1<<arg5
        #        (shift/bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, r10, r11, flags (all volatile); leaf, no stack use
        # Non-optimized form: one btsq plus three explicit shift-and-OR steps,
        # accumulating directly in rax.
_Z10setSquare4yjjjj:
.LFB34:
        .seh_endprologue
        movl    $1, %r10d               # r10 = 1 (shift seed)
        movq    %r10, %rax              # rax = 1
        movq    %rcx, %r11              # r11 = arg1; frees cl for shift counts
        movl    %r9d, %ecx              # cl = arg4
        salq    %cl, %rax               # rax = 1 << arg4 (accumulator)
        movl    40(%rsp), %ecx          # ecx = arg5 (stack-passed)
        btsq    %rcx, %rax              # rax |= 1 << arg5
        movl    %r8d, %ecx              # cl = arg3
        orq     %r11, %rax              # rax |= arg1
        movq    %r10, %r11              # r11 = 1 (arg1 already merged)
        salq    %cl, %r11               # r11 = 1 << arg3
        movl    %edx, %ecx              # cl = arg2
        orq     %r11, %rax              # rax |= 1 << arg3
        salq    %cl, %r10               # r10 = 1 << arg2
        orq     %r10, %rax              # rax = combined result
        ret
        .seh_endproc
        .p2align 4
        .globl  _Z10setSquare5yjjjj
        .def    _Z10setSquare5yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare5yjjjj
        # setSquare5(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 | 1<<arg2 | 1<<arg3 | 1<<arg4 | 1<<arg5
        #        (shift/bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, r9, r10, r11, flags (all volatile); leaf, no stack use
        # Non-optimized form: one btsq plus three explicit shift-and-OR steps.
_Z10setSquare5yjjjj:
.LFB41:
        .seh_endprologue
        movl    $1, %eax                # rax = 1 (shift seed)
        movq    %rax, %r11              # r11 = 1
        movq    %rcx, %r10              # r10 = arg1; frees cl for shift counts
        movl    %r9d, %ecx              # cl = arg4
        salq    %cl, %r11               # r11 = 1 << arg4
        movl    40(%rsp), %ecx          # ecx = arg5 (stack-passed)
        movq    %r11, %r9               # r9 = 1 << arg4 (accumulator)
        movq    %rax, %r11              # r11 = 1 again
        btsq    %rcx, %r9               # r9 |= 1 << arg5
        movl    %r8d, %ecx              # cl = arg3
        salq    %cl, %r11               # r11 = 1 << arg3
        movl    %edx, %ecx              # cl = arg2
        orq     %r11, %r9               # r9 |= 1 << arg3
        salq    %cl, %rax               # rax = 1 << arg2
        orq     %r10, %r9               # r9 |= arg1
        orq     %r9, %rax               # rax = combined result
        ret
        .seh_endproc
        .p2align 4
        .globl  _Z10setSquare6yjjjj
        .def    _Z10setSquare6yjjjj;    .scl    2;      .type   32;     .endef
        .seh_proc       _Z10setSquare6yjjjj
        # setSquare6(unsigned long long, unsigned int, unsigned int,
        #            unsigned int, unsigned int)   [demangled from the symbol]
        # Syntax: GAS / AT&T.  ABI: Microsoft x64 (target x86_64-w64-mingw32).
        # In:    rcx = arg1, edx = arg2, r8d = arg3, r9d = arg4,
        #        40(%rsp) = arg5 (return address + 32-byte shadow space)
        # Out:   rax = arg1 | 1<<arg2 | 1<<arg3 | 1<<arg4 | 1<<arg5
        #        (shift/bit indices are taken modulo 64 by the hardware)
        # Clobb: rcx, r9, r10, r11, flags (all volatile); leaf, no stack use
        # Non-optimized form: one btsq plus three explicit shift-and-OR steps.
_Z10setSquare6yjjjj:
.LFB39:
        .seh_endprologue
        movl    $1, %eax                # rax = 1 (shift seed)
        movq    %rax, %r11              # r11 = 1
        movq    %rcx, %r10              # r10 = arg1; frees cl for shift counts
        movl    %r9d, %ecx              # cl = arg4
        salq    %cl, %r11               # r11 = 1 << arg4
        movl    40(%rsp), %ecx          # ecx = arg5 (stack-passed)
        movq    %r11, %r9               # r9 = 1 << arg4 (accumulator)
        movq    %rax, %r11              # r11 = 1 again
        btsq    %rcx, %r9               # r9 |= 1 << arg5
        movl    %r8d, %ecx              # cl = arg3
        salq    %cl, %r11               # r11 = 1 << arg3
        movl    %edx, %ecx              # cl = arg2
        orq     %r11, %r9               # r9 |= 1 << arg3
        salq    %cl, %rax               # rax = 1 << arg2
        orq     %r10, %r9               # r9 |= arg1
        orq     %r9, %rax               # rax = combined result
        ret
        .seh_endproc
        .ident  "GCC: (GNU) 14.2.0"

Compiler Explorer: https://godbolt.org/z/GnbKzd33s

Reply via email to