https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118091

            Bug ID: 118091
           Summary: Missed Optimization in Creating Medium-Sized Struct
                    and Passing It as Parameter
           Product: gcc
           Version: 14.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jonathan.gruber.jg at gmail dot com
  Target Milestone: ---

Created attachment 59893
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=59893&action=edit
Minimal test case.

GCC generates suboptimal code when creating a medium-sized struct (here, two
pointers) and passing it by value as a parameter. I tested the x86_64, aarch64,
and riscv64 targets at optimization levels -O2, -O3, -Os, and -Oz. The minimal
test case is attached to this bug report and is also reproduced below:

struct S {
        void *x, *y;
};

extern int extern_func(struct S);

int tail_call(void *x, void *y) {
        struct S s = { x, y };

        return extern_func(s);
}

int non_tail_call(void *x, void *y) {
        struct S s = { x, y };

        return ~extern_func(s);
}


x86_64 assembly, -O3:

tail_call:
        .cfi_startproc
        xchgq   %rdi, %rsi
        movq    %rsi, %rax
        movq    %rdi, %rsi
        movq    %rax, %rdi
        jmp     extern_func@PLT
        .cfi_endproc

non_tail_call:
        .cfi_startproc
        xchgq   %rdi, %rsi
        subq    $8, %rsp
        .cfi_def_cfa_offset 16
        movq    %rsi, %rax
        movq    %rdi, %rsi
        movq    %rax, %rdi
        call    extern_func@PLT
        addq    $8, %rsp
        .cfi_def_cfa_offset 8
        notl    %eax
        ret
        .cfi_endproc


aarch64 assembly, -O3:

tail_call:
        .cfi_startproc
        b       extern_func
        .cfi_endproc

non_tail_call:
        .cfi_startproc
        stp     x29, x30, [sp, -16]!
        .cfi_def_cfa_offset 16
        .cfi_offset 29, -16
        .cfi_offset 30, -8
        mov     x29, sp
        bl      extern_func
        ldp     x29, x30, [sp], 16
        .cfi_restore 30
        .cfi_restore 29
        .cfi_def_cfa_offset 0
        mvn     w0, w0
        ret
        .cfi_endproc


riscv64 assembly, -O3:

tail_call:
        .cfi_startproc
        addi    sp,sp,-16
        .cfi_def_cfa_offset 16
        addi    sp,sp,16
        .cfi_def_cfa_offset 0
        tail    extern_func@plt
        .cfi_endproc

non_tail_call:
        .cfi_startproc
        addi    sp,sp,-32
        .cfi_def_cfa_offset 32
        sd      ra,24(sp)
        .cfi_offset 1, -8
        call    extern_func@plt
        ld      ra,24(sp)
        .cfi_restore 1
        not     a0,a0
        sext.w  a0,a0
        addi    sp,sp,32
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc


I have only a passing familiarity with these architectures' calling
conventions, so please excuse any errors in my analysis below.

On x86_64, the generated code is effectively a slightly slow no-op: xchgq first
swaps %rdi and %rsi, and the three movq instructions that follow swap them back
via %rax, so both arguments end up in the registers they arrived in. The
aarch64 version seems okay. The tail call on riscv64 contains another slightly
slow no-op: it decreases sp by 16 and then immediately undoes that by
increasing sp by 16. The non-tail call on riscv64 decreases sp by 32, which
seems excessive when it only stores the 8-byte value of ra on the stack.

I'm not sure whether I filed this bug under the correct component
(rtl-optimization), so feel free to reassign it if another component is more
appropriate.

Host system type: Arch Linux, x86_64

gcc information:
Version: 14.2.1 20240910 (GCC)
Configured with: /build/gcc/src/gcc/configure
--enable-languages=ada,c,c++,d,fortran,go,lto,m2,objc,obj-c++,rust
--enable-bootstrap --prefix=/usr --libdir=/usr/lib --libexecdir=/usr/lib
--mandir=/usr/share/man --infodir=/usr/share/info
--with-bugurl=https://gitlab.archlinux.org/archlinux/packaging/packages/gcc/-/issues
--with-build-config=bootstrap-lto --with-linker-hash-style=gnu
--with-system-zlib --enable-__cxa_atexit --enable-cet=auto
--enable-checking=release --enable-clocale=gnu --enable-default-pie
--enable-default-ssp --enable-gnu-indirect-function --enable-gnu-unique-object
--enable-libstdcxx-backtrace --enable-link-serialization=1
--enable-linker-build-id --enable-lto --enable-multilib --enable-plugin
--enable-shared --enable-threads=posix --disable-libssp --disable-libstdcxx-pch
--disable-werror

aarch64-linux-gnu-gcc information:
Version: 14.2.0
Configured with: /build/aarch64-linux-gnu-gcc/src/gcc-14.2.0/configure
--prefix=/usr --program-prefix=aarch64-linux-gnu-
--with-local-prefix=/usr/aarch64-linux-gnu
--with-sysroot=/usr/aarch64-linux-gnu
--with-build-sysroot=/usr/aarch64-linux-gnu
--with-native-system-header-dir=/include --libdir=/usr/lib
--libexecdir=/usr/lib --target=aarch64-linux-gnu --host=x86_64-pc-linux-gnu
--build=x86_64-pc-linux-gnu --disable-nls --enable-default-pie
--enable-languages=c,c++,fortran --enable-shared --enable-threads=posix
--with-system-zlib --with-isl --enable-__cxa_atexit
--disable-libunwind-exceptions --enable-clocale=gnu --disable-libstdcxx-pch
--disable-libssp --enable-gnu-unique-object --enable-linker-build-id
--enable-lto --enable-plugin --enable-install-libiberty
--with-linker-hash-style=gnu --enable-gnu-indirect-function --disable-multilib
--disable-werror --enable-checking=release

riscv64-linux-gnu-gcc information:
Version: 14.2.0
Configured with: /build/riscv64-linux-gnu-gcc/src/gcc-14.2.0/configure
--prefix=/usr --program-prefix=riscv64-linux-gnu-
--with-local-prefix=/usr/riscv64-linux-gnu
--with-sysroot=/usr/riscv64-linux-gnu
--with-build-sysroot=/usr/riscv64-linux-gnu --libdir=/usr/lib
--libexecdir=/usr/lib --target=riscv64-linux-gnu --host=x86_64-pc-linux-gnu
--build=x86_64-pc-linux-gnu --with-system-zlib --with-isl
--with-linker-hash-style=gnu --disable-nls --disable-libunwind-exceptions
--disable-libstdcxx-pch --disable-libssp --disable-multilib --disable-werror
--enable-languages=c,c++ --enable-shared --enable-threads=posix
--enable-__cxa_atexit --enable-clocale=gnu --enable-gnu-unique-object
--enable-linker-build-id --enable-lto --enable-plugin
--enable-install-libiberty --enable-gnu-indirect-function --enable-default-pie
--enable-checking=release
  • [Bug rtl-optimization/118... jonathan.gruber.jg at gmail dot com via Gcc-bugs

Reply via email to