https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921

            Bug ID: 113921
           Summary: Output register of an "asm volatile goto" is
                    incorrectly clobbered/discarded
           Product: gcc
           Version: 11.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: seanjc at google dot com
                CC: jakub at redhat dot com, ndesaulniers at google dot com,
                    torva...@linux-foundation.org, ubizjak at gmail dot com
  Target Milestone: ---

Created attachment 57428
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57428&action=edit
Intermediate output of the miscompiled file

gcc-11 appears to have a bug that results in gcc incorrectly clobbering the
output register of an "asm volatile goto".

The failing asm blob is a sequence of VMREADs in the Linux kernel, with the
outputs stored into a dynamically allocated structure whose lifetime extends
far beyond the scope of the code in question:

  vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
  vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
  vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
  vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);

where vmcs_read64() eventually becomes:

        asm volatile goto("1: vmread %[field], %[output]\n\t"
                          "jna %l[do_fail]\n\t"

                          _ASM_EXTABLE(1b, %l[do_exception])

                          : [output] "=r" (value)
                          : [field] "r" (field)
                          : "cc"
                          : do_fail, do_exception);

        return value;

  do_fail:
        instrumentation_begin();
        vmread_error(field);
        instrumentation_end();
        return 0;

  do_exception:
        kvm_spurious_fault();
        return 0;


The first three PDPTR VMREADs are generated correctly, but the result of the
fourth is effectively ignored, and '0' is written to vmcs12->guest_pdptr3.

    3597:       mov    $0x280a,%r13d
    359d:       vmread %r13,%r13
    35a1:       jbe    3724 <sync_vmcs02_to_vmcs12+0x7c4>
    35a7:       mov    %r13,0xd8(%rbx)
    35ae:       jmp    396b <sync_vmcs02_to_vmcs12+0xa0b>
    35b3:       mov    $0x280c,%r13d
    35b9:       vmread %r13,%r13
    35bd:       jbe    3705 <sync_vmcs02_to_vmcs12+0x7a5>
    35c3:       mov    %r13,0xe0(%rbx)
    35ca:       jmp    393a <sync_vmcs02_to_vmcs12+0x9da>
    35cf:       mov    $0x280e,%r13d
    35d5:       vmread %r13,%r13
    35d9:       jbe    36e6 <sync_vmcs02_to_vmcs12+0x786>
    35df:       mov    %r13,0xe8(%rbx)
    35e6:       jmp    3909 <sync_vmcs02_to_vmcs12+0x9a9>
    35eb:       mov    $0x2810,%eax
    35f0:       vmread %rax,%rax        <= VMREAD to nowhere   
    35f3:       jbe    36ca <sync_vmcs02_to_vmcs12+0x76a>
    35f9:       xor    %r12d,%r12d      <= zeroing of output
    35fc:       mov    %r12,0xf0(%rbx)  <= store to vmcs12->guest_pdptr3

Replacing "asm volatile goto" with the following macro

  #define asm_goto(x...) \
          do { asm volatile goto(x); asm (""); } while (0)

to force a second barrier generates functional code, although the attempt to
miscompile the sequence is still evident, as the output of the affected VMREAD
is unnecessarily bounced through an extra register:

    35f8:       mov    $0x280a,%r13d
    35fe:       vmread %r13,%r13
    3602:       jbe    36b2 <sync_vmcs02_to_vmcs12+0x762>
    3608:       mov    %r13,0xd8(%rbx)
    360f:       jmp    3925 <sync_vmcs02_to_vmcs12+0x9d5>
    3614:       mov    $0x280c,%r13d
    361a:       vmread %r13,%r13
    361e:       jbe    3693 <sync_vmcs02_to_vmcs12+0x743>
    3620:       mov    %r13,0xe0(%rbx)
    3627:       jmp    38f4 <sync_vmcs02_to_vmcs12+0x9a4>
    362c:       mov    $0x280e,%r13d
    3632:       vmread %r13,%r13
    3636:       jbe    367a <sync_vmcs02_to_vmcs12+0x72a>
    3638:       mov    %r13,0xe8(%rbx)
    363f:       jmp    38c3 <sync_vmcs02_to_vmcs12+0x973>
    3644:       mov    $0x2810,%eax
    3649:       vmread %rax,%rax
    364c:       jbe    3664 <sync_vmcs02_to_vmcs12+0x714>
    364e:       mov    %rax,%r12
    3651:       mov    %r12,0xf0(%rbx)

The bug reproduces with two different 11.4.0 builds, on three different systems
(Intel i7-9850H, Intel i7-13700K, AMD EPYC 7B12), all running Debian-based
Linux.

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu
11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs
--enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-11
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib
--enable-libphobos-checking=release --with-target-system-zlib=auto
--enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet
--with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32
--enable-multilib --with-tune=generic
--enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr
--without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu
--host=x86_64-linux-gnu --target=x86_64-linux-gnu
--with-build-config=bootstrap-lto-lean --enable-link-serialization=2
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04)

$ gcc-11 -v
Using built-in specs.
COLLECT_GCC=gcc-11
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 11.4.0-4'
--with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs
--enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-11
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib
--enable-libphobos-checking=release --with-target-system-zlib=auto
--enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet
--with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32
--enable-multilib --with-tune=generic
--enable-offload-targets=nvptx-none=/build/gcc-11-IBEKnH/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-IBEKnH/gcc-11-11.4.0/debian/tmp-gcn/usr
--without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu
--host=x86_64-linux-gnu --target=x86_64-linux-gnu
--with-build-config=bootstrap-lto-lean --enable-link-serialization=28
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 11.4.0 (Debian 11.4.0-4)

Reply via email to