https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
Bug ID: 113921
Summary: Output register of an "asm volatile goto" is incorrectly clobbered/discarded
Product: gcc
Version: 11.4.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: seanjc at google dot com
CC: jakub at redhat dot com, ndesaulniers at google dot com, torva...@linux-foundation.org, ubizjak at gmail dot com
Target Milestone: ---

Created attachment 57428
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57428&action=edit
Intermediate output of the miscompiled file

gcc-11 appears to have a bug that results in gcc incorrectly clobbering the output register of an "asm volatile goto". The failing asm blob is a sequence of VMREADs in the Linux kernel, with the outputs stored into a dynamically allocated structure whose lifetime extends far beyond the scope of the code in question:

  vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
  vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
  vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
  vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);

where vmcs_read64() eventually becomes:

  asm volatile goto("1: vmread %[field], %[output]\n\t"
                    "jna %l[do_fail]\n\t"
                    _ASM_EXTABLE(1b, %l[do_exception])
                    : [output] "=r" (value)
                    : [field] "r" (field)
                    : "cc"
                    : do_fail, do_exception);
  return value;

  do_fail:
  instrumentation_begin();
  vmread_error(field);
  instrumentation_end();
  return 0;

  do_exception:
  kvm_spurious_fault();
  return 0;

The first three PDPTR VMREADs generate correctly, but the fourth effectively gets ignored, and '0' is written to vmcs12->guest_pdptr3.
  3597: mov $0x280a,%r13d
  359d: vmread %r13,%r13
  35a1: jbe 3724 <sync_vmcs02_to_vmcs12+0x7c4>
  35a7: mov %r13,0xd8(%rbx)
  35ae: jmp 396b <sync_vmcs02_to_vmcs12+0xa0b>
  35b3: mov $0x280c,%r13d
  35b9: vmread %r13,%r13
  35bd: jbe 3705 <sync_vmcs02_to_vmcs12+0x7a5>
  35c3: mov %r13,0xe0(%rbx)
  35ca: jmp 393a <sync_vmcs02_to_vmcs12+0x9da>
  35cf: mov $0x280e,%r13d
  35d5: vmread %r13,%r13
  35d9: jbe 36e6 <sync_vmcs02_to_vmcs12+0x786>
  35df: mov %r13,0xe8(%rbx)
  35e6: jmp 3909 <sync_vmcs02_to_vmcs12+0x9a9>
  35eb: mov $0x2810,%eax
  35f0: vmread %rax,%rax <= VMREAD to nowhere
  35f3: jbe 36ca <sync_vmcs02_to_vmcs12+0x76a>
  35f9: xor %r12d,%r12d <= zeroing of output
  35fc: mov %r12,0xf0(%rbx) <= store to vmcs12->guest_pdptr3

Replacing "asm volatile goto" with the following macro

  #define asm_goto(x...) \
  do { asm volatile goto(x); asm (""); } while (0)

to force a second barrier generates functional code, although the attempt to miscompile the sequence is still evident, as the output of the affected VMREAD is unnecessarily bounced through an extra register:

  35f8: mov $0x280a,%r13d
  35fe: vmread %r13,%r13
  3602: jbe 36b2 <sync_vmcs02_to_vmcs12+0x762>
  3608: mov %r13,0xd8(%rbx)
  360f: jmp 3925 <sync_vmcs02_to_vmcs12+0x9d5>
  3614: mov $0x280c,%r13d
  361a: vmread %r13,%r13
  361e: jbe 3693 <sync_vmcs02_to_vmcs12+0x743>
  3620: mov %r13,0xe0(%rbx)
  3627: jmp 38f4 <sync_vmcs02_to_vmcs12+0x9a4>
  362c: mov $0x280e,%r13d
  3632: vmread %r13,%r13
  3636: jbe 367a <sync_vmcs02_to_vmcs12+0x72a>
  3638: mov %r13,0xe8(%rbx)
  363f: jmp 38c3 <sync_vmcs02_to_vmcs12+0x973>
  3644: mov $0x2810,%eax
  3649: vmread %rax,%rax
  364c: jbe 3664 <sync_vmcs02_to_vmcs12+0x714>
  364e: mov %rax,%r12
  3651: mov %r12,0xf0(%rbx)

The bug reproduces with two different 11.4.0 builds, on three different systems (Intel i7-9850H, Intel i7-13700K, AMD EPYC 7B12), all running Debian-based Linux.

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) $ gcc-11 -v Using built-in specs. 
COLLECT_GCC=gcc-11 COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Debian 11.4.0-4' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-IBEKnH/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-IBEKnH/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=28 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 11.4.0 (Debian 11.4.0-4)