Issue 119959
Summary [llvm] cmpxchg16b uses pointer from overwritten rbx
Labels new issue
Assignees
Reporter vasama
    Reduced IR:

```ll
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
 unreachable

4:                                                ; preds = %0
  %5 = cleanuppad within none []
  ret void
}

declare i32 @__CxxFrameHandler3(...)

declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
uselistorder i32 0, { 1, 0 }

attributes #0 = { "target-cpu"="nehalem" }
```

Here is the resulting object code:
(`clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -`)
```asm
0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55 push    rbp
       1: 53 push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
 6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3 mov     rbx, rsp
      12: 48 89 6b 58                   mov qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2 xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0 lock
      2e: 48 0f c7 4b 40 cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3 mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc int3
      3e: 66 90                         nop
```

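Note `mov     rbx, r9` followed by `cmpxchg16b      xmmword ptr [rbx + 0x40]`: the memory operand is addressed through `rbx` immediately after `rbx` has been overwritten with the low half of the replacement value, which `cmpxchg16b` takes implicitly in `rcx:rbx`. The original value of `rbx` (the stack base pointer) was saved to `r8` and is only restored after the instruction, so the compare-exchange operates on the wrong address.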

The original unreduced input produces slightly different object code but has the same problem:
```asm
00007FF786689459  lea         r8,[rbx+100h] 
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467 mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop 
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]
```

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to