Issue |
130443
|
Summary |
[AMDGPU] Illegal VGPR to SGPR Copy When Argument Passing Has SGPR to VGPR Spill
|
Labels |
backend:AMDGPU
|
Assignees |
shiltian
|
Reporter |
shiltian
|
This is a follow-up to https://github.com/llvm/llvm-project/issues/113782. The same input IR now fails with an "illegal VGPR to SGPR copy" error.
```
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o -
declare hidden void @external_void_func_a15i32_inreg([16 x i32] inreg)
define void @test_call_external_void_func_a15i32_inreg([16 x i32] inreg %arg0) {
call void @external_void_func_a15i32_inreg([16 x i32] inreg %arg0)
ret void
}
```
<details>
<summary>Error output</summary>
```
error: <unknown>:0:0: in function test_call_external_void_func_a15i32_inreg void ([16 x i32]): illegal VGPR to SGPR copy
error: <unknown>:0:0: in function test_call_external_void_func_a15i32_inreg void ([16 x i32]): illegal VGPR to SGPR copy
.amdgcn_target "amdgcn-amd-amdhsa--gfx900"
.amdhsa_code_object_version 5
.text
.globl test_call_external_void_func_a15i32_inreg ; -- Begin function test_call_external_void_func_a15i32_inreg
.p2align 2
.type test_call_external_void_func_a15i32_inreg,@function
test_call_external_void_func_a15i32_inreg: ; @test_call_external_void_func_a15i32_inreg
; %bb.0:
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
s_mov_b32 s40, s33
s_mov_b32 s33, s32
s_or_saveexec_b64 s[42:43], -1
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
s_mov_b64 exec, s[42:43]
v_writelane_b32 v40, s40, 2
s_addk_i32 s32, 0x400
v_writelane_b32 v40, s30, 0
v_writelane_b32 v40, s31, 1
s_getpc_b64 s[40:41]
s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4
s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12
s_mov_b32 s3, s19
s_mov_b32 s2, s18
s_mov_b32 s1, s17
s_mov_b32 s0, s16
s_mov_b32 s16, s20
s_mov_b32 s17, s21
s_mov_b32 s18, s22
s_mov_b32 s19, s23
s_mov_b32 s20, s24
s_mov_b32 s21, s25
s_mov_b32 s22, s26
s_mov_b32 s23, s27
s_mov_b32 s24, s28
s_mov_b32 s25, s29
; illegal copy v0 to s26
; illegal copy v1 to s27
s_swappc_b64 s[30:31], s[40:41]
v_readlane_b32 s31, v40, 1
v_readlane_b32 s30, v40, 0
s_mov_b32 s32, s33
v_readlane_b32 s4, v40, 2
s_or_saveexec_b64 s[6:7], -1
buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
s_mov_b64 exec, s[6:7]
s_mov_b32 s33, s4
s_waitcnt vmcnt(0)
s_setpc_b64 s[30:31]
.Lfunc_end0:
.size test_call_external_void_func_a15i32_inreg, .Lfunc_end0-test_call_external_void_func_a15i32_inreg
; -- End function
.set test_call_external_void_func_a15i32_inreg.num_vgpr, max(41, amdgpu.max_num_vgpr)
.set test_call_external_void_func_a15i32_inreg.num_agpr, max(0, amdgpu.max_num_agpr)
.set test_call_external_void_func_a15i32_inreg.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
.set test_call_external_void_func_a15i32_inreg.private_seg_size, 16
.set test_call_external_void_func_a15i32_inreg.uses_vcc, 1
.set test_call_external_void_func_a15i32_inreg.uses_flat_scratch, 1
.set test_call_external_void_func_a15i32_inreg.has_dyn_sized_stack, 1
.set test_call_external_void_func_a15i32_inreg.has_recursion, 1
.set test_call_external_void_func_a15i32_inreg.has_indirect_call, 1
.section .AMDGPU.csdata,"",@progbits
; Function info:
; codeLenInByte = 192
; TotalNumSgprs: test_call_external_void_func_a15i32_inreg.numbered_sgpr+6
; NumVgprs: max(41, amdgpu.max_num_vgpr)
; ScratchSize: 16
; MemoryBound: 0
.section .AMDGPU.gpr_maximums,"",@progbits
.set amdgpu.max_num_vgpr, 41
.set amdgpu.max_num_agpr, 0
.set amdgpu.max_num_sgpr, 44
.section .AMDGPU.csdata,"",@progbits
.hidden external_void_func_a15i32_inreg
.section ".note.GNU-stack","",@progbits
.amdgpu_metadata
---
amdhsa.kernels: []
amdhsa.target: amdgcn-amd-amdhsa--gfx900
amdhsa.version:
- 1
- 2
...
.end_amdgpu_metadata
```
</details>
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs