This revision was automatically updated to reflect the committed changes.
Closed by commit rG864a2b25beac: [AMDGPU] Reserve extra SGPR blocks with XNACK
"any" TID Setting (authored by kerbowa).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D145401/new/
https://reviews.llvm.org/D145401
Files:
clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
llvm/test/CodeGen/AMDGPU/trap-abis.ll
Index: llvm/test/CodeGen/AMDGPU/trap-abis.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -17,7 +17,75 @@
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; NOHSA-TRAP-GFX900-V2-LABEL: trap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
@@ -161,7 +229,7 @@
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-TRAP-GFX900-V2-NEXT: priority = 0
; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-TRAP-GFX900-V2-NEXT: priv = 0
@@ -204,7 +272,7 @@
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -261,7 +329,7 @@
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
@@ -304,7 +372,7 @@
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -356,7 +424,75 @@
define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
@@ -591,7 +727,7 @@
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -712,7 +848,7 @@
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -792,7 +928,75 @@
define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
@@ -954,7 +1158,7 @@
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-TRAP-GFX900-V2-NEXT: priority = 0
; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-TRAP-GFX900-V2-NEXT: priv = 0
@@ -997,7 +1201,7 @@
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -1064,7 +1268,7 @@
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
@@ -1107,7 +1311,7 @@
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
Index: llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 1
+
+; Verify that an extra SGPR block is reserved with XNACK "on" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @...............
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 9
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Index: llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 0
+
+; Verify that an extra SGPR block is not reserved with XNACK "off" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 0000af00 88000000 01000000 00000000 ................
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 5
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Index: llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 1
+
+; Verify that an extra SGPR block is reserved with XNACK "any" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @...............
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 9
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -2,7 +2,7 @@
; RUN: FileCheck -check-prefix=REMARK %s < %t
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
-; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24
+; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 28
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
@@ -27,7 +27,7 @@
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' SGPRs: '
-; REMARK-NEXT: - NumSGPR: '24'
+; REMARK-NEXT: - NumSGPR: '28'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
@@ -120,7 +120,7 @@
}
; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
-; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 4
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
Index: llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
+++ llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GCN,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1010,GFX1010W32 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1010,GFX1010W64 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1010,GFX1010W32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-xnack -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1010,GFX1010W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1030,GFX1030W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1030,GFX1030W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
Index: llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
declare amdgpu_gfx float @extern_func(float) #0
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3061,7 +3061,7 @@
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 9
+; GPRIDX-NEXT: wavefront_sgpr_count = 13
; GPRIDX-NEXT: workitem_vgpr_count = 3
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
@@ -3913,7 +3913,7 @@
; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
-; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0
+; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
; GPRIDX-NEXT: priority = 0
; GPRIDX-NEXT: float_mode = 240
; GPRIDX-NEXT: priv = 0
@@ -3956,7 +3956,7 @@
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 6
+; GPRIDX-NEXT: wavefront_sgpr_count = 10
; GPRIDX-NEXT: workitem_vgpr_count = 2
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
@@ -4259,7 +4259,7 @@
; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
-; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0
+; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
; GPRIDX-NEXT: priority = 0
; GPRIDX-NEXT: float_mode = 240
; GPRIDX-NEXT: priv = 0
@@ -4302,7 +4302,7 @@
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 7
+; GPRIDX-NEXT: wavefront_sgpr_count = 11
; GPRIDX-NEXT: workitem_vgpr_count = 3
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -251,9 +251,9 @@
STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(&STM,
- CurrentProgramInfo.VCCUsed,
- CurrentProgramInfo.FlatUsed),
+ IsaInfo::getNumExtraSGPRs(
+ &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
CodeObjectVersion);
@@ -721,7 +721,8 @@
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny());
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
Index: clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
===================================================================
--- clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
+++ clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -2,7 +2,7 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
// expected-remark@+9 {{Function Name: foo}}
-// expected-remark@+8 {{ SGPRs: 9}}
+// expected-remark@+8 {{ SGPRs: 13}}
// expected-remark@+7 {{ VGPRs: 10}}
// expected-remark@+6 {{ AGPRs: 12}}
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits