https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/156695
>From 22f1911b5b5f7a25d5d9cb74feb864341ef9a782 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 3 Sep 2025 22:33:01 +0900 Subject: [PATCH] AMDGPU: Define agpr versions of ds permute instructions Correctly model these without AV_* operands. This is another step towards removing the special casing in TargetInstrInfo::getRegClass. Also add some tests for this. --- llvm/lib/Target/AMDGPU/DSInstructions.td | 21 +- llvm/test/CodeGen/AMDGPU/ds_permute_a_v.ll | 334 +++++++++++++++++++++ llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s | 28 +- 3 files changed, 365 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/ds_permute_a_v.ll diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 23dd660c3e57e..bec920380e081 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -520,6 +520,19 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag, let has_gds = 0; } +multiclass DS_1A1D_PERMUTE_mc <string opName, SDPatternOperator node = null_frag, + RegisterOperand data_op = VGPROp_32> { + assert OperandIsVGPR<data_op>.ret, + "DS with 2 data operands should be declared with VGPRs"; + def "" : DS_1A1D_PERMUTE<opName, node, data_op>; + + let SubtargetPredicate = isGFX90APlus in { + def _agpr : DS_1A1D_PERMUTE<opName, null_frag, + getEquivalentAGPROperand<data_op>.ret>; + } +} + + class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0, bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> { @@ -837,10 +850,10 @@ def DS_NOP : DS_VOID<"ds_nop">; let SubtargetPredicate = isGFX8Plus in { let Uses = [EXEC] in { -def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32", - int_amdgcn_ds_permute>; -def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", - int_amdgcn_ds_bpermute>; +defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE_mc<"ds_permute_b32", + int_amdgcn_ds_permute>; +defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE_mc<"ds_bpermute_b32", + int_amdgcn_ds_bpermute>; } } // let SubtargetPredicate = isGFX8Plus diff --git a/llvm/test/CodeGen/AMDGPU/ds_permute_a_v.ll b/llvm/test/CodeGen/AMDGPU/ds_permute_a_v.ll new file mode 100644 index 0000000000000..5cd798d4f6db1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ds_permute_a_v.ll @@ -0,0 +1,334 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s + +; Try to stress ds.bpermute and ds.permute instructions with AGPR/AV +; inputs. It's not permissible to mix AGPRs and VGPR data operands. + +define void @ds_bpermute_b32_a_a__use_a(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_a_a__use_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 +; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=a"() + %op1 = call i32 asm "; def $0", "=a"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "a"(i32 %bpermute) + ret void +} + +define void @ds_bpermute_b32_v_a__use_a(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_v_a__use_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=v"() + %op1 = call i32 asm "; def $0", "=a"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "a"(i32 %bpermute) + ret void +} + +define void @ds_bpermute_b32_a_v__use_a(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_a_v__use_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 +; CHECK-NEXT: ds_bpermute_b32 v0, v1, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=a"() + %op1 = call i32 asm "; def $0", "=v"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "a"(i32 %bpermute) + ret void +} + +define void @ds_bpermute_b32_a_a__use_v(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_a_a__use_v: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 +; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=a"() + %op1 = call i32 asm "; def $0", "=a"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "v"(i32 %bpermute) + ret void +} + +define void @ds_bpermute_b32_v_v__use_a(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_v_v__use_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=v"() + %op1 = call i32 asm "; def $0", "=v"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "a"(i32 %bpermute) + ret void +} + +define void @ds_bpermute_b32_av_av__use_av(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_av_av__use_av: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=^VA"() + %op1 = call i32 asm "; def $0", "=^VA"() + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "^VA"(i32 %bpermute) + ret void +} + +define i32 @ds_bpermute_b32_av_av_no_vgprs(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_bpermute_b32_av_av_no_vgprs: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v[0:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 +; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 +; CHECK-NEXT: ds_bpermute_b32 v0, v0, v1 +; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 10 + %gep.1 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 24 + %op0 = call i32 asm sideeffect "; def $0", "=^VA"() + %op1 = call i32 asm sideeffect "; def $0", "=^VA"() + %vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"() + %vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0 + %vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1 + %permute = call i32 @llvm.amdgcn.ds.bpermute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1) + ret i32 %permute +} + +define void @ds_permute_b32_a_a__use_a(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_permute_b32_a_a__use_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 +; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 +; CHECK-NEXT: ds_permute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=a"() + %op1 = call i32 asm "; def $0", "=a"() + %permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "a"(i32 %permute) + ret void +} + +define void @ds_permute_b32_av_av__use_av(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_permute_b32_av_av__use_av: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ds_permute_b32 v0, v0, v1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %op0 = call i32 asm "; def $0", "=^VA"() + %op1 = call i32 asm "; def $0", "=^VA"() + %permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "^VA"(i32 %permute) + ret void +} + +define i32 @ds_permute_b32_av_av_no_vgprs(ptr addrspace(3) %lds) #0 { +; CHECK-LABEL: ds_permute_b32_av_av_no_vgprs: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_accvgpr_write_b32 a2, v40 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a3, v41 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a4, v42 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a5, v43 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a6, v44 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a7, v45 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a8, v46 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a9, v47 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a10, v56 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a11, v57 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a12, v58 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a13, v59 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a14, v60 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a15, v61 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a16, v62 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a17, v63 ; Reload Reuse +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def a1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v[0:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 +; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 +; CHECK-NEXT: ds_permute_b32 v0, v0, v1 +; CHECK-NEXT: v_accvgpr_read_b32 v63, a17 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v62, a16 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v61, a15 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v60, a14 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v59, a13 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v58, a12 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v57, a11 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v56, a10 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v47, a9 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v46, a8 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v45, a7 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v44, a6 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v43, a5 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v42, a4 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v41, a3 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v40, a2 ; Reload Reuse +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 10 + %gep.1 = getelementptr inbounds [512 x i32], ptr addrspace(3) %lds, i32 0, i32 24 + %op0 = call i32 asm sideeffect "; def $0", "=^VA"() + %op1 = call i32 asm sideeffect "; def $0", "=^VA"() + %vgpr.def = call { <32 x i32>, <32 x i32> } asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"() + %vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0 + %vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1 + %permute = call i32 @llvm.amdgcn.ds.permute(i32 %op0, i32 %op1) + call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1) + ret i32 %permute +} + +attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" } diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index e8653c4681c1f..d0dc0c76fa0f3 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -8282,59 +8282,59 @@ ds_swizzle_b32 a5, v1 ds_swizzle_b32 a5, v1 offset:swizzle(BITMASK_PERM,"00p00") // GFX90A: ds_permute_b32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v1, a2 offset:65535 // GFX90A: ds_permute_b32 a255, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0x01,0x02,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a255, v1, a2 offset:65535 // GFX90A: ds_permute_b32 a5, v255, a2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0xff,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v255, a2 offset:65535 // GFX90A: ds_permute_b32 a5, v1, a255 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0x01,0xff,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v1, a255 offset:65535 // GFX90A: ds_permute_b32 a5, v1, a2 ; encoding: [0x00,0x00,0x7c,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v1, a2 // GFX90A: ds_permute_b32 a5, v1, a2 ; encoding: [0x00,0x00,0x7c,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v1, a2 // GFX90A: ds_permute_b32 a5, v1, a2 offset:4 ; encoding: [0x04,0x00,0x7c,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_permute_b32 a5, v1, a2 offset:4 // GFX90A: ds_bpermute_b32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7e,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v1, a2 offset:65535 // GFX90A: ds_bpermute_b32 a255, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7e,0xda,0x01,0x02,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a255, v1, a2 offset:65535 // GFX90A: ds_bpermute_b32 a5, v255, a2 offset:65535 ; encoding: [0xff,0xff,0x7e,0xda,0xff,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v255, a2 offset:65535 // GFX90A: ds_bpermute_b32 a5, v1, a255 offset:65535 ; encoding: [0xff,0xff,0x7e,0xda,0x01,0xff,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v1, a255 offset:65535 // GFX90A: ds_bpermute_b32 a5, v1, a2 ; encoding: [0x00,0x00,0x7e,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v1, a2 // GFX90A: ds_bpermute_b32 a5, v1, a2 ; encoding: [0x00,0x00,0x7e,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v1, a2 // GFX90A: ds_bpermute_b32 a5, v1, a2 offset:4 ; encoding: [0x04,0x00,0x7e,0xda,0x01,0x02,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode ds_bpermute_b32 a5, v1, a2 offset:4 // GFX90A: ds_add_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0x01,0x02,0x00,0x00] _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits