https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/159654
None >From b83405b879b471da983f885bfdffb3d1f58130de Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin <stanislav.mekhanos...@amd.com> Date: Thu, 18 Sep 2025 14:30:20 -0700 Subject: [PATCH] [AMDGPU] gfx1251 VOP3 dpp support --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 64 ++++++-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 78 +++++---- llvm/test/CodeGen/AMDGPU/dpp64_combine.ll | 4 + llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s | 150 ++++++++++++++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s | 58 +++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop1_err.s | 150 ++++++++++++++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s | 34 ++++ .../AMDGPU/gfx1251_asm_vop3_from_vop2_err.s | 93 +++++++++++ llvm/test/MC/AMDGPU/vop3-gfx9.s | 4 +- .../AMDGPU/gfx1251_dasm_vop3_dpp16.txt | 94 +++++++++++ .../gfx1251_dasm_vop3_from_vop1_dpp16.txt | 43 +++++ .../gfx1251_dasm_vop3_from_vop2_dpp16.txt | 25 +++ 13 files changed, 745 insertions(+), 53 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c49f1930705aa..18fae6cfc7ed9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1969,6 +1969,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> { RegisterOperand ret = !cond(!eq(VT, i1) : SSrc_i1, !eq(VT, i16) : !if 
(IsFake16, VCSrc_b16, VCSrcT_b16), + !eq(VT, i64) : VCSrc_b64, !eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16), !eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16), !eq(VT, v2i16) : VCSrc_v2b16, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 582a353632436..e6a7c35dce0be 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -24,6 +24,7 @@ def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> { } def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> { let Outs64 = (outs DstRC.RegClass:$vdst); + let HasExt64BitDPP = 1; let IsSingle = 1; } } @@ -51,7 +52,24 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { let HasExt64BitDPP = 1 in { def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>; -def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>; +def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> { + let OutsVOP3DPP = Outs64; + let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret; + let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret; + let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret; +} + +def VOP3b_I64_I1_I32_I32_I64_DPP : VOPProfile<[i64, i32, i32, i64]> { + let HasClamp = 1; + + let IsSingle = 1; + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); + let OutsVOP3DPP = Outs64; + let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; + let AsmVOP3DPP = getAsmVOP3DPP<Asm64>.ret; + let AsmVOP3DPP16 = getAsmVOP3DPP16<Asm64>.ret; + let AsmVOP3DPP8 = getAsmVOP3DPP8<Asm64>.ret; +} class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> { let HasExtVOP3DPP = 0; @@ -229,7 +247,7 @@ defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32 // result *= 2^64 // let SchedRW = [WriteDouble], FPDPRounding = 1 in -defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>; +defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>; } // End Uses = [MODE, VCC, EXEC] } // End 
isCommutable = 1 @@ -294,7 +312,7 @@ defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_ defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in { - defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>; + defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP_F64_F64_F64_F64_DPP_PROF, AMDGPUdiv_fixup>; defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>; } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 } // End isReMaterializable = 1 @@ -335,7 +353,7 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d // Double precision division pre-scale. let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in - defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>; + defm V_DIV_SCALE_F64 : VOP3Inst <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>; } // End mayRaiseFPException = 0 let isReMaterializable = 1 in @@ -408,9 +426,9 @@ defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End SubtargetPredicate = isGFX7Plus let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { - let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in { - defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; - defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; + let SubtargetPredicate = isGFX7Plus in { + defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>; + defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>; } let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug], Constraints = "@earlyclobber $vdst" in { @@ -2054,8 +2072,8 @@ defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>; defm 
V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>; defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">; defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">; -defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>; -defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>; +defm V_MINIMUM_F64 : VOP3Only_Realtriple_gfx12<0x341>; +defm V_MAXIMUM_F64 : VOP3Only_Realtriple_gfx12<0x342>; defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>; defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>; defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">; @@ -2127,6 +2145,13 @@ multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> : VOP3be_Real<GFX11Gen, op, opName, asmName>, VOP3be_Real<GFX12Gen, op, opName, asmName>; +multiclass VOP3be_Real_gfx11_gfx12_not_gfx1250<bits<10> op, string opName, string asmName> : + VOP3be_Real<GFX11Gen, op, opName, asmName>, + VOP3be_Real<GFX12Not12_50Gen, op, opName, asmName>; + +multiclass VOP3be_Realtriple_gfx1250<bits<10> op> : + VOP3be_Realtriple<GFX1250Gen, op>; + multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> : VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>; @@ -2141,7 +2166,7 @@ defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>; defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>; defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>; defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; -defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; +defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x216, "v_alignbit_b32">; defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; @@ -2161,9 +2186,9 @@ defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>; defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>; 
defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>; defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>; -defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12<0x228>; +defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x228>; defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>; -defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12<0x238>; +defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x238>; defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>; defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>; defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>; @@ -2205,7 +2230,7 @@ defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>; defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">; defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; -defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; +defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12_not_gfx1250<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">; defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">; @@ -2228,7 +2253,7 @@ defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>; defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>; defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>; -defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>; +defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32b>; defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>; defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>; defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>; @@ -2237,8 +2262,8 @@ defm 
V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33 defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">; defm V_ASHRREV_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">; defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>; -defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>; -defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>; +defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33d>; +defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33e>; defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2 let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2 @@ -2260,9 +2285,16 @@ let AssemblerPredicate = isGFX11Plus in { } // These instructions differ from GFX12 variant by supporting DPP: +defm V_FMA_F64 : VOP3Only_Realtriple_gfx1250<0x214>; +defm V_DIV_FIXUP_F64 : VOP3Only_Realtriple_gfx1250<0x228>; +defm V_DIV_FMAS_F64 : VOP3Only_Realtriple_gfx1250<0x238>; +defm V_DIV_SCALE_F64 : VOP3be_Realtriple_gfx1250<0x2fd>; +defm V_LDEXP_F64 : VOP3Only_Realtriple_gfx1250<0x32b>; defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>; defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>; defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>; +defm V_LSHRREV_B64 : VOP3Only_Realtriple_gfx1250<0x33d>; +defm V_ASHRREV_I64 : VOP3Only_Realtriple_gfx1250<0x33e>; defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>; defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index b900510d7622a..631f0f3318cd1 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1041,8 +1041,9 @@ class VOP3_DPP_Pseudo <string OpName, VOPProfile P> : let Size = 12; let VOP3 = 1; let AsmMatchConverter = 
"cvtVOP3DPP"; - let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, - AMDGPUAsmVariants.Disable); + let AsmVariantName = !if(!or(P.HasExtVOP3DPP, P.HasExt64BitDPP), + AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); } class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> : @@ -1115,8 +1116,9 @@ class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16, let OutOperandList = P.OutsVOP3DPP; let AsmMatchConverter = "cvtVOP3DPP"; let VOP3 = 1; - let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, - AMDGPUAsmVariants.Disable); + let AsmVariantName = !if(!or(P.HasExtVOP3DPP, P.HasExt64BitDPP), + AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); let Size = 12; } @@ -1855,10 +1857,12 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName, } } } - def Gen.Suffix#"_VOP3_alias" : LetDummies, - AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic), - ps.Mnemonic, pseudo_mnemonic), asmName, ""> { - let AssemblerPredicate = Gen.AssemblerPredicate; + if !ne(ps.Mnemonic, asmName) then { + def Gen.Suffix#"_VOP3_alias" : LetDummies, + AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic), + ps.Mnemonic, pseudo_mnemonic), asmName, ""> { + let AssemblerPredicate = Gen.AssemblerPredicate; + } } } @@ -1902,33 +1906,36 @@ multiclass VOP3_Real_dpp_with_name<GFXGen Gen, bits<10> op, string opName, multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); - def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> { - let DecoderNamespace = Gen.DecoderNamespace; - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> { + let DecoderNamespace = Gen.DecoderNamespace; + let AssemblerPredicate = Gen.AssemblerPredicate; + } } multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); - def 
_e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> { - let Inst{11} = ?; - let Inst{12} = ?; - let AsmString = asmName # ps.Pfl.AsmVOP3DPP8; - let DecoderNamespace = Gen.DecoderNamespace - # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> { + let Inst{11} = ?; + let Inst{12} = ?; + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8; + let DecoderNamespace = Gen.DecoderNamespace + # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); + let AssemblerPredicate = Gen.AssemblerPredicate; + } } multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); - let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, - DecoderNamespace = Gen.DecoderNamespace# - !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), - True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - NoTrue16Predicate) in { - defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>; + if !not(ps.Pfl.HasExt64BitDPP) then + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, + DecoderNamespace = Gen.DecoderNamespace# + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), + True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, + NoTrue16Predicate) in { + defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>; } } @@ -1955,10 +1962,11 @@ multiclass VOP3be_Real_dpp<GFXGen Gen, bits<10> op, string opName, multiclass VOP3be_Real_dpp8<GFXGen Gen, bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName #"_e64"); - def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> { - let DecoderNamespace = Gen.DecoderNamespace; - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> { + let DecoderNamespace = Gen.DecoderNamespace; + let AssemblerPredicate = Gen.AssemblerPredicate; + } } // VOP1 and VOP2 depend on these triple defs @@ 
-2105,6 +2113,9 @@ multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> : multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> : VOP3_Realtriple<GFX1250Gen, op, isSingle>; +multiclass VOP3Only_Realtriple_gfx12_not_gfx1250<bits<10> op, bit isSingle = 0> : + VOP3_Realtriple<GFX12Not12_50Gen, op, isSingle>; + multiclass VOP3Only_Realtriple_with_name_gfx1250<bits<10> op, string opName, string asmName, string pseudo_mnemonic = "", bit isSingle = 0> : @@ -2144,11 +2155,8 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250<bits<10> op, multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName, string asmName, bit isSingle = 0> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); - let AsmString = asmName # ps.AsmOperands, - IsSingle = !or(isSingle, ps.Pfl.IsSingle) in - def _e64_gfx12 : - VOP3_Real_Gen<ps, GFX12Gen, asmName>, - VOP3be_gfx11_gfx12<op, ps.Pfl>; + defm NAME : VOP3be_Realtriple<GFX12Gen, op, !or(isSingle, ps.Pfl.IsSingle), + opName, asmName>; def : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> { let AssemblerPredicate = GFX12Gen.AssemblerPredicate; } diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll index 43f6def22d981..6c226bd12d79c 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll @@ -3,6 +3,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10 -DCTL=row_share ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11 -DCTL=row_share ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX1250 -DCTL=row_share +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,DPP64-GFX1251 -DCTL=row_share ; GCN-LABEL: {{^}}dpp64_ceil: ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], @@ -23,6 +24,8 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) { ; 
GCN-LABEL: {{^}}dpp64_rcp: ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], ; DPP64-GFX9: v_rcp_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} +; DPP64-GFX1251: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} +; DPP64-GFX1251: v_rcp_f64_e32 ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -79,6 +82,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) { ; GFX1250: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]] ; GFX1250: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GFX1250: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}} +; DPP64-GFX1251: v_mul_lo_u32_e64_dpp [[V]], [[V]], [[V]] [[CTL]]:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} define amdgpu_kernel void @dpp_mul_row_share(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s new file mode 100644 index 0000000000000..d3a22a995673e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s @@ -0,0 +1,150 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] row_share:3 +// GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_lshl_add_u64 v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_fma_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_fma_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fma_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_fixup_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_fixup_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_div_fixup_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_fmas_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_fmas_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_div_fmas_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_scale_f64 v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_scale_f64_e64_dpp v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_div_scale_f64 v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_co_u64_u32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1251: v_mad_co_u64_u32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mad_co_u64_u32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_co_i64_i32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1251: v_mad_co_i64_i32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mad_co_i64_i32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_minimum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1251: v_minimum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_minimum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_maximum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1251: v_maximum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_maximum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ldexp_f64 v[4:5], v[2:3], v6 row_share:1 +// GFX1251: v_ldexp_f64_e64_dpp v[4:5], v[2:3], v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_ldexp_f64 v[4:5], v[2:3], v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_lo_u32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_lo_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_lo_u32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_hi_u32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_hi_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_hi_u32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_hi_i32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_hi_i32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_hi_i32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_lshrrev_b64 v[4:5], v2, v[6:7] row_share:1 +// GFX1251: v_lshrrev_b64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_lshrrev_b64 v[4:5], v2, v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ashrrev_i64 v[4:5], v2, v[6:7] row_share:1 +// GFX1251: v_ashrrev_i64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_ashrrev_i64 v[4:5], v2, v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_u32 v2, v4, v7, v8 row_share:3 fi:1 +// GFX1251: v_mad_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_i64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_i64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_u64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_max_u64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_i64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_i64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_u64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_u64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_u64_u32 v[2:3], v4, v7, v[8:9] row_share:3 fi:1 +// GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_mad_nc_u64_u32 v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_i64_i32 v[2:3], v4, v7, v[8:9] row_share:3 fi:1 +// GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_i64_i32 v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s new file mode 100644 index 0000000000000..19b138e63bb93 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s @@ -0,0 +1,58 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251: v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] 
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_ceil_f64_e64_dpp v[2:3], v[4:5] 
row_share:1 +// GFX1251: v_ceil_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s new file mode 100644 
index 0000000000000..8b30278c7820c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s @@ -0,0 +1,150 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64_e64_dpp v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mov_b64_e64_dpp v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_floor_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_fract_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mov_b64_e64_dpp v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mov_b64_e64_dpp v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// 
GFX1251-ERR-NEXT:{{^}}v_trunc_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_floor_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fract_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rcp_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_rsq_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_sqrt_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: 
:[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s new file mode 100644 index 0000000000000..f4c7f6ac336dd --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// 
GFX1251: v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 +// GFX1251: v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s new file mode 100644 index 0000000000000..0ff0d4288d984 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s @@ -0,0 +1,93 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + 
+v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmaak_f32_e64_dpp v4, v2, v6, 3 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmamk_f32_e64_dpp v4, v2, 3, v6 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmaak_f16_e64_dpp v4, v2, v6, 3 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmamk_f16_e64_dpp v4, v2, 3, v6 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index a61b0c87e199f..f98f33a979bc5 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -723,8 +723,8 @@ v_add_f64 v[0:1], s0, v[0:1] // NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_add_f64 v[0:1], s[0:3], v[0:1] -// NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: too few operands for instruction +// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: too few operands for instruction v_add_f64 v[0:1], v0, v[0:1] // NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt new file mode 100644 index 0000000000000..056250d2dd436 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt @@ -0,0 +1,94 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | 
FileCheck -check-prefixes=GFX1251 %s + +# GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff] +0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff + +# GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff] +0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff + +# GFX1251: v_ashrrev_i64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_div_fixup_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_div_fmas_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# W32: v_div_scale_f64_e64_dpp v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_div_scale_f64_e64_dpp v[4:5], s[2:3], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_fma_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_ldexp_f64_e64_dpp v[4:5], v[2:3], v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_lshrrev_b64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# W32: v_mad_co_i64_i32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_mad_co_i64_i32_e64_dpp v[4:5], s[2:3], v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# W32: v_mad_co_u64_u32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_mad_co_u64_u32_e64_dpp v[4:5], s[2:3], v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_maximum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_minimum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_hi_i32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_hi_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_lo_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mad_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff + +# GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] 
row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff + +# GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt new file mode 100644 index 0000000000000..3bc7b01740061 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt @@ -0,0 +1,43 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck 
-check-prefix=GFX1251 %s + +# GFX1251: v_ceil_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] 
+0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt new file mode 100644 index 0000000000000..d379f6968eaf6 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1251 %s + +# GFX1251: v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits