llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@<!-- -->amd.com> --- Patch is 70.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148921.diff 22 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll (+33) - (added) llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll (+106) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+56) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+60) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+16) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+56) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+60) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+16) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+20) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+63) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+59) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+16) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+64) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+60) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+20) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6cf2055c8e565..e2acde04f35e5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -938,6 +938,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); } + if (Subtarget->hasBF16TransInsts()) { + setOperationAction({ISD::FEXP2, ISD::FLOG2, ISD::FSQRT}, MVT::bf16, Legal); + } + if (Subtarget->hasCvtPkF16F32Inst()) { setOperationAction(ISD::FP_ROUND, {MVT::v2f16, MVT::v4f16, MVT::v8f16, MVT::v16f16}, diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 28f239ba8c396..e2f371079179d 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -530,6 +530,7 @@ defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; let SubtargetPredicate = HasBF16TransInsts in { defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; +defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; @@ -1139,6 +1140,7 @@ defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>; defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>; defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>; defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>; +defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll new file mode 100644 index 0000000000000..5287b5dba848f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll @@ -0,0 +1,33 @@ +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s +; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s + +; FIXME: GlobalISel does not work with bf16 + +declare bfloat @llvm.amdgcn.sqrt.bf16(bfloat) #0 + +; GCN-LABEL: {{^}}sqrt_bf16: +; GCN: v_sqrt_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}} +define amdgpu_kernel void @sqrt_bf16(ptr addrspace(1) %out, bfloat %src) #1 { + %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat %src) #0 + store bfloat %sqrt, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}sqrt_bf16_constant_4 +; GCN: v_sqrt_bf16_e32 v0, 4.0 +define amdgpu_kernel void @sqrt_bf16_constant_4(ptr addrspace(1) %out) #1 { + %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat 4.0) #0 + store bfloat %sqrt, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}sqrt_bf16_constant_100 +; GCN: v_sqrt_bf16_e32 {{v[0-9]+}}, 0x42c8 +define amdgpu_kernel void @sqrt_bf16_constant_100(ptr addrspace(1) %out) #1 { + %sqrt = call bfloat @llvm.amdgcn.sqrt.bf16(bfloat 100.0) #0 + store bfloat %sqrt, ptr addrspace(1) %out, align 2 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll new file mode 100644 index 0000000000000..5936d6aa86b82 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12-FAKE16 %s + +; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select. + +declare bfloat @llvm.sqrt.bf16(bfloat %a) +declare <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> %a) + +define amdgpu_kernel void @sqrt_bf16(ptr addrspace(1) %r, ptr addrspace(1) %a) { +; GFX12-TRUE16-LABEL: sqrt_bf16: +; GFX12-TRUE16: ; %bb.0: ; %entry +; GFX12-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX12-TRUE16-NEXT: s_mov_b32 s6, -1 +; GFX12-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX12-TRUE16-NEXT: s_mov_b32 s10, s6 +; GFX12-TRUE16-NEXT: s_mov_b32 s11, s7 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0 +; GFX12-TRUE16-NEXT: buffer_load_u16 v0, off, s[8:11], null +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1 +; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 +; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l +; GFX12-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX12-TRUE16-NEXT: s_endpgm +; +; GFX12-FAKE16-LABEL: sqrt_bf16: +; GFX12-FAKE16: ; %bb.0: ; %entry +; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX12-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX12-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX12-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX12-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX12-FAKE16-NEXT: buffer_load_u16 v0, off, s[8:11], null +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0 +; GFX12-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null +; GFX12-FAKE16-NEXT: s_endpgm +entry: + %a.val = load bfloat, ptr addrspace(1) %a + %r.val = call bfloat @llvm.sqrt.bf16(bfloat %a.val) + store bfloat %r.val, ptr addrspace(1) %r + ret void +} + +define amdgpu_kernel void @sqrt_v2bf16(ptr addrspace(1) %r, ptr addrspace(1) %a) { +; GFX12-TRUE16-LABEL: sqrt_v2bf16: +; GFX12-TRUE16: ; %bb.0: ; %entry +; GFX12-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX12-TRUE16-NEXT: s_mov_b32 s6, -1 +; GFX12-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX12-TRUE16-NEXT: s_mov_b32 s10, s6 +; GFX12-TRUE16-NEXT: s_mov_b32 s11, s7 +; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-TRUE16-NEXT: s_mov_b32 s8, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 s9, s3 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s1 +; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 +; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v1.l, v0.l +; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_2) +; GFX12-TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l +; GFX12-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX12-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX12-TRUE16-NEXT: s_endpgm +; +; GFX12-FAKE16-LABEL: sqrt_v2bf16: +; GFX12-FAKE16: ; %bb.0: ; %entry +; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX12-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX12-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX12-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX12-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX12-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], null +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 +; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v1, v0 +; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_2) +; GFX12-FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0 +; GFX12-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX12-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null +; GFX12-FAKE16-NEXT: s_endpgm +entry: + %a.val = load <2 x bfloat>, ptr addrspace(1) %a + %r.val = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> %a.val) + store <2 x bfloat> %r.val, ptr addrspace(1) %r + ret void +} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index ce8f54a7ef9fc..c587b66e65011 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -118,6 +118,51 @@ v_rcp_bf16 v5, src_scc v_rcp_bf16 v127, 0x8000 // GFX1250: v_rcp_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf2,0xfe,0x7e,0x00,0x80,0x00,0x00] +v_sqrt_bf16 v5, v1 +// GFX1250: v_sqrt_bf16_e32 v5, v1 ; encoding: [0x01,0xf5,0x0a,0x7e] + +v_sqrt_bf16 v5, v127 +// GFX1250: v_sqrt_bf16_e32 v5, v127 ; encoding: [0x7f,0xf5,0x0a,0x7e] + +v_sqrt_bf16 v5, s1 +// GFX1250: v_sqrt_bf16_e32 v5, s1 ; encoding: [0x01,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, s105 +// GFX1250: v_sqrt_bf16_e32 v5, s105 ; encoding: [0x69,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, vcc_lo +// GFX1250: v_sqrt_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, vcc_hi +// GFX1250: v_sqrt_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, ttmp15 +// GFX1250: v_sqrt_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, m0 +// GFX1250: v_sqrt_bf16_e32 v5, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, exec_lo +// GFX1250: v_sqrt_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, exec_hi +// GFX1250: v_sqrt_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, null +// GFX1250: v_sqrt_bf16_e32 v5, null ; encoding: [0x7c,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, -1 +// GFX1250: v_sqrt_bf16_e32 v5, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, 0.5 +// GFX1250: v_sqrt_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, src_scc +// GFX1250: v_sqrt_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v127, 0x8000 +// GFX1250: v_sqrt_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 7001a1f1c4622..719eb3abc02a3 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -124,6 +124,54 @@ v_rcp_bf16 v127, 0x8000 v_rcp_bf16 v5.h, v1.h // GFX1250: v_rcp_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf3,0x0a,0x7f] +v_sqrt_bf16 v5, v1 +// GFX1250: v_sqrt_bf16_e32 v5, v1 ; encoding: [0x01,0xf5,0x0a,0x7e] + +v_sqrt_bf16 v5, v127 +// GFX1250: v_sqrt_bf16_e32 v5, v127 ; encoding: [0x7f,0xf5,0x0a,0x7e] + +v_sqrt_bf16 v5, s1 +// GFX1250: v_sqrt_bf16_e32 v5, s1 ; encoding: [0x01,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, s105 +// GFX1250: v_sqrt_bf16_e32 v5, s105 ; encoding: [0x69,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, vcc_lo +// GFX1250: v_sqrt_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, vcc_hi +// GFX1250: v_sqrt_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, ttmp15 +// GFX1250: v_sqrt_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, m0 +// GFX1250: v_sqrt_bf16_e32 v5, m0 ; encoding: [0x7d,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, exec_lo +// GFX1250: v_sqrt_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, exec_hi +// GFX1250: v_sqrt_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, null +// GFX1250: v_sqrt_bf16_e32 v5, null ; encoding: [0x7c,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, -1 +// GFX1250: v_sqrt_bf16_e32 v5, -1 ; encoding: [0xc1,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, 0.5 +// GFX1250: v_sqrt_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v5, src_scc +// GFX1250: v_sqrt_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf4,0x0a,0x7e] + +v_sqrt_bf16 v127, 0x8000 +// GFX1250: v_sqrt_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf4,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_sqrt_bf16 v5.h, v1.h +// GFX1250: v_sqrt_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf5,0x0a,0x7f] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index 3de8fc29bb01a..44859fcffe223 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -114,6 +114,62 @@ v_rcp_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_rcp_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf2,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_sqrt_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_mirror +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_half_mirror +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shl:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shl:15 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shr:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shr:15 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_ror:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_ror:15 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_sqrt_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 4632b1574731b..8fef387700972 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -122,6 +122,66 @@ v_rcp_bf16 v5.h, v1.h quad_perm:[3,2,1,0] // GFX1250: v_rcp_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7f,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_sqrt_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_sqrt_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_mirror +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_half_mirror +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shl:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shl:15 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shr:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_shr:15 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_ror:1 +// GFX1250: v_sqrt_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf4,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sqrt_bf16 v5, v1 row_ror:15 +// GFX1250: v_sqrt_bf16_dpp v5... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/148921 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits