Author: Shilei Tian Date: 2025-07-18T13:08:50-04:00 New Revision: 2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f
URL: https://github.com/llvm/llvm-project/commit/2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f DIFF: https://github.com/llvm/llvm-project/commit/2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f.diff LOG: [AMDGPU] Add support for `v_sat_pk4_i4_[i8,u8]` on gfx1250 (#149528) Co-authored-by: Mekhanoshin, Stanislav <stanislav.mekhanos...@amd.com> Co-authored-by: Foad, Jay <jay.f...@amd.com> Added: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.td llvm/lib/Target/AMDGPU/VOP1Instructions.td llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a916af7e0c2df..d4fef5d46af73 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -684,6 +684,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts") // GFX1250 WMMA builtins TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 4c3f308a6cf75..a21862c4a9395 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -5,6 +5,7 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable typedef unsigned int uint; +typedef unsigned short int ushort; typedef unsigned int __attribute__((ext_vector_type(2))) uint2; typedef half __attribute__((ext_vector_type(2))) half2; @@ -369,6 +370,30 @@ void test_cvt_pk_f16_bf8(global half2* out, short a) out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a); } +// CHECK-LABEL: @test_sat_pk4_i4_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP4]], ptr [[TMP5]], align 2 +// CHECK-NEXT: ret void +// +void test_sat_pk4_i4_i8(ushort *out, uint src) +{ + *out = __builtin_amdgcn_sat_pk4_i4_i8(src); + *out = __builtin_amdgcn_sat_pk4_u4_u8(src); +} + // CHECK-LABEL: @test_permlane16_swap( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index d8fda0e2bcfa3..ecda6c4efefe3 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3572,6 +3572,12 @@ def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">, [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + +def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index cbbb57c6f8122..bf2f37bddb9ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4558,6 +4558,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_pk_u16: case Intrinsic::amdgcn_cvt_pk_f16_fp8: case Intrinsic::amdgcn_cvt_pk_f16_bf8: + case Intrinsic::amdgcn_sat_pk4_i4_i8: + case Intrinsic::amdgcn_sat_pk4_u4_u8: case Intrinsic::amdgcn_fmed3: case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ab7d34002e9f1..9e1951e2946c4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2850,6 +2850,7 @@ def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>; +def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>; def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 80eb5d8b7d571..f621f8581f778 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -803,6 +803,9 @@ let SubtargetPredicate = isGFX1250Plus in { def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>; def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>; } + + defm V_SAT_PK4_I4_I8 : VOP1Inst_t16<"v_sat_pk4_i4_i8", VOP1_I16_I32, int_amdgcn_sat_pk4_i4_i8>; + defm V_SAT_PK4_U4_U8 : VOP1Inst_t16<"v_sat_pk4_u4_u8", VOP1_I16_I32, int_amdgcn_sat_pk4_u4_u8>; } // End SubtargetPredicate = isGFX1250Plus let SubtargetPredicate = isGFX10Plus in { @@ -1158,6 +1161,8 @@ defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>; defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>; defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>; defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">; +defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>; +defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>; defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>; defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll new file mode 100644 index 0000000000000..3a5507063b834 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GISEL-REAL16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GISEL-FAKE16 %s + +declare i16 @llvm.amdgcn.sat.pk4.i4.i8(i32) #0 +declare i16 @llvm.amdgcn.sat.pk4.u4.u8(i32) #0 + +define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_v: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_clause 0x1 +; SDAG-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_v: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_clause 0x1 +; SDAG-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s2 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_v: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_clause 0x1 +; GISEL-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_v: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_clause 0x1 +; GISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 %src) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_s: +; SDAG-REAL16: ; %bb.1: +; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0 +; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-REAL16-NEXT: s_branch .LBB1_0 +; SDAG-REAL16-NEXT: .p2align 8 +; SDAG-REAL16-NEXT: ; %bb.2: +; SDAG-REAL16-NEXT: .LBB1_0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_s: +; SDAG-FAKE16: ; %bb.1: +; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0 +; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-FAKE16-NEXT: s_branch .LBB1_0 +; SDAG-FAKE16-NEXT: .p2align 8 +; SDAG-FAKE16-NEXT: ; %bb.2: +; SDAG-FAKE16-NEXT: .LBB1_0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s8 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_s: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_clause 0x1 +; GISEL-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_s: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_clause 0x1 +; GISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 %src) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_i: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_i: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, 0x64 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_i: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_i: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, 0x64 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 100) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_v: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_clause 0x1 +; SDAG-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_v: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_clause 0x1 +; SDAG-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s2 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_v: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_clause 0x1 +; GISEL-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_v: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_clause 0x1 +; GISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 %src) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_s: +; SDAG-REAL16: ; %bb.1: +; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0 +; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-REAL16-NEXT: s_branch .LBB4_0 +; SDAG-REAL16-NEXT: .p2align 8 +; SDAG-REAL16-NEXT: ; %bb.2: +; SDAG-REAL16-NEXT: .LBB4_0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_s: +; SDAG-FAKE16: ; %bb.1: +; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0 +; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-FAKE16-NEXT: s_branch .LBB4_0 +; SDAG-FAKE16-NEXT: .p2align 8 +; SDAG-FAKE16-NEXT: ; %bb.2: +; SDAG-FAKE16-NEXT: .LBB4_0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s8 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_s: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_clause 0x1 +; GISEL-REAL16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_s: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_clause 0x1 +; GISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x0 +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 %src) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 { +; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_i: +; SDAG-REAL16: ; %bb.0: +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64 +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 +; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; SDAG-REAL16-NEXT: s_endpgm +; +; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_i: +; SDAG-FAKE16: ; %bb.0: +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, 0x64 +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] +; SDAG-FAKE16-NEXT: s_endpgm +; +; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_i: +; GISEL-REAL16: ; %bb.0: +; GISEL-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64 +; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0 +; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-REAL16-NEXT: s_endpgm +; +; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_i: +; GISEL-FAKE16: ; %bb.0: +; GISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, 0x64 +; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] +; GISEL-FAKE16-NEXT: s_endpgm + %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 100) #0 + store i16 %cvt, ptr %out, align 2 + ret void +} + +attributes #0 = { nounwind memory(none) } +attributes #1 = { nounwind } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index f2cf3d58fb0cf..811c6ebfe0161 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -628,6 +628,57 @@ v_cvt_f32_fp8_e32 v1, 3 v_cvt_f32_fp8_e32 v1, v3 // GFX1250: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xd9,0x02,0x7e] +v_cvt_pk_f32_bf8_e32 v[2:3], s3 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xde,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[4:5], s5 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], s5 ; encoding: [0x05,0xde,0x08,0x7e] + +v_cvt_pk_f32_bf8_e32 v[2:3], 3 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xde,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[4:5], 3 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], 3 ; encoding: [0x83,0xde,0x08,0x7e] + +v_cvt_pk_f32_bf8_e32 v[2:3], v3 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xdf,0x04,0x7e] + +v_cvt_pk_f32_bf8_e32 v[4:5], v3 +// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], v3 ; encoding: [0x03,0xdf,0x08,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], s3 +// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xdc,0x04,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], 3 +// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], v3 +// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e] + +v_sat_pk4_i4_i8 v1, v2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, v2 ; encoding: [0x02,0xe7,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, s2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, s2 ; encoding: [0x02,0xe6,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, 2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, 2 ; encoding: [0x82,0xe6,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, 0x1234 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, 0x1234 ; encoding: [0xff,0xe6,0x02,0x7e,0x34,0x12,0x00,0x00] + +v_sat_pk4_u4_u8 v1, v2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, v2 ; encoding: [0x02,0xe9,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, s2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, s2 ; encoding: [0x02,0xe8,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, 2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, 2 ; encoding: [0x82,0xe8,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, 0x1234 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] + v_permlane16_swap_b32 v1, v2 // GFX1250: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0x93,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index b1c4dc62edd6d..3ddbc365224db 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -664,6 +664,36 @@ v_cvt_f32_fp8_e32 v1, 3 v_cvt_f32_fp8_e32 v1, v3 // GFX1250: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xd9,0x02,0x7e] +v_sat_pk4_i4_i8 v1, v2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, v2 ; encoding: [0x02,0xe7,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, s2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, s2 ; encoding: [0x02,0xe6,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, 2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, 2 ; encoding: [0x82,0xe6,0x02,0x7e] + +v_sat_pk4_i4_i8 v1, 0x1234 +// GFX1250: v_sat_pk4_i4_i8_e32 v1, 0x1234 ; encoding: [0xff,0xe6,0x02,0x7e,0x34,0x12,0x00,0x00] + +v_sat_pk4_i4_i8 v1.h, v2 +// GFX1250: v_sat_pk4_i4_i8_e32 v1.h, v2 ; encoding: [0x02,0xe7,0x02,0x7f] + +v_sat_pk4_u4_u8 v1, v2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, v2 ; encoding: [0x02,0xe9,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, s2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, s2 ; encoding: [0x02,0xe8,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, 2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, 2 ; encoding: [0x82,0xe8,0x02,0x7e] + +v_sat_pk4_u4_u8 v1, 0x1234 +// GFX1250: v_sat_pk4_u4_u8_e32 v1, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] + +v_sat_pk4_u4_u8 v1.h, v2 +// GFX1250: v_sat_pk4_u4_u8_e32 v1.h, v2 ; encoding: [0x02,0xe9,0x02,0x7f] + v_permlane16_swap_b32 v1, v2 // GFX1250: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0x93,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index e1cd2e3043693..7386df87f8dab 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -693,3 +693,19 @@ v_cvt_pk_f16_bf8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 // GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index c1d3238b65cbd..0a46f2f074e10 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -749,3 +749,27 @@ v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 v_cvt_pk_f16_fp8 v1, v2.h quad_perm:[0,1,2,3] // GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_dpp v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7f,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_dpp v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7f,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index 100e9f92ff58b..e2763090a8d15 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -169,3 +169,19 @@ v_cvt_pk_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index 2ae103545443c..359aadc49ccc4 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -225,3 +225,27 @@ v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 6b45930a53d73..aa4e49d85f1ff 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -721,6 +721,30 @@ v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] // GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] +v_sat_pk4_i4_i8 v150, v2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, v2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00] + +v_sat_pk4_i4_i8 v150, s2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, s2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x00,0x00,0x00] + +v_sat_pk4_i4_i8 v150, 2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, 2 ; encoding: [0x96,0x00,0xf3,0xd5,0x82,0x00,0x00,0x00] + +v_sat_pk4_i4_i8 v150, 0x1234 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf3,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +v_sat_pk4_u4_u8 v150, v2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, v2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00] + +v_sat_pk4_u4_u8 v150, s2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, s2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x00,0x00,0x00] + +v_sat_pk4_u4_u8 v150, 2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, 2 ; encoding: [0x96,0x00,0xf4,0xd5,0x82,0x00,0x00,0x00] + +v_sat_pk4_u4_u8 v150, 0x1234 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + v_permlane16_swap_b32_e64 v1, v2 // GFX1250: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0xc9,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index ad00832f7543d..8f0c43de07077 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -751,6 +751,36 @@ v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] // GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] +v_sat_pk4_i4_i8 v150, v2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, v2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00] + +v_sat_pk4_i4_i8 v150, s2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, s2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x00,0x00,0x00] + +v_sat_pk4_i4_i8 v150, 2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, 2 ; encoding: [0x96,0x00,0xf3,0xd5,0x82,0x00,0x00,0x00] + +v_sat_pk4_i4_i8 v150, 0x1234 +// GFX1250: v_sat_pk4_i4_i8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf3,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +v_sat_pk4_i4_i8 v150.h, v2 +// GFX1250: v_sat_pk4_i4_i8_e64 v150.h, v2 op_sel:[0,1] ; encoding: [0x96,0x40,0xf3,0xd5,0x02,0x01,0x00,0x00] + +v_sat_pk4_u4_u8 v150, v2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, v2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00] + +v_sat_pk4_u4_u8 v150, s2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, s2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x00,0x00,0x00] + +v_sat_pk4_u4_u8 v150, 2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, 2 ; encoding: [0x96,0x00,0xf4,0xd5,0x82,0x00,0x00,0x00] + +v_sat_pk4_u4_u8 v150, 0x1234 +// GFX1250: v_sat_pk4_u4_u8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +v_sat_pk4_u4_u8 v150.h, v2 +// GFX1250: v_sat_pk4_u4_u8_e64 v150.h, v2 op_sel:[0,1] ; encoding: [0x96,0x40,0xf4,0xd5,0x02,0x01,0x00,0x00] + v_permlane16_swap_b32_e64 v1, v2 // GFX1250: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0xc9,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index 29bb842b529b7..b21fca654590a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -717,3 +717,19 @@ v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 // GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x04,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 row_share:1 fi:1 +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 row_share:1 fi:1 +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 7df92751c38d1..f14705fa9143c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -765,3 +765,27 @@ v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 v_cvt_pk_f16_fp8 v1, v128.h quad_perm:[0,1,2,3] // GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 row_share:1 fi:1 +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150.h, v2 op_sel:[0,1] quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x40,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 row_share:1 fi:1 +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150.h, v2 op_sel:[0,1] quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x40,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index d235aeb9f3e62..b2c2943e2a182 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -253,3 +253,19 @@ v_cvt_pk_f16_fp8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index f25e2a5882436..e3c7c0f8cbc81 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -301,3 +301,27 @@ v_cvt_pk_f16_fp8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_f16_fp8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_i4_i8 v150.h, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_i4_i8_e64_dpp v150.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x40,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_sat_pk4_u4_u8 v150.h, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_sat_pk4_u4_u8_e64_dpp v150.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x40,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index aa968b2bb2bee..5b905820844af 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -809,3 +809,45 @@ 0x03,0xd9,0x02,0x7e # GFX1250: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xd9,0x02,0x7e] + +0xff,0xe6,0x02,0x7e,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e32 v1.l, 0x1234 ; encoding: [0xff,0xe6,0x02,0x7e,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e32 v1, 0x1234 ; encoding: [0xff,0xe6,0x02,0x7e,0x34,0x12,0x00,0x00] + +0x82,0xe6,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_i4_i8_e32 v1.l, 2 ; encoding: [0x82,0xe6,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e32 v1, 2 ; encoding: [0x82,0xe6,0x02,0x7e] + +0x02,0xe6,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_i4_i8_e32 v1.l, s2 ; encoding: [0x02,0xe6,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e32 v1, s2 ; encoding: [0x02,0xe6,0x02,0x7e] + +0x02,0xe7,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_i4_i8_e32 v1.l, v2 ; encoding: [0x02,0xe7,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e32 v1, v2 ; encoding: [0x02,0xe7,0x02,0x7e] + +0x02,0xe7,0x02,0x7f +# GFX1250-REAL16: v_sat_pk4_i4_i8_e32 v1.h, v2 ; encoding: [0x02,0xe7,0x02,0x7f] + +0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.l, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e32 v1, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] + +0x82,0xe8,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.l, 2 ; encoding: [0x82,0xe8,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e32 v1, 2 ; encoding: [0x82,0xe8,0x02,0x7e] + +0x02,0xe8,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.l, s2 ; encoding: [0x02,0xe8,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e32 v1, s2 ; encoding: [0x02,0xe8,0x02,0x7e] + +0x02,0xe9,0x02,0x7e +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.l, v2 ; encoding: [0x02,0xe9,0x02,0x7e] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e32 v1, v2 ; encoding: [0x02,0xe9,0x02,0x7e] + +0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.l, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e32 v1, 0x1234 ; encoding: [0xff,0xe8,0x02,0x7e,0x34,0x12,0x00,0x00] + +0x02,0xe9,0x02,0x7f +# GFX1250-REAL16: v_sat_pk4_u4_u8_e32 v1.h, v2 ; encoding: [0x02,0xe9,0x02,0x7f] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index 913a2a916ff62..c12ecb8d868aa 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -708,3 +708,25 @@ 0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff # GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] # GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] + +0xfa,0xe6,0x02,0x7e,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x00,0xff] + +0xfa,0xe6,0x02,0x7e,0x02,0x39,0x04,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x04,0xff] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe6,0x02,0x7e,0x02,0x39,0x04,0xff] + +0xfa,0xe6,0x02,0x7f,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe6,0x02,0x7f,0x02,0x39,0x00,0xff] + +0xfa,0xe8,0x02,0x7e,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x00,0xff] + +0xfa,0xe8,0x02,0x7e,0x02,0x39,0x04,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x04,0xff] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xe8,0x02,0x7e,0x02,0x39,0x04,0xff] + +0xfa,0xe8,0x02,0x7f,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.h, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe8,0x02,0x7f,0x02,0x39,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index 4afe44e241bf3..d3706f975e914 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -208,3 +208,25 @@ 0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] + +0xe9,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xea,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe6,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05] + +0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xea,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index 1cf3b8807d044..1719592c3dccd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -924,3 +924,59 @@ 0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00 # GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] # GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] + +0x96,0x00,0xf3,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf3,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf3,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +0x96,0x00,0xf3,0xd5,0x82,0x00,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf3,0xd5,0x82,0x00,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64 v150, 2 ; encoding: [0x96,0x00,0xf3,0xd5,0x82,0x00,0x00,0x00] + +0x96,0x00,0xf3,0xd5,0x02,0x00,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x00,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64 v150, s2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x00,0x00,0x00] + +0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64 v150, v2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00] + +0x96,0x40,0xf3,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64 v150.h, v2 op_sel:[0,1] ; encoding: [0x96,0x40,0xf3,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64 v150, v2 ; encoding: [0x96,0x00,0xf3,0xd5,0x02,0x01,0x00,0x00] + +0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +0x96,0x00,0xf4,0xd5,0x82,0x00,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf4,0xd5,0x82,0x00,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, 2 ; encoding: [0x96,0x00,0xf4,0xd5,0x82,0x00,0x00,0x00] + +0x96,0x00,0xf4,0xd5,0x02,0x00,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x00,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, s2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x00,0x00,0x00] + +0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, v2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00] + +0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf4,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +0x96,0x40,0xf4,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64 v150.h, v2 op_sel:[0,1] ; encoding: [0x96,0x40,0xf4,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64 v150, v2 ; encoding: [0x96,0x00,0xf4,0xd5,0x02,0x01,0x00,0x00] + +0x01,0x00,0xc9,0xd5,0x02,0x01,0x00,0x00 +# GFX1250: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0xc9,0xd5,0x02,0x01,0x00,0x00] + +0x01,0x10,0xc9,0xd5,0x02,0x01,0x00,0x00 +# GFX1250: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0xc9,0xd5,0x02,0x01,0x00,0x00] + +0x01,0x18,0xc9,0xd5,0x02,0x01,0x00,0x00 +# GFX1250: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0xc9,0xd5,0x02,0x01,0x00,0x00] + +0x01,0x08,0xc9,0xd5,0x02,0x01,0x00,0x00 +# GFX1250: v_permlane16_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0xc9,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index 83a647ad7c658..34d2104a660d8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -737,3 +737,27 @@ 0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff # GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] # GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] + +0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.l, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] + +0x96,0x40,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.h, v2 op_sel:[0,1] quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x40,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf3,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.l, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 row_share:1 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x05,0xff] + +0x96,0x40,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.h, v2 op_sel:[0,1] quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x40,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf4,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index ef5ede4d1d453..867fee512b424 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -289,3 +289,27 @@ 0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x00,0xf3,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf3,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x40,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_i4_i8_e64_dpp v150.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x40,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_i4_i8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf3,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x00,0xf4,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x00,0xf4,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x40,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_sat_pk4_u4_u8_e64_dpp v150.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x40,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_sat_pk4_u4_u8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf4,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits