https://github.com/shiltian created 
https://github.com/llvm/llvm-project/pull/149355

Co-authored-by: Mekhanoshin, Stanislav <stanislav.mekhanos...@amd.com>

>From a6b7ccf491c4d88b18bfdba0dbf839030df189ec Mon Sep 17 00:00:00 2001
From: Shilei Tian <i...@tianshilei.me>
Date: Thu, 17 Jul 2025 12:45:33 -0400
Subject: [PATCH] [AMDGPU] Add support for `v_cos_bf16` on gfx1250

Co-authored-by: Mekhanoshin, Stanislav <stanislav.mekhanos...@amd.com>
---
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   |  1 +
 .../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl  | 19 ++++++
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  2 +
 .../CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll    | 33 ++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 45 +++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s        | 48 ++++++++++++++
 .../MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s | 56 ++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s  | 60 +++++++++++++++++
 .../MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s  | 12 ++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s   | 16 +++++
 .../gfx1250_asm_vop3_from_vop1-fake16.s       | 45 +++++++++++++
 .../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s    | 48 ++++++++++++++
 .../gfx1250_asm_vop3_from_vop1_dpp16-fake16.s | 56 ++++++++++++++++
 .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s | 60 +++++++++++++++++
 .../gfx1250_asm_vop3_from_vop1_dpp8-fake16.s  | 16 +++++
 .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s  | 20 ++++++
 .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 63 ++++++++++++++++++
 .../AMDGPU/gfx1250_dasm_vop1_dpp16.txt        | 59 +++++++++++++++++
 .../AMDGPU/gfx1250_dasm_vop1_dpp8.txt         | 15 +++++
 .../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt    | 64 +++++++++++++++++++
 .../gfx1250_dasm_vop3_from_vop1_dpp16.txt     | 60 +++++++++++++++++
 .../gfx1250_dasm_vop3_from_vop1_dpp8.txt      | 20 ++++++
 22 files changed, 818 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll

diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 32cf622f20605..9f48149354255 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -433,6 +433,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
     return emitBuiltinWithOneOverloadedType<1>(*this, E, 
Intrinsic::amdgcn_sin);
   case AMDGPU::BI__builtin_amdgcn_cosf:
   case AMDGPU::BI__builtin_amdgcn_cosh:
+  case AMDGPU::BI__builtin_amdgcn_cos_bf16:
     return emitBuiltinWithOneOverloadedType<1>(*this, E, 
Intrinsic::amdgcn_cos);
   case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
     return EmitAMDGPUDispatchPtr(*this, E);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 748b6455103ec..a1f984c129276 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -156,6 +156,25 @@ void test_sin_bf16(global __bf16* out, __bf16 a)
   *out = __builtin_amdgcn_sin_bf16(a);
 }
 
+// CHECK-LABEL: @test_cos_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], 
align 8
+// CHECK-NEXT:    store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = call bfloat @llvm.amdgcn.cos.bf16(bfloat 
[[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:    ret void
+//
+void test_cos_bf16(global __bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_cos_bf16(a);
+}
+
 // CHECK-LABEL: @test_cvt_f16_fp8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index c91319eae7218..ff89b8badeed0 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -535,6 +535,7 @@ defm V_RSQ_BF16  : VOP1Inst_t16 <"v_rsq_bf16",  
VOP_BF16_BF16, AMDGPUrsq>;
 defm V_LOG_BF16  : VOP1Inst_t16 <"v_log_bf16",  VOP_BF16_BF16, AMDGPUlogf16>;
 defm V_EXP_BF16  : VOP1Inst_t16 <"v_exp_bf16",  VOP_BF16_BF16, AMDGPUexpf16>;
 defm V_SIN_BF16  : VOP1Inst_t16 <"v_sin_bf16",  VOP_BF16_BF16, AMDGPUsin>;
+defm V_COS_BF16  : VOP1Inst_t16 <"v_cos_bf16",  VOP_BF16_BF16, AMDGPUcos>;
 }
 } // End TRANS = 1, SchedRW = [WriteTrans32]
 defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, 
int_amdgcn_frexp_mant>;
@@ -1149,6 +1150,7 @@ defm V_RSQ_BF16              : 
VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
 defm V_LOG_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
 defm V_EXP_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
 defm V_SIN_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07e>;
+defm V_COS_BF16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07f>;
 
 
//===----------------------------------------------------------------------===//
 // GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll
new file mode 100644
index 0000000000000..091859f3c9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck 
-check-prefixes=GCN %s
+; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck 
-check-prefix=GCN %s
+
+; FIXME: GlobalISel does not work with bf16
+
+declare bfloat @llvm.amdgcn.cos.bf16(bfloat) #0
+
+; GCN-LABEL: {{^}}cos_bf16:
+; GCN: v_cos_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
+  %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat %src) #0
+  store bfloat %cos, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}cos_bf16_constant_4
+; GCN: v_cos_bf16_e32 v0, 4.0
+define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
+  %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 4.0) #0
+  store bfloat %cos, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}cos_bf16_constant_100
+; GCN: v_cos_bf16_e32 {{v[0-9]+}}, 0x42c8
+define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 {
+  %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 100.0) #0
+  store bfloat %cos, ptr addrspace(1) %out, align 2
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index f51d709a594a0..4b61064815ed5 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -343,6 +343,51 @@ v_sin_bf16 v5, src_scc
 v_sin_bf16 v127, 0x8000
 // GFX1250: v_sin_bf16_e32 v127, 0x8000             ; encoding: 
[0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
+v_cos_bf16 v5, v1
+// GFX1250: v_cos_bf16_e32 v5, v1                   ; encoding: 
[0x01,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, v127
+// GFX1250: v_cos_bf16_e32 v5, v127                 ; encoding: 
[0x7f,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, s1
+// GFX1250: v_cos_bf16_e32 v5, s1                   ; encoding: 
[0x01,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, s105
+// GFX1250: v_cos_bf16_e32 v5, s105                 ; encoding: 
[0x69,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_lo
+// GFX1250: v_cos_bf16_e32 v5, vcc_lo               ; encoding: 
[0x6a,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_hi
+// GFX1250: v_cos_bf16_e32 v5, vcc_hi               ; encoding: 
[0x6b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, ttmp15
+// GFX1250: v_cos_bf16_e32 v5, ttmp15               ; encoding: 
[0x7b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, m0
+// GFX1250: v_cos_bf16_e32 v5, m0                   ; encoding: 
[0x7d,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_lo
+// GFX1250: v_cos_bf16_e32 v5, exec_lo              ; encoding: 
[0x7e,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_hi
+// GFX1250: v_cos_bf16_e32 v5, exec_hi              ; encoding: 
[0x7f,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, null
+// GFX1250: v_cos_bf16_e32 v5, null                 ; encoding: 
[0x7c,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, -1
+// GFX1250: v_cos_bf16_e32 v5, -1                   ; encoding: 
[0xc1,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, 0.5
+// GFX1250: v_cos_bf16_e32 v5, 0.5                  ; encoding: 
[0xf0,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, src_scc
+// GFX1250: v_cos_bf16_e32 v5, src_scc              ; encoding: 
[0xfd,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v127, 0x8000
+// GFX1250: v_cos_bf16_e32 v127, 0x8000             ; encoding: 
[0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: 
[0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 39fc73d70cab2..40901618fce95 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -364,6 +364,54 @@ v_sin_bf16 v127, 0x8000
 v_sin_bf16 v5.h, v1.h
 // GFX1250: v_sin_bf16_e32 v5.h, v1.h               ; encoding: 
[0x81,0xfd,0x0a,0x7f]
 
+v_cos_bf16 v5, v1
+// GFX1250: v_cos_bf16_e32 v5, v1                   ; encoding: 
[0x01,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, v127
+// GFX1250: v_cos_bf16_e32 v5, v127                 ; encoding: 
[0x7f,0xff,0x0a,0x7e]
+
+v_cos_bf16 v5, s1
+// GFX1250: v_cos_bf16_e32 v5, s1                   ; encoding: 
[0x01,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, s105
+// GFX1250: v_cos_bf16_e32 v5, s105                 ; encoding: 
[0x69,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_lo
+// GFX1250: v_cos_bf16_e32 v5, vcc_lo               ; encoding: 
[0x6a,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, vcc_hi
+// GFX1250: v_cos_bf16_e32 v5, vcc_hi               ; encoding: 
[0x6b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, ttmp15
+// GFX1250: v_cos_bf16_e32 v5, ttmp15               ; encoding: 
[0x7b,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, m0
+// GFX1250: v_cos_bf16_e32 v5, m0                   ; encoding: 
[0x7d,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_lo
+// GFX1250: v_cos_bf16_e32 v5, exec_lo              ; encoding: 
[0x7e,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, exec_hi
+// GFX1250: v_cos_bf16_e32 v5, exec_hi              ; encoding: 
[0x7f,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, null
+// GFX1250: v_cos_bf16_e32 v5, null                 ; encoding: 
[0x7c,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, -1
+// GFX1250: v_cos_bf16_e32 v5, -1                   ; encoding: 
[0xc1,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, 0.5
+// GFX1250: v_cos_bf16_e32 v5, 0.5                  ; encoding: 
[0xf0,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v5, src_scc
+// GFX1250: v_cos_bf16_e32 v5, src_scc              ; encoding: 
[0xfd,0xfe,0x0a,0x7e]
+
+v_cos_bf16 v127, 0x8000
+// GFX1250: v_cos_bf16_e32 v127, 0x8000             ; encoding: 
[0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+v_cos_bf16 v5.h, v1.h
+// GFX1250: v_cos_bf16_e32 v5.h, v1.h               ; encoding: 
[0x81,0xff,0x0a,0x7f]
+
 v_cvt_f32_bf16 v5, v1
 // GFX1250: v_cvt_f32_bf16_e32 v5, v1               ; encoding: 
[0x01,0xe5,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 97058eb2e7c9f..ab5d55fad49ac 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -394,6 +394,62 @@ v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi
 // GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1
+// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 6a293c19a79a4..dcb613c09a62d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -422,6 +422,66 @@ v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
 // GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1
+// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index d1f53c7b2065c..4b37d648a928c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -86,6 +86,18 @@ v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: 
[0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index dbee9f39df5f5..34489a1133abe 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -114,6 +114,22 @@ v_sin_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: 
[0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 4257334444244..a61f1da5040d9 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -397,6 +397,51 @@ v_sin_bf16_e64 v5, src_scc mul:4
 v_sin_bf16_e64 v255, -|0x8000| clamp div:2
 // GFX1250: v_sin_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: 
[0xff,0x81,0xfe,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
 
+v_cos_bf16_e64 v5, v1
+// GFX1250: v_cos_bf16_e64 v5, v1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, v255
+// GFX1250: v_cos_bf16_e64 v5, v255                 ; encoding: 
[0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, s1
+// GFX1250: v_cos_bf16_e64 v5, s1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, s105
+// GFX1250: v_cos_bf16_e64 v5, s105                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_lo
+// GFX1250: v_cos_bf16_e64 v5, vcc_lo               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_hi
+// GFX1250: v_cos_bf16_e64 v5, vcc_hi               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, ttmp15
+// GFX1250: v_cos_bf16_e64 v5, ttmp15               ; encoding: 
[0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, m0
+// GFX1250: v_cos_bf16_e64 v5, m0                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_lo
+// GFX1250: v_cos_bf16_e64 v5, exec_lo              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_hi
+// GFX1250: v_cos_bf16_e64 v5, exec_hi              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, null
+// GFX1250: v_cos_bf16_e64 v5, null                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, -1
+// GFX1250: v_cos_bf16_e64 v5, -1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2            ; encoding: 
[0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4        ; encoding: 
[0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: 
[0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
 v_cvt_f32_bf16_e64 v5, v1
 // GFX1250: v_cvt_f32_bf16_e64 v5, v1               ; encoding: 
[0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
index 83986a61fd572..dbd1552b84ac2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
@@ -418,6 +418,54 @@ v_sin_bf16_e64 v255, -|0x8000| clamp div:2
 v_sin_bf16 v5.h, v128.h
 // GFX1250: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: 
[0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00]
 
+v_cos_bf16_e64 v5, v1
+// GFX1250: v_cos_bf16_e64 v5, v1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, v255
+// GFX1250: v_cos_bf16_e64 v5, v255                 ; encoding: 
[0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_bf16_e64 v5, s1
+// GFX1250: v_cos_bf16_e64 v5, s1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, s105
+// GFX1250: v_cos_bf16_e64 v5, s105                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_lo
+// GFX1250: v_cos_bf16_e64 v5, vcc_lo               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, vcc_hi
+// GFX1250: v_cos_bf16_e64 v5, vcc_hi               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, ttmp15
+// GFX1250: v_cos_bf16_e64 v5, ttmp15               ; encoding: 
[0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, m0
+// GFX1250: v_cos_bf16_e64 v5, m0                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_lo
+// GFX1250: v_cos_bf16_e64 v5, exec_lo              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, exec_hi
+// GFX1250: v_cos_bf16_e64 v5, exec_hi              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, null
+// GFX1250: v_cos_bf16_e64 v5, null                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, -1
+// GFX1250: v_cos_bf16_e64 v5, -1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_bf16_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_bf16_e64 v5, 0.5 mul:2            ; encoding: 
[0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_bf16_e64 v5, src_scc mul:4
+// GFX1250: v_cos_bf16_e64 v5, src_scc mul:4        ; encoding: 
[0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_bf16_e64 v255, -|0x8000| clamp div:2
+// GFX1250: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: 
[0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+v_cos_bf16_e64 v5.h, v128.h
+// GFX1250: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: 
[0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00]
+
 v_cvt_f32_bf16_e64 v5, v1
 // GFX1250: v_cvt_f32_bf16_e64 v5, v1               ; encoding: 
[0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
index bb6739ec312a5..22ad29a7a8d05 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
@@ -394,6 +394,62 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask
 // GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xfe,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
index 5f6f28e0f6edb..04cf346797845 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
@@ -422,6 +422,66 @@ v_sin_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
 // GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_half_mirror
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shl:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_shr:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_ror:15
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
index 037e7d650ad73..3ec947575f53a 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
@@ -114,6 +114,22 @@ v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_sin_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xfe,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; 
encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
index 53fb0eb4e9517..643731f6d46e7 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s
@@ -142,6 +142,26 @@ v_sin_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; 
encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cos_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index fec2207d70a8e..05c18cbf724ba 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -470,6 +470,69 @@
 0x81,0xfd,0x0a,0x7f
 # GFX1250-REAL16: v_sin_bf16_e32 v5.h, v1.h               ; encoding: 
[0x81,0xfd,0x0a,0x7f]
 
+0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e32 v127.l, 0x8000           ; encoding: 
[0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e32 v127, 0x8000             ; encoding: 
[0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
+
+0xc1,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, -1                 ; encoding: 
[0xc1,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, -1                   ; encoding: 
[0xc1,0xfe,0x0a,0x7e]
+
+0xf0,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, 0.5                ; encoding: 
[0xf0,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, 0.5                  ; encoding: 
[0xf0,0xfe,0x0a,0x7e]
+
+0x7f,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_hi            ; encoding: 
[0x7f,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_hi              ; encoding: 
[0x7f,0xfe,0x0a,0x7e]
+
+0x7e,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, exec_lo            ; encoding: 
[0x7e,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, exec_lo              ; encoding: 
[0x7e,0xfe,0x0a,0x7e]
+
+0x7d,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, m0                 ; encoding: 
[0x7d,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, m0                   ; encoding: 
[0x7d,0xfe,0x0a,0x7e]
+
+0x7c,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, null               ; encoding: 
[0x7c,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, null                 ; encoding: 
[0x7c,0xfe,0x0a,0x7e]
+
+0x01,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s1                 ; encoding: 
[0x01,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, s1                   ; encoding: 
[0x01,0xfe,0x0a,0x7e]
+
+0x69,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, s105               ; encoding: 
[0x69,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, s105                 ; encoding: 
[0x69,0xfe,0x0a,0x7e]
+
+0xfd,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, src_scc            ; encoding: 
[0xfd,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, src_scc              ; encoding: 
[0xfd,0xfe,0x0a,0x7e]
+
+0x7b,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, ttmp15             ; encoding: 
[0x7b,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, ttmp15               ; encoding: 
[0x7b,0xfe,0x0a,0x7e]
+
+0x01,0xff,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v1.l               ; encoding: 
[0x01,0xff,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, v1                   ; encoding: 
[0x01,0xff,0x0a,0x7e]
+
+0x7f,0xff,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, v127.l             ; encoding: 
[0x7f,0xff,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, v127                 ; encoding: 
[0x7f,0xff,0x0a,0x7e]
+
+0x6b,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_hi             ; encoding: 
[0x6b,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_hi               ; encoding: 
[0x6b,0xfe,0x0a,0x7e]
+
+0x6a,0xfe,0x0a,0x7e
+# GFX1250-REAL16: v_cos_bf16_e32 v5.l, vcc_lo             ; encoding: 
[0x6a,0xfe,0x0a,0x7e]
+# GFX1250-FAKE16: v_cos_bf16_e32 v5, vcc_lo               ; encoding: 
[0x6a,0xfe,0x0a,0x7e]
+
+0x81,0xff,0x0a,0x7f
+# GFX1250-REAL16: v_cos_bf16_e32 v5.h, v1.h               ; encoding: 
[0x81,0xff,0x0a,0x7f]
+
 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00
 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000         ; encoding: 
[0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
index dc8c6b15dd1bb..2aad85e5ac539 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
@@ -415,6 +415,65 @@
 0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff
 # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 
+0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30
+# GFX1250-REAL16: v_cos_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-FAKE16: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+
+0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
+
+0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
+
 0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
 # GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index 741bf3fd34d32..f67e104c7dc20 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -110,6 +110,21 @@
 0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
 
+0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; 
encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+
+0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; 
encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; 
encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
+
+0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05]
+
 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
index cd9b7120ca966..641e0872eafe8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt
@@ -450,6 +450,70 @@
 # GFX1250-REAL16: v_sin_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: 
[0x05,0x48,0xfe,0xd5,0x80,0x01,0x00,0x00]
 # GFX1250-FAKE16: v_sin_bf16_e64 v5, v128                 ; encoding: 
[0x05,0x00,0xfe,0xd5,0x80,0x01,0x00,0x00]
 
+0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: 
[0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: 
[0xff,0x81,0xff,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, -1                 ; encoding: 
[0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, -1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0xc1,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, 0.5 mul:2          ; encoding: 
[0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, 0.5 mul:2            ; encoding: 
[0x05,0x00,0xff,0xd5,0xf0,0x00,0x00,0x08]
+
+0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_hi            ; encoding: 
[0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_hi              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_lo            ; encoding: 
[0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_lo              ; encoding: 
[0x05,0x00,0xff,0xd5,0x7e,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, m0                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, m0                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x7d,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, null               ; encoding: 
[0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, null                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x7c,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s1                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, s1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, s105               ; encoding: 
[0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, s105                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x69,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, src_scc mul:4      ; encoding: 
[0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, src_scc mul:4        ; encoding: 
[0x05,0x00,0xff,0xd5,0xfd,0x00,0x00,0x10]
+
+0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, ttmp15             ; encoding: 
[0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, ttmp15               ; encoding: 
[0x05,0x00,0xff,0xd5,0x7b,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v1.l               ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v1                   ; encoding: 
[0x05,0x00,0xff,0xd5,0x01,0x01,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v255.l             ; encoding: 
[0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v255                 ; encoding: 
[0x05,0x00,0xff,0xd5,0xff,0x01,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_hi             ; encoding: 
[0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_hi               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6b,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.l, vcc_lo             ; encoding: 
[0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, vcc_lo               ; encoding: 
[0x05,0x00,0xff,0xd5,0x6a,0x00,0x00,0x00]
+
+0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: 
[0x05,0x48,0xff,0xd5,0x80,0x01,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64 v5, v128                 ; encoding: 
[0x05,0x00,0xff,0xd5,0x80,0x01,0x00,0x00]
+
 0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00
 # GFX1250: v_cvt_f32_bf8_e64 v1, 3                 ; encoding: 
[0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00]
 
diff --git 
a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
index ed07393d18b18..0314ab3b59718 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt
@@ -242,6 +242,66 @@
 # GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x48,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
 # GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xfe,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
 
+0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 
row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xff,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x48,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: 
[0x05,0x00,0xff,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
+
 0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30
 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 
row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
 # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
diff --git 
a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
index a6d6713c1b00d..ead589195ff50 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt
@@ -82,6 +82,26 @@
 # GFX1250-REAL16: v_sin_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x48,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
 # GFX1250-FAKE16: v_sin_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xfe,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
 
+0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xff,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+
+0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+
+0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] 
fi:1 ; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 
; encoding: [0x05,0x00,0xff,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+
+0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05
+# GFX1250-REAL16: v_cos_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x48,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cos_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xff,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05]
+
 0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to