https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117820

>From 0b0d6946d385a341f766d1a3e333962682b8479b Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.t...@amd.com>
Date: Tue, 4 Jun 2024 13:59:28 -0400
Subject: [PATCH] AMDGPU: Add support for v_cvt_scalef32_sr instructions

Co-authored-by: Shilei Tian <shilei.t...@amd.com>
---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   7 +
 .../CodeGenOpenCL/builtins-amdgcn-gfx950.cl   |  62 ++
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  11 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   6 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  44 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |   1 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  12 +
 .../AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.ll  | 636 ++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx950_asm_features.s     |  24 +
 llvm/test/MC/AMDGPU/gfx950_err.s              |  18 +
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  |  18 +
 11 files changed, 820 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.ll

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 2c617a90a4fde9..61039938267feb 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -602,5 +602,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16, "UiUiV2hUifIi", "nc"
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16, "UiUiV2yUifIi", "nc", "fp4-cvt-scale-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32, "UiUiV2fUifIi", "nc", "fp4-cvt-scale-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
index 691be592e3a4bc..64403f0bf94ebd 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
@@ -1338,3 +1338,65 @@ void test_cvt_scalef32_sr_pk_fp4_f32(global unsigned *out, float2 src, uint seed
   *out = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(*out, src, seed, scale, 2);
   *out = __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32(*out, src, seed, scale, 3);
 }
+
+// CHECK-LABEL: @test_cvt_scalef32_sr_pk32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT6_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[SRCBF32_ADDR:%.*]] = alloca <32 x bfloat>, align 64, addrspace(5)
+// CHECK-NEXT:    [[SRCH32_ADDR:%.*]] = alloca <32 x half>, align 64, addrspace(5)
+// CHECK-NEXT:    [[SRCF32_ADDR:%.*]] = alloca <32 x float>, align 128, addrspace(5)
+// CHECK-NEXT:    [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[SRC2_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT6:%.*]], ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <32 x bfloat> [[SRCBF32:%.*]], ptr addrspace(5) [[SRCBF32_ADDR]], align 64
+// CHECK-NEXT:    store <32 x half> [[SRCH32:%.*]], ptr addrspace(5) [[SRCH32_ADDR]], align 64
+// CHECK-NEXT:    store <32 x float> [[SRCF32:%.*]], ptr addrspace(5) [[SRCF32_ADDR]], align 128
+// CHECK-NEXT:    store i32 [[SRC1:%.*]], ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    store float [[SRC2:%.*]], ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16(<32 x bfloat> [[TMP0]], i32 [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP3]], ptr addrspace(1) [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16(<32 x half> [[TMP5]], i32 [[TMP6]], float [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP8]], ptr addrspace(1) [[TMP9]], align 32
+// CHECK-NEXT:    [[TMP10:%.*]] = load <32 x float>, ptr addrspace(5) [[SRCF32_ADDR]], align 128
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP13:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32(<32 x float> [[TMP10]], i32 [[TMP11]], float [[TMP12]])
+// CHECK-NEXT:    [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP13]], ptr addrspace(1) [[TMP14]], align 32
+// CHECK-NEXT:    [[TMP15:%.*]] = load <32 x bfloat>, ptr addrspace(5) [[SRCBF32_ADDR]], align 64
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16(<32 x bfloat> [[TMP15]], i32 [[TMP16]], float [[TMP17]])
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP18]], ptr addrspace(1) [[TMP19]], align 32
+// CHECK-NEXT:    [[TMP20:%.*]] = load <32 x half>, ptr addrspace(5) [[SRCH32_ADDR]], align 64
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP22:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP23:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16(<32 x half> [[TMP20]], i32 [[TMP21]], float [[TMP22]])
+// CHECK-NEXT:    [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP23]], ptr addrspace(1) [[TMP24]], align 32
+// CHECK-NEXT:    [[TMP25:%.*]] = load <32 x float>, ptr addrspace(5) [[SRCF32_ADDR]], align 128
+// CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(5) [[SRC1_ADDR]], align 4
+// CHECK-NEXT:    [[TMP27:%.*]] = load float, ptr addrspace(5) [[SRC2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP28:%.*]] = call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32(<32 x float> [[TMP25]], i32 [[TMP26]], float [[TMP27]])
+// CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT6_ADDR]], align 8
+// CHECK-NEXT:    store <6 x i32> [[TMP28]], ptr addrspace(1) [[TMP29]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_cvt_scalef32_sr_pk32(global uint6 *out6, bfloat32 srcbf32, half32 srch32, float32 srcf32, unsigned src1, float src2)
+{
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16(srcbf32, src1, src2);
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16(srch32, src1, src2);
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32(srcf32, src1, src2);
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16(srcbf32, src1, src2);
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16(srch32, src1, src2);
+  *out6 = __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32(srcf32, src1, src2);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 99a29dadef56de..73f3559ab05a48 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -602,6 +602,10 @@ class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMTy
   [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
 >, ClangBuiltin<"__builtin_amdgcn_"#name>;
 
+class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
+  [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_"#name>;
+
 def int_amdgcn_cvt_scalef32_pk32_fp6_f16  : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty,  "cvt_scalef32_pk32_fp6_f16">;
 def int_amdgcn_cvt_scalef32_pk32_bf6_f16  : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty,  "cvt_scalef32_pk32_bf6_f16">;
 def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">;
@@ -609,6 +613,13 @@ def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i3
 def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">;
 def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">;
 
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16  : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32  : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_bf6_f32">;
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_fp6_bf16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16  : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">;
+def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32  : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">;
+
 class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
   [DstTy],
   [llvm_i32_ty,   // src
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index ad100f37f8710c..158603a7aff879 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4578,6 +4578,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
     case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
     case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
+    case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
     case Intrinsic::amdgcn_ashr_pk_i8_i32:
     case Intrinsic::amdgcn_ashr_pk_u8_i32:
     case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 119a4d63704777..b27e2529f4a807 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1779,25 +1779,27 @@ class getSDWASrcForVT <ValueType VT> {
 // given VT.
 class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
   RegisterOperand ret =
-  !cond(!eq(VT, f64)      : VSrc_f64,
-        !eq(VT, f32)      : VSrc_f32,
-        !eq(VT, f16)      : !if(IsTrue16, VSrcT_f16, VSrc_f16),
-        !eq(VT, bf16)     : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
-        !eq(VT, i16)      : !if(IsTrue16, VSrcT_b16, VSrc_b16),
-        !eq(VT, i1)       : SSrc_i1,
-        !eq(VT, v2f32)    : VSrc_v2f32,
-        !eq(VT, v2i32)    : VSrc_v2b32,
-        !eq(VT, v2f16)    : VSrc_v2f16,
-        !eq(VT, v2bf16)   : VSrc_v2bf16,
-        !eq(VT, v2i16)    : VSrc_v2b16,
-        !eq(VT, v4f16)    : AVSrc_64,
-        !eq(VT, v4bf16)   : AVSrc_64,
-        !eq(VT.Size, 512) : VRegSrc_512,
-        !eq(VT.Size, 192) : VRegSrc_192,
-        !eq(VT.Size, 128) : VRegSrc_128,
-        !eq(VT.Size, 96)  : VRegSrc_96,
-        !eq(VT.Size, 64)  : VSrc_b64,
-        1                 : VSrc_b32);
+  !cond(!eq(VT, f64)       : VSrc_f64,
+        !eq(VT, f32)       : VSrc_f32,
+        !eq(VT, f16)       : !if(IsTrue16, VSrcT_f16, VSrc_f16),
+        !eq(VT, bf16)      : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
+        !eq(VT, i16)       : !if(IsTrue16, VSrcT_b16, VSrc_b16),
+        !eq(VT, i1)        : SSrc_i1,
+        !eq(VT, v2f32)     : VSrc_v2f32,
+        !eq(VT, v2i32)     : VSrc_v2b32,
+        !eq(VT, v2f16)     : VSrc_v2f16,
+        !eq(VT, v2bf16)    : VSrc_v2bf16,
+        !eq(VT, v2i16)     : VSrc_v2b16,
+        !eq(VT, v4f16)     : AVSrc_64,
+        !eq(VT, v4bf16)    : AVSrc_64,
+        !eq(VT.Size, 1024) : VRegSrc_1024,
+        !eq(VT.Size, 512)  : VRegSrc_512,
+        !eq(VT.Size, 256)  : VRegSrc_256,
+        !eq(VT.Size, 192)  : VRegSrc_192,
+        !eq(VT.Size, 128)  : VRegSrc_128,
+        !eq(VT.Size, 96)   : VRegSrc_96,
+        !eq(VT.Size, 64)   : VSrc_b64,
+        1                  : VSrc_b32);
 }
 
 // Returns the vreg register class to use for sources of VOP3 instructions for the
@@ -2856,6 +2858,10 @@ def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
 def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
 def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
 
+def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>;
+def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>;
+def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>;
+
 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 51fdd4211a5cf6..6a349d2bf06ea2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1252,6 +1252,7 @@ def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
 def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
 def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
 def VRegSrc_512: SrcReg9<VReg_512, "OPW512">;
+def VRegSrc_1024: SrcReg9<VReg_1024, "OPW1024">;
 def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
 
 // True 16 Operands
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index c78f5c108e4d53..3a79532cecb917 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1117,6 +1117,12 @@ let SubtargetPredicate = HasF16BF16ToFP6BF6ConversionScaleInsts, mayRaiseFPExcep
  defm V_CVT_SCALEF32_PK32_BF6_F16   : VOP3Inst<"v_cvt_scalef32_pk32_bf6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_F32>,  int_amdgcn_cvt_scalef32_pk32_bf6_f16>;
  defm V_CVT_SCALEF32_PK32_FP6_BF16  : VOP3Inst<"v_cvt_scalef32_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_fp6_bf16>;
  defm V_CVT_SCALEF32_PK32_BF6_BF16  : VOP3Inst<"v_cvt_scalef32_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_bf6_bf16>;
+  defm V_CVT_SCALEF32_SR_PK32_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16>;
+  defm V_CVT_SCALEF32_SR_PK32_BF6_F16  : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16>;
+  defm V_CVT_SCALEF32_SR_PK32_BF6_F32  : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32>;
+  defm V_CVT_SCALEF32_SR_PK32_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16>;
+  defm V_CVT_SCALEF32_SR_PK32_FP6_F16  : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16>;
+  defm V_CVT_SCALEF32_SR_PK32_FP6_F32  : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32>;
 }
 
 let SubtargetPredicate = HasGFX950Insts, mayRaiseFPException = 0 in {
@@ -2203,6 +2209,12 @@ defm V_CVT_SCALEF32_PK32_FP6_F16  : VOP3_Real_gfx9<0x258, "v_cvt_scalef32_pk32_f
 defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3_Real_gfx9<0x259, "v_cvt_scalef32_pk32_fp6_bf16">;
 defm V_CVT_SCALEF32_PK32_BF6_F16  : VOP3_Real_gfx9<0x25a, "v_cvt_scalef32_pk32_bf6_f16">;
 defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25b, "v_cvt_scalef32_pk32_bf6_bf16">;
+defm V_CVT_SCALEF32_SR_PK32_BF6_BF16  : VOP3_Real_gfx9<0x25f, "v_cvt_scalef32_sr_pk32_bf6_bf16">;
+defm V_CVT_SCALEF32_SR_PK32_BF6_F16   : VOP3_Real_gfx9<0x25e, "v_cvt_scalef32_sr_pk32_bf6_f16">;
+defm V_CVT_SCALEF32_SR_PK32_BF6_F32   : VOP3_Real_gfx9<0x255, "v_cvt_scalef32_sr_pk32_bf6_f32">;
+defm V_CVT_SCALEF32_SR_PK32_FP6_BF16  : VOP3_Real_gfx9<0x25d, "v_cvt_scalef32_sr_pk32_fp6_bf16">;
+defm V_CVT_SCALEF32_SR_PK32_FP6_F16   : VOP3_Real_gfx9<0x25c, "v_cvt_scalef32_sr_pk32_fp6_f16">;
+defm V_CVT_SCALEF32_SR_PK32_FP6_F32   : VOP3_Real_gfx9<0x254, "v_cvt_scalef32_sr_pk32_fp6_f32">;
 }
 
 let OtherPredicates = [HasF32ToF16BF16ConversionSRInsts] in {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.ll
new file mode 100644
index 00000000000000..3e9ac6cbe3ba6e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.ll
@@ -0,0 +1,636 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-GISEL %s
+
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16(<32 x bfloat> %src, i32 %sr, float %scale)
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16(<32 x half> %src, i32 %sr, float %scale)
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32(<32 x float> %src, i32 %sr, float %scale)
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16(<32 x bfloat> %src, i32 %sr, float %scale)
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16(<32 x half> %src, i32 %sr, float %scale)
+declare <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32(<32 x float> %src, i32 %sr, float %scale)
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_bf16_vv(<32 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_bf16_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[0:15], v16, v17
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_bf16_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v20, 16, v0
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v21, 16, v1
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v22, 16, v2
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v23, 16, v3
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v24, 16, v4
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v25, 16, v5
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v26, 16, v6
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v27, 16, v7
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v29, 16, v9
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v30, 16, v10
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v31, 16, v11
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v32, 16, v12
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v33, 16, v13
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v34, 16, v14
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v35, 16, v15
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v0, v20 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v1, v21 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v2, v22 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v3, v23 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v4, v24 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v5, v25 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v6, v26 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v7, v27 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v8, v28 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v9, v29 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v10, v30 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v11, v31 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v12, v32 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v13, v33 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v14, v34 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v15, v35 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[0:15], v16, v17
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16(<32 x bfloat> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_bf16_sl(<32 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_bf16_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_bf16 v[2:7], v[2:17], s16, v18
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_bf16_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_lshr_b32 s17, s0, 16
+; GFX950-GISEL-NEXT:    s_lshr_b32 s18, s1, 16
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s17, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s19, s2, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s0, s17, s0
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s18, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s20, s3, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s1, s17, s1
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s19, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s21, s4, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s2, s17, s2
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s20, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s22, s5, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s3, s17, s3
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s21, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s4, s4, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s23, s6, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s4, s17, s4
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s22, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s5, s5, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s24, s7, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s5, s17, s5
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s23, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s6, s6, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s25, s8, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s6, s17, s6
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s24, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s7, s7, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s26, s9, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s7, s17, s7
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s25, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s8, s8, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s27, s10, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s8, s17, s8
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s26, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s9, s9, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s28, s11, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s9, s17, s9
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s27, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s10, s10, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s29, s12, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s10, s17, s10
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s28, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s11, s11, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s30, s13, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s11, s17, s11
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s29, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s12, s12, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s31, s14, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s12, s17, s12
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s30, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s13, s13, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s33, s15, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s13, s17, s13
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s31, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s14, s14, 0xffff
+; GFX950-GISEL-NEXT:    s_or_b32 s14, s17, s14
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s33, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s15, s15, 0xffff
+; GFX950-GISEL-NEXT:    s_or_b32 s15, s17, s15
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_bf16 v[2:7], v[2:17], s16, v18
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16(<32 x bfloat> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_f16_vv(<32 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_f16_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[0:15], v16, v17
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_f16_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[0:15], v16, v17
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16(<32 x half> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_f16_sl(<32 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_f16_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f16 v[2:7], v[2:17], s16, v18
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_f16_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f16 v[2:7], v[2:17], s16, 
v18
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16(<32 x half> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_bf16_vv(<32 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_bf16_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[0:15], v16, v17
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_bf16_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v20, 16, v0
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v21, 16, v1
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v22, 16, v2
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v23, 16, v3
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v24, 16, v4
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v25, 16, v5
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v26, 16, v6
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v27, 16, v7
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v29, 16, v9
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v30, 16, v10
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v31, 16, v11
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v32, 16, v12
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v33, 16, v13
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v34, 16, v14
+; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v35, 16, v15
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v0, v20 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v1, v21 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v2, v22 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v3, v23 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v4, v24 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v5, v25 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v6, v26 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v7, v27 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v8, v28 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v9, v29 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v10, v30 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v11, v31 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v12, v32 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v13, v33 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v14, v34 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    v_mov_b32_sdwa v15, v35 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[0:15], v16, v17
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16(<32 x bfloat> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_bf16_sl(<32 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_bf16_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_bf16 v[2:7], v[2:17], s16, v18
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_bf16_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_lshr_b32 s17, s0, 16
+; GFX950-GISEL-NEXT:    s_lshr_b32 s18, s1, 16
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s17, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s19, s2, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s0, s17, s0
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s18, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s20, s3, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s1, s17, s1
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s19, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s21, s4, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s2, s17, s2
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s20, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s22, s5, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s3, s17, s3
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s21, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s4, s4, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s23, s6, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s4, s17, s4
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s22, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s5, s5, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s24, s7, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s5, s17, s5
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s23, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s6, s6, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s25, s8, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s6, s17, s6
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s24, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s7, s7, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s26, s9, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s7, s17, s7
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s25, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s8, s8, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s27, s10, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s8, s17, s8
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s26, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s9, s9, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s28, s11, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s9, s17, s9
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s27, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s10, s10, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s29, s12, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s10, s17, s10
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s28, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s11, s11, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s30, s13, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s11, s17, s11
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s29, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s12, s12, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s31, s14, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s12, s17, s12
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s30, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s13, s13, 0xffff
+; GFX950-GISEL-NEXT:    s_lshr_b32 s33, s15, 16
+; GFX950-GISEL-NEXT:    s_or_b32 s13, s17, s13
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s31, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s14, s14, 0xffff
+; GFX950-GISEL-NEXT:    s_or_b32 s14, s17, s14
+; GFX950-GISEL-NEXT:    s_lshl_b32 s17, s33, 16
+; GFX950-GISEL-NEXT:    s_and_b32 s15, s15, 0xffff
+; GFX950-GISEL-NEXT:    s_or_b32 s15, s17, s15
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_bf16 v[2:7], v[2:17], s16, v18
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16(<32 x bfloat> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_f16_vv(<32 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_f16_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[0:15], v16, v17
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_f16_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[0:15], v16, v17
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[18:19], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[18:19], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16(<32 x half> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_f16_sl(<32 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_f16_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f16 v[2:7], v[2:17], s16, v18
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_f16_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v18, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f16 v[2:7], v[2:17], s16, v18
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16(<32 x half> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_f32_vv(<32 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_f32_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[0:31], v32, v33
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[34:35], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[34:35], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_f32_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[0:31], v32, v33
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[34:35], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[34:35], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32(<32 x float> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_bf6_f32_sl(<32 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_bf6_f32_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, s23
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v26, s24
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v27, s25
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v28, s26
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v29, s27
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v30, s28
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v31, s29
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v32, s30
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v33, s31
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v34, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f32 v[2:7], v[2:33], s32, v34
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_bf6_f32_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[32:33], s[30:31]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[30:31], s[28:29]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[28:29], s[26:27]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[26:27], s[24:25]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[24:25], s[22:23]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[22:23], s[20:21]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[18:19], s[16:17]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v34, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_bf6_f32 v[2:7], v[2:33], s32, v34
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32(<32 x float> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_f32_vv(<32 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_f32_vv:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[0:31], v32, v33
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[34:35], v[4:5], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[34:35], v[0:3], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_f32_vv:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[0:31], v32, v33
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[34:35], v[0:3], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[34:35], v[4:5], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32(<32 x float> %src, i32 %sr, float %scale)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk32_fp6_f32_sl(<32 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX950-SDAG-LABEL: test_scalef32_sr_pk32_fp6_f32_sl:
+; GFX950-SDAG:       ; %bb.0:
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, s23
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v26, s24
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v27, s25
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v28, s26
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v29, s27
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v30, s28
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v31, s29
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v32, s30
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v33, s31
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v34, 0x42c80000
+; GFX950-SDAG-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f32 v[2:7], v[2:33], s32, v34
+; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-SDAG-NEXT:    s_endpgm
+;
+; GFX950-GISEL-LABEL: test_scalef32_sr_pk32_fp6_f32_sl:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[32:33], s[30:31]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[30:31], s[28:29]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[28:29], s[26:27]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[26:27], s[24:25]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[24:25], s[22:23]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[22:23], s[20:21]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[18:19], s[16:17]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
+; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX950-GISEL-NEXT:    v_mov_b32_e32 v34, 0x42c80000
+; GFX950-GISEL-NEXT:    v_cvt_scalef32_sr_pk32_fp6_f32 v[2:7], v[2:33], s32, v34
+; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
+; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
+; GFX950-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32(<32 x float> %src, i32 %sr, float 100.0)
+  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index a69f62991485ee..490dbb4cd97fef 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -1527,3 +1527,27 @@ v_cvt_scalef32_sr_fp8_f32 v0, |v1|, v2, v3
 // NOT-GFX950: error: instruction not supported on this GPU
 // GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
 v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3|
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5f,0xd2,0x06,0x2d,0x5e,0x04]
+v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[6:21], v22, v23
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5e,0xd2,0x06,0x2d,0x5e,0x04]
+v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[6:21], v22, v23
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5d,0xd2,0x06,0x2d,0x5e,0x04]
+v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[6:21], v22, v23
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5c,0xd2,0x06,0x2d,0x5e,0x04]
+v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[6:21], v22, v23
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[6:37], v38, v39 ; encoding: [0x00,0x00,0x55,0xd2,0x06,0x4d,0x9e,0x04]
+v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[6:37], v38, v39
+
+// NOT-GFX950: error: instruction not supported on this GPU
+// GFX950: v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[6:37], v38, v39 ; encoding: [0x00,0x00,0x54,0xd2,0x06,0x4d,0x9e,0x04]
+v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[6:37], v38, v39
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index bc01ea90ebbf49..e0b832d8fe2973 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -416,3 +416,21 @@ v_cvt_scalef32_sr_fp8_f16 v0, v1, v2, v3 clamp
 
 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[0:15], v16, v17 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[6:21], v22, v23 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[6:21], v22, v23 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[6:21], v22, v23 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[6:37], v38, v39 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[6:37], v38, v39 clamp
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 87ead3a927c8fb..f86d4325d63e58 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -1128,3 +1128,21 @@
 
 # GFX950: v_cvt_scalef32_sr_fp8_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04]
 0x00,0x04,0x37,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_bf6_bf16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5f,0xd2,0x06,0x2d,0x5e,0x04]
+0x00,0x00,0x5f,0xd2,0x06,0x2d,0x5e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_bf6_f16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5e,0xd2,0x06,0x2d,0x5e,0x04]
+0x00,0x00,0x5e,0xd2,0x06,0x2d,0x5e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_fp6_bf16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5d,0xd2,0x06,0x2d,0x5e,0x04]
+0x00,0x00,0x5d,0xd2,0x06,0x2d,0x5e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_fp6_f16 v[0:5], v[6:21], v22, v23 ; encoding: [0x00,0x00,0x5c,0xd2,0x06,0x2d,0x5e,0x04]
+0x00,0x00,0x5c,0xd2,0x06,0x2d,0x5e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[6:37], v38, v39 ; encoding: [0x00,0x00,0x55,0xd2,0x06,0x4d,0x9e,0x04]
+0x00,0x00,0x55,0xd2,0x06,0x4d,0x9e,0x04
+
+# GFX950: v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[6:37], v38, v39 ; encoding: [0x00,0x00,0x54,0xd2,0x06,0x4d,0x9e,0x04]
+0x00,0x00,0x54,0xd2,0x06,0x4d,0x9e,0x04

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to