llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Co-authored-by: Shilei Tian <shilei.tian@<!-- -->amd.com> --- Full diff: https://github.com/llvm/llvm-project/pull/117796.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+10) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (+5) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+18) - (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+32) - (modified) llvm/test/MC/AMDGPU/gfx950_err.s (+6) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+24) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 83a74b4a435909..6bac2d2b590ffa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -420,6 +420,12 @@ def FeatureF16BF16ToFP6BF6ConversionScaleInsts : SubtargetFeature<"f16bf16-to-fp "Has f16bf16 to fp6bf6 conversion scale instructions" >; +def FeatureF32ToF16BF16ConversionSRInsts : SubtargetFeature<"f32-to-f16bf16-cvt-sr-insts", + "HasF32ToF16BF16ConversionSRInsts", + "true", + "Has f32 to f16bf16 conversion scale instructions" +>; + def FeatureAshrPkInsts : SubtargetFeature<"ashr-pk-insts", "HasAshrPkInsts", "true", @@ -438,6 +444,7 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts, FeatureF16BF16ToFP6BF6ConversionScaleInsts, + FeatureF32ToF16BF16ConversionSRInsts, FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3PKF16 ] @@ -2504,6 +2511,9 @@ def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionSca def HasF16BF16ToFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasF16BF16ToFP6BF6ConversionScaleInsts()">, AssemblerPredicate<(all_of FeatureF16BF16ToFP6BF6ConversionScaleInsts)>; +def HasF32ToF16BF16ConversionSRInsts : Predicate<"Subtarget->hasF32ToF16BF16ConversionSRInsts()">, + AssemblerPredicate<(all_of FeatureF32ToF16BF16ConversionSRInsts)>; + def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 742f4e6e80f1a9..c5c951b58b8d6d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -55,6 +55,7 @@ class AMDGPUSubtarget { bool HasFP4ConversionScaleInsts = false; bool HasFP6BF6ConversionScaleInsts = false; bool HasF16BF16ToFP6BF6ConversionScaleInsts = false; + bool HasF32ToF16BF16ConversionSRInsts = false; bool EnableRealTrue16Insts = false; bool HasBF16ConversionInsts = false; bool HasMadMixInsts = false; @@ -190,6 +191,10 @@ class AMDGPUSubtarget { bool hasF16BF16ToFP6BF6ConversionScaleInsts() const { return HasF16BF16ToFP6BF6ConversionScaleInsts; } + bool hasF32ToF16BF16ConversionSRInsts() const { + return HasF32ToF16BF16ConversionSRInsts; + } + bool hasMadMacF32Insts() const { return HasMadMacF32Insts || !isGCN(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index d08bda37292105..0938a11077cfb1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2850,6 +2850,8 @@ def VOP_V2I16_V2BF16_F32 : VOPProfile<[v2i16, v2bf16, f32, untyped]>; def VOP_I32_F32_F32_F32 : VOPProfile<[i32, f32, f32, f32]>; def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>; def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>; +def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>; +def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 237bb0faffff8b..2c441910fe21a8 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1268,6 +1268,12 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>; } // End SubtargetPredicate = isGFX11Plus +class VOP3_CVT_SR_FP16_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<P> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + Int32InputMods:$src1_modifiers, Src1RC64:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel); +} + // FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns // instead of less complex f16. Disable GlobalISel for these for now. def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { @@ -1292,6 +1298,13 @@ let SubtargetPredicate = HasBF16ConversionInsts in { (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>; } +let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in { + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_SR_F16_F32 : VOP3Inst<"v_cvt_sr_f16_f32", VOP3_CVT_SR_FP16_TiedInput_Profile<VOP_F16_F32_I32>>; + defm V_CVT_SR_BF16_F32 : VOP3Inst<"v_cvt_sr_bf16_f32", VOP3_CVT_SR_FP16_TiedInput_Profile<VOP_BF16_F32_I32>>; + } +} + let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; @@ -2164,6 +2177,11 @@ defm V_CVT_SCALEF32_PK32_BF6_F16 : VOP3_Real_gfx9<0x25a, "v_cvt_scalef32_pk32_b defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25b, "v_cvt_scalef32_pk32_bf6_bf16">; } +let OtherPredicates = [HasF32ToF16BF16ConversionSRInsts] in { +defm V_CVT_SR_F16_F32 : VOP3OpSel_Real_gfx9 <0x2a6>; +defm V_CVT_SR_BF16_F32: VOP3OpSel_Real_gfx9 <0x2a7>; +} + defm V_ASHR_PK_I8_I32 : VOP3OpSel_Real_gfx9 <0x265>; defm V_ASHR_PK_U8_I32 : VOP3OpSel_Real_gfx9 <0x266>; diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 1a776c1050578c..dac7c33eaa6b2e 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1375,3 +1375,35 @@ v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 // NOT-GFX950: error: instruction not supported on this GPU // GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04] v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_f16_f32 v0, v1, v2 ; encoding: [0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_f16_f32 v0, v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_bf16_f32 v0, v1, v2 ; encoding: [0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_bf16_f32 v0, v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_f16_f32 v0, v1, v2 op_sel:[0,0,1] ; encoding: [0x00,0x40,0xa6,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_f16_f32 v0, v1, v2 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_bf16_f32 v0, v1, v2 op_sel:[0,0,1] ; encoding: [0x00,0x40,0xa7,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_bf16_f32 v0, v1, v2 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_f16_f32 v0, -v1, v2 ; encoding: [0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x20] +v_cvt_sr_f16_f32 v0, -v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_f16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa6,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_f16_f32 v0, |v1|, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_bf16_f32 v0, -v1, v2 ; encoding: [0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x20] +v_cvt_sr_bf16_f32 v0, -v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00] +v_cvt_sr_bf16_f32 v0, |v1|, v2 diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index c5450e48558bfd..55cd57a1bc398e 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -392,3 +392,9 @@ v_pk_minimum3_f16 v0, s1, s2, v3 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_pk_maximum3_f16 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_sr_f16_f32 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_sr_bf16_f32 v1, v2, v3 clamp diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index 2b5b4c7770924d..ee9a7c5d2006fd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -1014,3 +1014,27 @@ # GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04] 0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04 + +# GFX950: v_cvt_sr_f16_f32 v0, v1, v2 ; encoding: [0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x00] +0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x00 + +# GFX950: v_cvt_sr_bf16_f32 v0, v1, v2 ; encoding: [0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x00] +0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x00 + +# GFX950: v_cvt_sr_f16_f32 v0, v1, v2 op_sel:[0,0,1] ; encoding: [0x00,0x40,0xa6,0xd2,0x01,0x05,0x02,0x00] +0x00,0x40,0xa6,0xd2,0x01,0x05,0x02,0x00 + +# GFX950: v_cvt_sr_bf16_f32 v0, v1, v2 op_sel:[0,0,1] ; encoding: [0x00,0x40,0xa7,0xd2,0x01,0x05,0x02,0x00] +0x00,0x40,0xa7,0xd2,0x01,0x05,0x02,0x00 + +# GFX950: v_cvt_sr_f16_f32 v0, -v1, v2 ; encoding: [0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x20] +0x00,0x00,0xa6,0xd2,0x01,0x05,0x02,0x20 + +# GFX950: v_cvt_sr_f16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa6,0xd2,0x01,0x05,0x02,0x00] +0x00,0x01,0xa6,0xd2,0x01,0x05,0x02,0x00 + +# GFX950: v_cvt_sr_bf16_f32 v0, -v1, v2 ; encoding: [0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x20] +0x00,0x00,0xa7,0xd2,0x01,0x05,0x02,0x20 + +# GFX950: v_cvt_sr_bf16_f32 v0, |v1|, v2 ; encoding: [0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00] +0x00,0x01,0xa7,0xd2,0x01,0x05,0x02,0x00 `````````` </details> https://github.com/llvm/llvm-project/pull/117796 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits