https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/167809
>From eccc49197174bfb20a26c9cad573df37614ed629 Mon Sep 17 00:00:00 2001 From: shore <[email protected]> Date: Thu, 13 Nov 2025 10:18:26 +0800 Subject: [PATCH 1/3] Adding instruction specific features --- llvm/lib/Target/AMDGPU/AMDGPU.td | 108 +++++++++++++++++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 21 ++++ llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 +-- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 22 ++-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll | 1 - 6 files changed, 142 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b008354cfd462..fe2a192f0f372 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; +def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts", + "HasVCUBEInsts", + "true", + "Has V_CUBE* instructions" +>; + +def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts", + "HasVLERPInsts", + "true", + "Has V_LERP* instructions" +>; + +def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts", + "HasVSADInsts", + "true", + "Has V_SAD* instructions" +>; + +def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts", + "HasVQSADInsts", + "true", + "Has V_QSAD* instructions" +>; + +def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts", + "HasVCVTNORMInsts", + "true", + "Has V_CVT_NORM* instructions" +>; + +def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", + "HasVCVTPKNORMVOP2Insts", + "true", + "Has V_CVT_NORM* VOP2 instructions" +>; + +def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", + "HasVCVTPKNORMVOP3Insts", + "true", + "Has V_CVT_NORM* VOP3 instructions" +>; + def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", "HasAtomicDsPkAdd16Insts", "true", @@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : 
GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts ] >; @@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts ] >; @@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, + FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTPKNORMVOP2Insts ] >; @@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, - FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad + FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, + FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
+ FeatureVCVTPKNORMVOP3Insts ] >; @@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad + FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, + FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, + FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts ] >; @@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, + FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts ] >; @@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet< FeatureMemoryAtomicFAddF32DenormalSupport, FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, - FeatureD16Writes32BitVgpr + FeatureD16Writes32BitVgpr, + FeatureVCUBEInsts, + FeatureVLERPInsts, + FeatureVSADInsts, + FeatureVQSADInsts, + FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts ]>; -def FeatureISAVersion12_50 : FeatureSet< +def FeatureISAVersion12_50_Common : FeatureSet< [FeatureGFX12, FeatureGFX1250Insts, FeatureRequiresAlignedVGPRs, @@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureD16Writes32BitVgpr, ]>; +def FeatureISAVersion12_50 : FeatureSet< + !listconcat(FeatureISAVersion12_50_Common.Features, + [FeatureVCUBEInsts, + FeatureVLERPInsts, + FeatureVSADInsts, + FeatureVQSADInsts, + FeatureVCVTNORMInsts, + FeatureVCVTPKNORMVOP2Insts, + FeatureVCVTPKNORMVOP3Insts])>; + def 
FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, [FeatureDPALU_DPP])>; @@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">, AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>; +def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">, + AssemblerPredicate<(all_of FeatureVCUBEInsts)>; + +def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">, + AssemblerPredicate<(all_of FeatureVLERPInsts)>; + +def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">, + AssemblerPredicate<(all_of FeatureVSADInsts)>; + +def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">, + AssemblerPredicate<(all_of FeatureVQSADInsts)>; + +def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">, + AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>; + +def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">, + AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>; + +def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">, + AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>; + def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">, AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f377b8aaf1333..862cee468b7d3 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; + bool HasVCUBEInsts = false; + bool HasVLERPInsts = false; + bool HasVSADInsts = false; + bool HasVQSADInsts = false; + bool HasVCVTNORMInsts = false; + bool HasVCVTPKNORMVOP2Insts = false; + bool HasVCVTPKNORMVOP3Insts = false; bool HasFP8E5M3Insts = false; bool HasCvtFP8Vop1Bug = false; bool HasPkFmacF16Inst = false; 
@@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } + bool hasVCUBEInsts() const { return HasVCUBEInsts; } + + bool hasVLERPInsts() const { return HasVLERPInsts; } + + bool hasVSADInsts() const { return HasVSADInsts; } + + bool hasVQSADInsts() const { return HasVQSADInsts; } + + bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; } + + bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; } + + bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; } + bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; } bool hasPkFmacF16Inst() const { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 85adcab55b742..23095ba17cae8 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in { let isReMaterializable = 1 in defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>; - - let mayRaiseFPException = 0 in { - defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", - VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; - defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", - VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; - } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus +let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in { +defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", + VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; +defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", + VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; +} // End mayRaiseFPException = 0, SubtargetPredicate = 
HasVCVTNORMInsts + let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index d87d250a034f0..afd2d610b17de 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a } // End IsNeverUniform = 1 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; -let ReadsModeReg = 0, mayRaiseFPException = 0 in { +let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 05ba76ab489d8..3d82866c1e5a7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">; -defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>; +let SubtargetPredicate = HasVLERPInsts in + defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>; let SchedRW = [WriteIntMul] in { let SubtargetPredicate = HasMadU32Inst in @@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", 
VOP_F64_F64_F64_F64_VCC>; } // End isCommutable = 1 let isReMaterializable = 1 in { -let mayRaiseFPException = 0 in { +let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in { defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>; defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>; defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>; defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>; -} // End mayRaiseFPException +} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>; defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; @@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in { defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let isCommutable = 1 in { +let isCommutable = 1, SubtargetPredicate = HasVSADInsts in { defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; -} // End isCommutable = 1 +} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>; defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; @@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> { let SubtargetPredicate = isGFX7Plus in { let 
Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { -defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>; +let SubtargetPredicate = HasVQSADInsts in + defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] } // End SubtargetPredicate = isGFX7Plus @@ -789,9 +791,6 @@ let isCommutable = 1 in { defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>; } // End isCommutable = 1 -defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; -defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; - defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>; let isReMaterializable = 1 in { @@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2), } // End SubtargetPredicate = isGFX9Plus +let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in { + defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; + defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; +} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts + // FIXME: Probably should hardcode clamp bit in pseudo and avoid this. 
class OpSelBinOpClampPat<SDPatternOperator node, Instruction inst> : GCNPat< diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll index 43c69baaf3e7f..49169eec072b6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0 >From 33276544f6cd6a96f783bf66dfee81d26a3e8b96 Mon Sep 17 00:00:00 2001 From: shore <[email protected]> Date: Fri, 14 Nov 2025 09:45:23 +0800 Subject: [PATCH 2/3] fix comments --- llvm/lib/Target/AMDGPU/AMDGPU.td | 124 ++++++++++----------- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 28 ++--- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +-- 5 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index fe2a192f0f372..c5d63e5000767 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -901,46 +901,46 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction" >; -def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts", - "HasVCUBEInsts", +def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts", + "HasCubeInsts", "true", "Has V_CUBE* instructions" >; -def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts", - "HasVLERPInsts", +def FeatureLerpInst : SubtargetFeature<"V_LERP-insts", + "HasLerpInst", "true", - "Has V_LERP* instructions" + "Has v_lerp_u8 instruction" >; -def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts", - "HasVSADInsts", +def FeatureSadInsts : SubtargetFeature<"V_SAD-insts", + "HasSadInsts", "true", "Has V_SAD* instructions" >; -def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts", - 
"HasVQSADInsts", +def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts", + "HasQsadInsts", "true", "Has V_QSAD* instructions" >; -def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts", - "HasVCVTNORMInsts", +def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts", + "HasCvtNormInsts", "true", "Has V_CVT_NORM* instructions" >; -def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", - "HasVCVTPKNORMVOP2Insts", +def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", + "HasCvtPkNormVOP2Insts", "true", - "Has V_CVT_NORM* VOP2 instructions" + "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; -def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", - "HasVCVTPKNORMVOP3Insts", +def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", + "HasCvtPkNormVOP3Insts", "true", - "Has V_CVT_NORM* VOP3 instructions" + "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" >; def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", @@ -1536,8 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, - FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureCvtPkNormVOP2Insts ] >; @@ -1551,8 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, 
FeatureVLERPInsts, - FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts ] >; @@ -1568,9 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, - FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTPKNORMVOP2Insts + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, + FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtPkNormVOP2Insts ] >; @@ -1590,9 +1590,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, - FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ] >; @@ -1616,10 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, - FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, - FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + FeatureVmemWriteVgprInOrder, 
FeatureMemToLDSLoad, FeatureCubeInsts, + FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, + FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ] >; @@ -1642,9 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts, - FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts + FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst, + FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts ] >; @@ -2124,13 +2124,13 @@ def FeatureISAVersion12 : FeatureSet< FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, FeatureD16Writes32BitVgpr, - FeatureVCUBEInsts, - FeatureVLERPInsts, - FeatureVSADInsts, - FeatureVQSADInsts, - FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts + FeatureCubeInsts, + FeatureLerpInst, + FeatureSadInsts, + FeatureQsadInsts, + FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts ]>; def FeatureISAVersion12_50_Common : FeatureSet< @@ -2210,13 +2210,13 @@ def FeatureISAVersion12_50_Common : FeatureSet< def FeatureISAVersion12_50 : FeatureSet< !listconcat(FeatureISAVersion12_50_Common.Features, - [FeatureVCUBEInsts, - FeatureVLERPInsts, - FeatureVSADInsts, - FeatureVQSADInsts, - FeatureVCVTNORMInsts, - FeatureVCVTPKNORMVOP2Insts, - FeatureVCVTPKNORMVOP3Insts])>; + [FeatureCubeInsts, + FeatureLerpInst, + FeatureSadInsts, + FeatureQsadInsts, + FeatureCvtNormInsts, + FeatureCvtPkNormVOP2Insts, + FeatureCvtPkNormVOP3Insts])>; def FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, @@ -2887,26 +2887,26 @@ def HasFP8Insts : 
Predicate<"Subtarget->hasFP8Insts()">, def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">, AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>; -def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">, - AssemblerPredicate<(all_of FeatureVCUBEInsts)>; +def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">, + AssemblerPredicate<(all_of FeatureCubeInsts)>; -def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">, - AssemblerPredicate<(all_of FeatureVLERPInsts)>; +def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">, + AssemblerPredicate<(all_of FeatureLerpInst)>; -def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">, - AssemblerPredicate<(all_of FeatureVSADInsts)>; +def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">, + AssemblerPredicate<(all_of FeatureSadInsts)>; -def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">, - AssemblerPredicate<(all_of FeatureVQSADInsts)>; +def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">, + AssemblerPredicate<(all_of FeatureQsadInsts)>; -def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">, - AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>; +def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">, + AssemblerPredicate<(all_of FeatureCvtNormInsts)>; -def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">, - AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>; +def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">, + AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>; -def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">, - AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>; +def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">, + AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>; def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">, AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>; diff --git 
a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 862cee468b7d3..85260c4f123c7 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -166,13 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; - bool HasVCUBEInsts = false; - bool HasVLERPInsts = false; - bool HasVSADInsts = false; - bool HasVQSADInsts = false; - bool HasVCVTNORMInsts = false; - bool HasVCVTPKNORMVOP2Insts = false; - bool HasVCVTPKNORMVOP3Insts = false; + bool HasCubeInsts = false; + bool HasLerpInst = false; + bool HasSadInsts = false; + bool HasQsadInsts = false; + bool HasCvtNormInsts = false; + bool HasCvtPkNormVOP2Insts = false; + bool HasCvtPkNormVOP3Insts = false; bool HasFP8E5M3Insts = false; bool HasCvtFP8Vop1Bug = false; bool HasPkFmacF16Inst = false; @@ -899,19 +899,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } - bool hasVCUBEInsts() const { return HasVCUBEInsts; } + bool hasCubeInsts() const { return HasCubeInsts; } - bool hasVLERPInsts() const { return HasVLERPInsts; } + bool hasLerpInst() const { return HasLerpInst; } - bool hasVSADInsts() const { return HasVSADInsts; } + bool hasSadInsts() const { return HasSadInsts; } - bool hasVQSADInsts() const { return HasVQSADInsts; } + bool hasQsadInsts() const { return HasQsadInsts; } - bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; } + bool hasCvtNormInsts() const { return HasCvtNormInsts; } - bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; } + bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; } - bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; } + bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; } bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; } diff --git 
a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 23095ba17cae8..1d1e95908fce6 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -618,12 +618,12 @@ let SubtargetPredicate = isGFX9Plus in { defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>; } // End SubtargetPredicate = isGFX9Plus -let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in { +let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in { defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>; -} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts +} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index afd2d610b17de..dbb7862ab4ab5 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a } // End IsNeverUniform = 1 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; -let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in { +let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; 
} diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 3d82866c1e5a7..872bde501cd2d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -185,7 +185,7 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">; -let SubtargetPredicate = HasVLERPInsts in +let SubtargetPredicate = HasLerpInst in defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>; let SchedRW = [WriteIntMul] in { @@ -259,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>; } // End isCommutable = 1 let isReMaterializable = 1 in { -let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in { +let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in { defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>; defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>; defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>; defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>; -} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts +} // mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>; defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; @@ -307,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in { defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", 
VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let isCommutable = 1, SubtargetPredicate = HasVSADInsts in { +let isCommutable = 1, SubtargetPredicate = HasSadInsts in { defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; -} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts +} // End isCommutable = 1, SubtargetPredicate = HasSadInsts defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>; defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>; @@ -425,7 +425,7 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> { let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { -let SubtargetPredicate = HasVQSADInsts in +let SubtargetPredicate = HasQsadInsts in defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] @@ -995,10 +995,10 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2), } // End SubtargetPredicate = isGFX9Plus -let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in { +let SubtargetPredicate = HasCvtPkNormVOP3Insts in { defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; -} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts +} // end SubtargetPredicate = HasCvtPkNormVOP3Insts // FIXME: 
Probably should hardcode clamp bit in pseudo and avoid this. class OpSelBinOpClampPat<SDPatternOperator node, >From cd83519edfef4bc692b656b7c1a10cbb5727f17c Mon Sep 17 00:00:00 2001 From: shore <[email protected]> Date: Tue, 18 Nov 2025 10:59:53 +0800 Subject: [PATCH 3/3] add feature to builtins --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++++++++++---------- llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 2b6fcb1fd479b..0dfa9c13792cf 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc") BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc") BUILTIN(__builtin_amdgcn_fract, "dd", "nc") BUILTIN(__builtin_amdgcn_fractf, "ff", "nc") -BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst") BUILTIN(__builtin_amdgcn_class, "bdi", "nc") BUILTIN(__builtin_amdgcn_classf, "bfi", "nc") -BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc") -BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc") +TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts") +TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts") BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n") BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n") BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n") @@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc") 
-BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc") -BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts") BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc") -BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts") BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc") -BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc") +TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts") +TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts") +TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts") BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc") BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc") diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index c5d63e5000767..cd8327563d9d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -934,13 +934,13 @@ def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts", def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts", "HasCvtPkNormVOP2Insts", "true", - "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has V_CVT_PKNORM_*_F32 instructions" >; def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts", "HasCvtPkNormVOP3Insts", "true", - "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions" + "Has 
V_CVT_PKNORM_*_F16 instructions" >; def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
