kushanam updated this revision to Diff 528475. kushanam added a comment. Rebasing the D144911 patch <https://reviews.llvm.org/D144911>.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D144911/new/ https://reviews.llvm.org/D144911 Files: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td llvm/lib/Target/NVPTX/NVPTXIntrinsics.td Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -998,17 +998,17 @@ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, [hasPTX<42>, hasSM<53>]>, @@ -1022,10 +1022,10 @@ [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, Float16x2Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs, - [hasPTX70, hasSM80]>, - FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs, - [hasPTX70, hasSM80]> + FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, BFloat16x2Regs, + [hasPTX<70>, hasSM<80>]>, + FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, BFloat16x2Regs, + [hasPTX<70>, hasSM<80>]> ] in { def P.Variant : F_MATH_3<!strconcat("fma", 
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1121,7 +1121,7 @@ NVPTXInst<(outs RC:$dst), (ins RC:$src), !strconcat(OpcStr, " \t$dst, $src;"), [(set RC:$dst, (fneg (T RC:$src)))]>, - Requires<[useFP16Math, hasPTX70, hasSM80, Pred]>; + Requires<[useFP16Math, hasPTX<70>, hasSM<80>, Pred]>; def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, BFloat16Regs, doF32FTZ>; def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, BFloat16Regs, True>; def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, BFloat16x2Regs, doF32FTZ>; @@ -3337,30 +3337,6 @@ " mov.b32 \t{%tmp_lo, $dst}, $src; }}", [(set BFloat16Regs:$dst, (extractelt (v2bf16 BFloat16x2Regs:$src), 1))]>; - - // // Coalesce two bf16 registers into bf16x2 - // def BuildBF16x2 : NVPTXInst<(outs BFloat16x2Regs:$dst), - // (ins BFloat16Regs:$a, BFloat16Regs:$b), - // "mov.b32 \t$dst, {{$a, $b}};", - // [(set (v2bf16 BFloat16x2Regs:$dst), - // (build_vector (bf16 BFloat16Regs:$a), (bf16 BFloat16Regs:$b)))]>; - - // // Directly initializing underlying the b32 register is one less SASS - // // instruction than than vector-packing move. - // def BuildBF16x2i : NVPTXInst<(outs BFloat16x2Regs:$dst), (ins i32imm:$src), - // "mov.b32 \t$dst, $src;", - // []>; - - // // Split f16x2 into two f16 registers. - // def SplitBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi), - // (ins BFloat16x2Regs:$src), - // "mov.b32 \t{{$lo, $hi}}, $src;", - // []>; - // // Split an i32 into two f16 - // def SplitI32toBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi), - // (ins Int32Regs:$src), - // "mov.b32 \t{{$lo, $hi}}, $src;", - // []>; } // Count leading zeros
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -998,17 +998,17 @@ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX70, hasSM80]>, + FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, BFloat16Regs, - [hasPTX70, hasSM80]>, + [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, [hasPTX<42>, hasSM<53>]>, @@ -1022,10 +1022,10 @@ [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, Float16x2Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs, - [hasPTX70, hasSM80]>, - FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs, - [hasPTX70, hasSM80]> + FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, BFloat16x2Regs, + [hasPTX<70>, hasSM<80>]>, + FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, BFloat16x2Regs, + [hasPTX<70>, hasSM<80>]> ] in { def P.Variant : F_MATH_3<!strconcat("fma", Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1121,7 +1121,7 @@ 
NVPTXInst<(outs RC:$dst), (ins RC:$src), !strconcat(OpcStr, " \t$dst, $src;"), [(set RC:$dst, (fneg (T RC:$src)))]>, - Requires<[useFP16Math, hasPTX70, hasSM80, Pred]>; + Requires<[useFP16Math, hasPTX<70>, hasSM<80>, Pred]>; def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, BFloat16Regs, doF32FTZ>; def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, BFloat16Regs, True>; def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, BFloat16x2Regs, doF32FTZ>; @@ -3337,30 +3337,6 @@ " mov.b32 \t{%tmp_lo, $dst}, $src; }}", [(set BFloat16Regs:$dst, (extractelt (v2bf16 BFloat16x2Regs:$src), 1))]>; - - // // Coalesce two bf16 registers into bf16x2 - // def BuildBF16x2 : NVPTXInst<(outs BFloat16x2Regs:$dst), - // (ins BFloat16Regs:$a, BFloat16Regs:$b), - // "mov.b32 \t$dst, {{$a, $b}};", - // [(set (v2bf16 BFloat16x2Regs:$dst), - // (build_vector (bf16 BFloat16Regs:$a), (bf16 BFloat16Regs:$b)))]>; - - // // Directly initializing underlying the b32 register is one less SASS - // // instruction than than vector-packing move. - // def BuildBF16x2i : NVPTXInst<(outs BFloat16x2Regs:$dst), (ins i32imm:$src), - // "mov.b32 \t$dst, $src;", - // []>; - - // // Split f16x2 into two f16 registers. - // def SplitBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi), - // (ins BFloat16x2Regs:$src), - // "mov.b32 \t{{$lo, $hi}}, $src;", - // []>; - // // Split an i32 into two f16 - // def SplitI32toBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi), - // (ins Int32Regs:$src), - // "mov.b32 \t{{$lo, $hi}}, $src;", - // []>; } // Count leading zeros
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits