[PATCH] D144911: adding bf16 support to NVPTX

Kushan Ahmadian via Phabricator via cfe-commits Mon, 05 Jun 2023 09:18:36 -0700

kushanam updated this revision to Diff 528475.
kushanam added a comment.

Rebasing the D144911 <https://reviews.llvm.org/D144911> patch



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144911/new/

https://reviews.llvm.org/D144911

Files:
  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td


Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -998,17 +998,17 @@
     FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
       [hasPTX<70>, hasSM<80>]>,
 
-    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX70, hasSM80]>,
+    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
 
     FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
       [hasPTX<42>, hasSM<53>]>,
@@ -1022,10 +1022,10 @@
       [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
       Float16x2Regs, [hasPTX<70>, hasSM<80>]>,
-    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
-      [hasPTX70, hasSM80]>,
-    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
-      [hasPTX70, hasSM80]>
+    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, BFloat16x2Regs,
+      [hasPTX<70>, hasSM<80>]>,
+    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, BFloat16x2Regs,
+      [hasPTX<70>, hasSM<80>]>
   ] in {
     def P.Variant :
       F_MATH_3<!strconcat("fma",
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1121,7 +1121,7 @@
       NVPTXInst<(outs RC:$dst), (ins RC:$src),
                 !strconcat(OpcStr, " \t$dst, $src;"),
                 [(set RC:$dst, (fneg (T RC:$src)))]>,
-                Requires<[useFP16Math, hasPTX70, hasSM80, Pred]>;
+                Requires<[useFP16Math, hasPTX<70>, hasSM<80>, Pred]>;
 def BFNEG16_ftz   : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, BFloat16Regs, doF32FTZ>;
 def BFNEG16       : FNEG_BF16_F16X2<"neg.bf16", bf16, BFloat16Regs, True>;
 def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, BFloat16x2Regs, doF32FTZ>;
@@ -3337,30 +3337,6 @@
                                "  mov.b32 \t{%tmp_lo, $dst}, $src; }}",
                                [(set BFloat16Regs:$dst,
                                  (extractelt (v2bf16 BFloat16x2Regs:$src), 1))]>;
-
-  // // Coalesce two bf16 registers into bf16x2
-  // def BuildBF16x2 : NVPTXInst<(outs BFloat16x2Regs:$dst),
-  //                            (ins BFloat16Regs:$a, BFloat16Regs:$b),
-  //                            "mov.b32 \t$dst, {{$a, $b}};",
-  //                            [(set (v2bf16 BFloat16x2Regs:$dst),
-  //                              (build_vector (bf16 BFloat16Regs:$a), (bf16 BFloat16Regs:$b)))]>;
-
-  // // Directly initializing underlying the b32 register is one less SASS
-  // // instruction than than vector-packing move.
-  // def BuildBF16x2i : NVPTXInst<(outs BFloat16x2Regs:$dst), (ins i32imm:$src),
-  //                             "mov.b32 \t$dst, $src;",
-  //                             []>;
-
-  // // Split f16x2 into two f16 registers.
-  // def SplitBF16x2  : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
-  //                             (ins BFloat16x2Regs:$src),
-  //                             "mov.b32 \t{{$lo, $hi}}, $src;",
-  //                             []>;
-  // // Split an i32 into two f16
-  // def SplitI32toBF16x2  : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
-  //                                  (ins Int32Regs:$src),
-  //                                  "mov.b32 \t{{$lo, $hi}}, $src;",
-  //                                  []>;
 }
 
 // Count leading zeros

Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -998,17 +998,17 @@
     FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
       [hasPTX<70>, hasSM<80>]>,
 
-    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX70, hasSM80]>,
+    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, BFloat16Regs,
-      [hasPTX70, hasSM80]>,
+      [hasPTX<70>, hasSM<80>]>,
 
     FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
       [hasPTX<42>, hasSM<53>]>,
@@ -1022,10 +1022,10 @@
       [hasPTX<70>, hasSM<80>]>,
     FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
       Float16x2Regs, [hasPTX<70>, hasSM<80>]>,
-    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
-      [hasPTX70, hasSM80]>,
-    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
-      [hasPTX70, hasSM80]>
+    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, BFloat16x2Regs,
+      [hasPTX<70>, hasSM<80>]>,
+    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, BFloat16x2Regs,
+      [hasPTX<70>, hasSM<80>]>
   ] in {
     def P.Variant :
       F_MATH_3<!strconcat("fma",
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1121,7 +1121,7 @@
       NVPTXInst<(outs RC:$dst), (ins RC:$src),
                 !strconcat(OpcStr, " \t$dst, $src;"),
                 [(set RC:$dst, (fneg (T RC:$src)))]>,
-                Requires<[useFP16Math, hasPTX70, hasSM80, Pred]>;
+                Requires<[useFP16Math, hasPTX<70>, hasSM<80>, Pred]>;
 def BFNEG16_ftz   : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, BFloat16Regs, doF32FTZ>;
 def BFNEG16       : FNEG_BF16_F16X2<"neg.bf16", bf16, BFloat16Regs, True>;
 def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, BFloat16x2Regs, doF32FTZ>;
@@ -3337,30 +3337,6 @@
                                "  mov.b32 \t{%tmp_lo, $dst}, $src; }}",
                                [(set BFloat16Regs:$dst,
                                  (extractelt (v2bf16 BFloat16x2Regs:$src), 1))]>;
-
-  // // Coalesce two bf16 registers into bf16x2
-  // def BuildBF16x2 : NVPTXInst<(outs BFloat16x2Regs:$dst),
-  //                            (ins BFloat16Regs:$a, BFloat16Regs:$b),
-  //                            "mov.b32 \t$dst, {{$a, $b}};",
-  //                            [(set (v2bf16 BFloat16x2Regs:$dst),
-  //                              (build_vector (bf16 BFloat16Regs:$a), (bf16 BFloat16Regs:$b)))]>;
-
-  // // Directly initializing underlying the b32 register is one less SASS
-  // // instruction than than vector-packing move.
-  // def BuildBF16x2i : NVPTXInst<(outs BFloat16x2Regs:$dst), (ins i32imm:$src),
-  //                             "mov.b32 \t$dst, $src;",
-  //                             []>;
-
-  // // Split f16x2 into two f16 registers.
-  // def SplitBF16x2  : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
-  //                             (ins BFloat16x2Regs:$src),
-  //                             "mov.b32 \t{{$lo, $hi}}, $src;",
-  //                             []>;
-  // // Split an i32 into two f16
-  // def SplitI32toBF16x2  : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
-  //                                  (ins Int32Regs:$src),
-  //                                  "mov.b32 \t{{$lo, $hi}}, $src;",
-  //                                  []>;
 }
 
 // Count leading zeros

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D144911: adding bf16 support to NVPTX

Reply via email to