================
@@ -1793,19 +1817,81 @@ let Predicates = [doRsqrtOpt] in {
//
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32", B32, B32, B32,
int_nvvm_add_rn_ftz_f>;
+def INT_NVVM_ADD_RN_SAT_FTZ_F : F_MATH_2<"add.rn.sat.ftz.f32", B32, B32, B32,
int_nvvm_add_rn_ftz_sat_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32", B32, B32, B32,
int_nvvm_add_rn_f>;
+def INT_NVVM_ADD_RN_SAT_F : F_MATH_2<"add.rn.sat.f32", B32, B32, B32,
int_nvvm_add_rn_sat_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32", B32, B32, B32,
int_nvvm_add_rz_ftz_f>;
+def INT_NVVM_ADD_RZ_SAT_FTZ_F : F_MATH_2<"add.rz.sat.ftz.f32", B32, B32, B32,
int_nvvm_add_rz_ftz_sat_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32", B32, B32, B32,
int_nvvm_add_rz_f>;
+def INT_NVVM_ADD_RZ_SAT_F : F_MATH_2<"add.rz.sat.f32", B32, B32, B32,
int_nvvm_add_rz_sat_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32", B32, B32, B32,
int_nvvm_add_rm_ftz_f>;
+def INT_NVVM_ADD_RM_SAT_FTZ_F : F_MATH_2<"add.rm.sat.ftz.f32", B32, B32, B32,
int_nvvm_add_rm_ftz_sat_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32", B32, B32, B32,
int_nvvm_add_rm_f>;
+def INT_NVVM_ADD_RM_SAT_F : F_MATH_2<"add.rm.sat.f32", B32, B32, B32,
int_nvvm_add_rm_sat_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32", B32, B32, B32,
int_nvvm_add_rp_ftz_f>;
+def INT_NVVM_ADD_RP_SAT_FTZ_F : F_MATH_2<"add.rp.sat.ftz.f32", B32, B32, B32,
int_nvvm_add_rp_ftz_sat_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32", B32, B32, B32,
int_nvvm_add_rp_f>;
+def INT_NVVM_ADD_RP_SAT_F : F_MATH_2<"add.rp.sat.f32", B32, B32, B32,
int_nvvm_add_rp_sat_f>;
def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64", B64, B64, B64,
int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64", B64, B64, B64,
int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64", B64, B64, B64,
int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64", B64, B64, B64,
int_nvvm_add_rp_d>;
+foreach rnd = ["_RN", "_RZ", "_RM", "_RP"] in {
+ foreach sat = ["", "_SAT"] in {
+ foreach type = ["F16", "BF16"] in {
+ def INT_NVVM_ADD # rnd # sat # _F32_ # type :
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, B32:$b),
+ !tolower(!subst("_", ".", "add" # rnd # sat # "_f32_" # type)),
+ [(set f32:$dst,
+ (!cast<Intrinsic>(!tolower("int_nvvm_add" # rnd # sat # "_f"))
+ (f32 (fpextend !cast<ValueType>(!tolower(type)):$a)),
----------------
durga4github wrote:
For FMA's the PTX ISA is clear stating the rounding mode is applied _only_ to
the "result".
Let us double check and confirm once that is true for the add/sub also.
(i.e. the given rounding mode has no impact on the conversion of input from f16
to f32 but only applies to the final result)
https://github.com/llvm/llvm-project/pull/168359
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits