================
@@ -605,15 +608,117 @@ static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder,
Value *Src,
return Builder.CreateFMul(Rsq, OutputScaleFactor);
}
+/// Emit inverse sqrt expansion for f64 with a correction sequence on top of
+/// v_rsq_f64. This should give a 1ulp result.
+Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
+ FastMathFlags SqrtFMF,
+ FastMathFlags DivFMF,
+ const Instruction *CtxI,
+ bool IsNegative) const {
+ // rsq(x):
+ // double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+ // double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0,
+ //                     1.0);
+ // return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
+ //
+ // The rsq instruction handles the special cases correctly. We need to check
+ // for the edge case conditions to ensure the special case propagates through
+ // the later instructions.
+
+ Value *Y0 = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, X);
+
+ // Try to elide the edge case check.
+ //
+ // Fast math flags imply:
+ // sqrt ninf => !isinf(x)
+ // sqrt nnan => not helpful
+ // fdiv ninf => x != 0, !isinf(x)
+ // fdiv nnan => x != 0
----------------
dtcxzyw wrote:
`+-1.0 / +-0` gives `+-inf`, not NaN, so `nnan` on the fdiv does not rule out `x == 0`; only `ninf` does. I don't think nnan helps here.
https://github.com/llvm/llvm-project/pull/172053
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits