arsenm created this revision. arsenm added reviewers: yaxunl, Anastasia, jcranmer-intel, tra, jlebar, jhuber6. Herald added a project: All. arsenm requested review of this revision. Herald added subscribers: jplehr, sstefan1, wdng. Herald added a reviewer: jdoerfert.
OpenCL and HIP have -cl-fp32-correctly-rounded-divide-sqrt and -fno-hip-correctly-rounded-divide-sqrt. The corresponding fpmath metadata was only set on fdiv, and not sqrt. The backend is currently underutilizing sqrt lowering options, and the responsibility is split between the libraries and backend and this metadata is needed. CUDA/NVCC has -prec-div and -prev-sqrt but clang doesn't appear to be aiming for compatibility with those. Don't know if OpenMP has a similar control. https://reviews.llvm.org/D154495 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGenCUDA/correctly-rounded-div.cu clang/test/CodeGenOpenCL/fpmath.cl
Index: clang/test/CodeGenOpenCL/fpmath.cl =================================================================== --- clang/test/CodeGenOpenCL/fpmath.cl +++ clang/test/CodeGenOpenCL/fpmath.cl @@ -21,6 +21,13 @@ return a / b; } +float spscalarsqrt(float a) { + // CHECK-LABEL: @spscalarsqrt + // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]] + // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}} + return __builtin_sqrtf(a); +} + #if __OPENCL_C_VERSION__ >=120 void printf(constant char* fmt, ...); @@ -34,11 +41,26 @@ #ifndef NOFP64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable +typedef __attribute__(( ext_vector_type(4) )) double double4; + double dpscalardiv(double a, double b) { // CHECK: @dpscalardiv // CHECK-NOT: !fpmath return a / b; } + +double4 dpvectordiv(double4 a, double4 b) { + // CHECK: @dpvectordiv + // CHECK-NOT: !fpmath + return a / b; +} + +double dpscalarsqrt(double a) { + // CHECK-LABEL: @dpscalarsqrt + // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}} + return __builtin_sqrt(a); +} + #endif // NODIVOPT: ![[MD]] = !{float 2.500000e+00} Index: clang/test/CodeGenCUDA/correctly-rounded-div.cu =================================================================== --- clang/test/CodeGenCUDA/correctly-rounded-div.cu +++ clang/test/CodeGenCUDA/correctly-rounded-div.cu @@ -32,4 +32,18 @@ return a / b; } -// NCRDIV: ![[MD]] = !{float 2.500000e+00} +// COMMON-LABEL: @_Z12spscalarsqrt +// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]] +// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}} +__device__ float spscalarsqrt(float a) { + return __builtin_sqrtf(a); +} + +// COMMON-LABEL: @_Z12dpscalarsqrt +// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}} +// COMMON-NOT: !fpmath +__device__ double dpscalarsqrt(double a) { + return __builtin_sqrt(a); +} + +// NCRSQRT: ![[MD]] = !{float 2.500000e+00} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4688,6 +4688,10 @@ /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); + /// SetFPAccuracy - Set the minimum required accuracy of the given fdiv or + /// sqrt operation based on CodeGenOpts. + void SetSqrtOrDivFPAccuracy(llvm::Value *Val); + /// Set the codegen fast-math flags. void SetFastMathFlags(FPOptions FPFeatures); Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -3478,21 +3478,7 @@ llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); - if ((CGF.getLangOpts().OpenCL && - !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || - (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice && - !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { - // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp - // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt - // build option allows an application to specify that single precision - // floating-point divide (x/y and 1/x) and sqrt used in the program - // source are correctly rounded. - llvm::Type *ValTy = Val->getType(); - if (ValTy->isFloatTy() || - (isa<llvm::VectorType>(ValTy) && - cast<llvm::VectorType>(ValTy)->getElementType()->isFloatTy())) - CGF.SetFPAccuracy(Val, 2.5); - } + CGF.SetSqrtOrDivFPAccuracy(Val); return Val; } else if (Ops.isFixedPointOp()) Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -5585,6 +5585,24 @@ cast<llvm::Instruction>(Val)->setMetadata(llvm::LLVMContext::MD_fpmath, Node); } +void CodeGenFunction::SetSqrtOrDivFPAccuracy(llvm::Value *Val) { + llvm::Type *EltTy = Val->getType()->getScalarType(); + if (!EltTy->isFloatTy()) + return; + + if ((getLangOpts().OpenCL && + !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || + (getLangOpts().HIP && getLangOpts().CUDAIsDevice && + !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp + // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt + // build option allows an application to specify that single precision + // floating-point divide (x/y and 1/x) and sqrt used in the program + // source are correctly rounded. + SetFPAccuracy(Val, 2.5); + } +} + namespace { struct LValueOrRValue { LValue LV; Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -2532,11 +2532,14 @@ case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - case Builtin::BI__builtin_sqrtf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::sqrt, - Intrinsic::experimental_constrained_sqrt)); - + case Builtin::BI__builtin_sqrtf128: { + llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( + *this, E, + Intrinsic::sqrt, + Intrinsic::experimental_constrained_sqrt); + SetSqrtOrDivFPAccuracy(Call); + return RValue::get(Call); + } case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits