spatel updated this revision to Diff 144200. spatel added a comment. Patch updated:
1. Improve the documentation language - more suggestions welcome! 2. Change the default setting so the workaround is 'off' (i.e., by default, assume the source is compliant and optimize accordingly). 3. Remove the 'no' version of the flag. Given the change in the default, this seems more natural to me, and it simplifies the patch/tests... but I might have been too pessimistic before and this is too optimistic? Let me know... https://reviews.llvm.org/D46135 Files: docs/UsersManual.rst include/clang/Driver/Options.td include/clang/Frontend/CodeGenOptions.def lib/CodeGen/CGCall.cpp lib/Driver/ToolChains/Clang.cpp lib/Frontend/CompilerInvocation.cpp test/CodeGen/no-junk-ftrunc.c test/Driver/fast-math.c
Index: test/Driver/fast-math.c =================================================================== --- test/Driver/fast-math.c +++ test/Driver/fast-math.c @@ -287,3 +287,17 @@ // RUN: %clang -### -ftrapping-math -fno-trapping-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-TRAPPING-MATH %s // CHECK-NO-TRAPPING-MATH: "-fno-trapping-math" + +// This isn't fast-math, but the option is handled in the same place as other FP params. +// The flag is *not* passed by default. + +// RUN: %clang -### -ffp-cast-overflow-workaround -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND %s +// CHECK-FPOV-WORKAROUND: "-cc1" +// CHECK-FPOV-WORKAROUND: "-ffp-cast-overflow-workaround" + +// RUN: %clang -### -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND-DEFAULT %s +// CHECK-FPOV-WORKAROUND-DEFAULT: "-cc1" +// CHECK-FPOV-WORKAROUND-DEFAULT-NOT: "-ffp-cast-overflow-workaround" + Index: test/CodeGen/no-junk-ftrunc.c =================================================================== --- test/CodeGen/no-junk-ftrunc.c +++ test/CodeGen/no-junk-ftrunc.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -S -ffp-cast-overflow-workaround %s -emit-llvm -o - | FileCheck %s + +// CHECK-LABEL: main +// CHECK: attributes #0 = {{.*}}"fp-cast-overflow-workaround"="true"{{.*}} + +int main() { + return 0; +} + Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -699,6 +699,8 @@ Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ); Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math); Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math); + Opts.FPCastOverflowWorkaround = Args.hasArg(OPT_ffp_cast_overflow_workaround); + Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss); Opts.NumRegisterParameters = getLastArgIntValue(Args, OPT_mregparm, 0, Diags); Opts.NoExecStack = 
Args.hasArg(OPT_mno_exec_stack); Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -2241,6 +2241,10 @@ CmdArgs.push_back("-mfpmath"); CmdArgs.push_back(A->getValue()); } + + // Disable a codegen optimization for floating-point casts. + if (Args.hasArg(options::OPT_ffp_cast_overflow_workaround)) + CmdArgs.push_back("-ffp-cast-overflow-workaround"); } static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -1727,6 +1727,9 @@ FuncAttrs.addAttribute("no-trapping-math", llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + if (CodeGenOpts.FPCastOverflowWorkaround) + FuncAttrs.addAttribute("fp-cast-overflow-workaround", "true"); + // TODO: Are these all needed? // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. FuncAttrs.addAttribute("no-infs-fp-math", Index: include/clang/Frontend/CodeGenOptions.def =================================================================== --- include/clang/Frontend/CodeGenOptions.def +++ include/clang/Frontend/CodeGenOptions.def @@ -136,6 +136,12 @@ CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN. CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt + +/// Disable a float-to-int-to-float cast optimization. This attempts to generate +/// code as if the result of an overflowing conversion matches the overflowing +/// behavior of a target's native float-to-int conversion instructions. +CODEGENOPT(FPCastOverflowWorkaround, 1, 0) + CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. 
/// \brief Method of Objective-C dispatch to use. Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1029,6 +1029,9 @@ Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)" " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">; +def ffp_cast_overflow_workaround : Flag<["-"], + "ffp-cast-overflow-workaround">, Group<f_Group>, Flags<[CC1Option]>; + def ffor_scope : Flag<["-"], "ffor-scope">, Group<f_Group>; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group<f_Group>; Index: docs/UsersManual.rst =================================================================== --- docs/UsersManual.rst +++ docs/UsersManual.rst @@ -1255,6 +1255,16 @@ flushed-to-zero number is preserved in the sign of 0, denormals are flushed to positive zero, respectively. +.. option:: -ffp-cast-overflow-workaround + + Enable a workaround for code that casts floating-point values to + integers and back to floating-point. If the floating-point value + is not representable in the intermediate integer type, the code is + incorrect according to the language standard. This flag will attempt + to generate code as if the result of an overflowing conversion matches + the overflowing behavior of a target's native float-to-int conversion + instructions. + .. option:: -fwhole-program-vtables Enable whole-program vtable optimizations, such as single-implementation
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits