arsenm updated this revision to Diff 257011.
arsenm added a comment.

Remove leftover comment from before I used JobAction


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78019/new/

https://reviews.llvm.org/D78019

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/Cuda.h
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  clang/lib/Driver/ToolChains/PS4CPU.h
  clang/test/Driver/cuda-flush-denormals-to-zero.cu

Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===================================================================
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -7,16 +7,28 @@
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test explicit argument.
+// Test explicit argument, with CUDA offload kind
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test explicit argument, with HIP offload kind
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test the default changing with no argument based on the subtarget.
+// Test the default changing with no argument based on the subtarget in HIP mode
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
+
+// Test multiple offload archs with different defaults.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fcuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-cuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+
 // CPUFTZ-NOT: -fdenormal-fp-math
 
 // FTZ-NOT: -fdenormal-fp-math-f32=
@@ -25,3 +37,13 @@
 // The default of ieee is omitted
 // NOFTZ-NOT: "-fdenormal-fp-math"
 // NOFTZ-NOT: "-fdenormal-fp-math-f32"
+
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+// MIXED-DEFAULT-MODE: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// MIXED-DEFAULT-MODE-SAME: "-target-cpu" "gfx803"
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx803"
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx900"
Index: clang/lib/Driver/ToolChains/PS4CPU.h
===================================================================
--- clang/lib/Driver/ToolChains/PS4CPU.h
+++ clang/lib/Driver/ToolChains/PS4CPU.h
@@ -94,9 +94,8 @@
     Action::OffloadKind DeviceOffloadingKind) const override;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs,
-    Action::OffloadKind DeviceOffloadKind,
-    const llvm::fltSemantics *FPType) const override {
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+      const llvm::fltSemantics *FPType) const override {
     // DAZ and FTZ are on by default.
     return llvm::DenormalMode::getPreserveSign();
   }
Index: clang/lib/Driver/ToolChains/Linux.h
===================================================================
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -49,9 +49,8 @@
   std::vector<std::string> ExtraOpts;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs,
-    Action::OffloadKind DeviceOffloadKind,
-    const llvm::fltSemantics *FPType = nullptr) const override;
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+      const llvm::fltSemantics *FPType = nullptr) const override;
 
 protected:
   Tool *buildAssembler() const override;
Index: clang/lib/Driver/ToolChains/Linux.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Linux.cpp
+++ clang/lib/Driver/ToolChains/Linux.cpp
@@ -988,10 +988,10 @@
   ToolChain::addProfileRTLibs(Args, CmdArgs);
 }
 
-llvm::DenormalMode Linux::getDefaultDenormalModeForType(
-  const llvm::opt::ArgList &DriverArgs,
-  Action::OffloadKind DeviceOffloadKind,
-  const llvm::fltSemantics *FPType) const {
+llvm::DenormalMode
+Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs,
+                                     const JobAction &JA,
+                                     const llvm::fltSemantics *FPType) const {
   switch (getTriple().getArch()) {
   case llvm::Triple::x86:
   case llvm::Triple::x86_64: {
Index: clang/lib/Driver/ToolChains/Cuda.h
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.h
+++ clang/lib/Driver/ToolChains/Cuda.h
@@ -156,8 +156,7 @@
                              Action::OffloadKind DeviceOffloadKind) const override;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const override;
 
   // Never try to use the integrated assembler with CUDA; always fork out to
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -721,9 +721,9 @@
 }
 
 llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
     const llvm::fltSemantics *FPType) const {
-  if (DeviceOffloadKind == Action::OFK_Cuda) {
+  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                            options::OPT_fno_cuda_flush_denormals_to_zero,
@@ -731,7 +731,7 @@
       return llvm::DenormalMode::getPreserveSign();
   }
 
-  assert(DeviceOffloadKind != Action::OFK_Host);
+  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
   return llvm::DenormalMode::getIEEE();
 }
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2510,7 +2510,7 @@
 static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
                                        bool OFastEnabled, const ArgList &Args,
                                        ArgStringList &CmdArgs,
-                                       Action::OffloadKind DeviceOffloadKind) {
+                                       const JobAction &JA) {
   // Handle various floating point optimization flags, mapping them to the
   // appropriate LLVM code generation flags. This is complicated by several
   // "umbrella" flags, so we do this by stepping through the flags incrementally
@@ -2533,10 +2533,9 @@
   // -ffp-exception-behavior options: strict, maytrap, ignore
   StringRef FPExceptionBehavior = "";
   const llvm::DenormalMode DefaultDenormalFPMath =
-      TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind);
+      TC.getDefaultDenormalModeForType(Args, JA);
   const llvm::DenormalMode DefaultDenormalFP32Math =
-    TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind,
-                                     &llvm::APFloat::IEEEsingle());
+      TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
 
   llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
   llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
@@ -4295,7 +4294,7 @@
       CmdArgs.push_back("-mdisable-tail-calls");
 
     RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
-                               CmdArgs, JA.getOffloadingDeviceKind());
+                               CmdArgs, JA);
 
     // Render ABI arguments
     switch (TC.getArch()) {
@@ -4618,8 +4617,7 @@
   if (Args.hasArg(options::OPT_fsplit_stack))
     CmdArgs.push_back("-split-stacks");
 
-  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs,
-                             JA.getOffloadingDeviceKind());
+  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
 
   if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
     if (TC.getArch() == llvm::Triple::avr)
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -214,8 +214,7 @@
   static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const override;
 };
 
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -273,18 +273,22 @@
 }
 
 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
     const llvm::fltSemantics *FPType) const {
   // Denormals should always be enabled for f16 and f64.
   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
     return llvm::DenormalMode::getIEEE();
 
-  if (DeviceOffloadKind == Action::OFK_Cuda) {
+  if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
+      JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
+    auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                            options::OPT_fno_cuda_flush_denormals_to_zero,
-                           false))
+                           getDefaultDenormsAreZeroForTarget(Kind)))
       return llvm::DenormalMode::getPreserveSign();
+
+    return llvm::DenormalMode::getIEEE();
   }
 
   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
@@ -294,7 +298,9 @@
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
              getDefaultDenormsAreZeroForTarget(Kind);
-  // Outputs are flushed to zero, preserving sign
+
+  // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
+  // also implicitly treated as zero (DAZ).
   return DAZ ? llvm::DenormalMode::getPreserveSign() :
                llvm::DenormalMode::getIEEE();
 }
Index: clang/include/clang/Driver/ToolChain.h
===================================================================
--- clang/include/clang/Driver/ToolChain.h
+++ clang/include/clang/Driver/ToolChain.h
@@ -636,8 +636,7 @@
   /// environment for the given \p FPType if given. Otherwise, the default
   /// assumed mode for any floating point type.
   virtual llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const {
     return llvm::DenormalMode::getIEEE();
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to