jhuber6 updated this revision to Diff 453021. jhuber6 added a comment. Adjusting the patch: adding code generation options for the other constants and switching to linkonce ODR linkage.
I attempted to follow Jon's suggestion and group it with the existing code, but all the existing handling for this occurs in the driver, so I don't think there's a convenient way to drop in this functionality without adding a new function, as in this patch. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D130096/new/ https://reviews.llvm.org/D130096 Files: clang/include/clang/Basic/CodeGenOptions.def clang/include/clang/Basic/CodeGenOptions.h clang/include/clang/Driver/Options.td clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/TargetInfo.cpp clang/lib/CodeGen/TargetInfo.h clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGen/amdgcn-control-constants.c
Index: clang/test/CodeGen/amdgcn-control-constants.c =================================================================== --- /dev/null +++ clang/test/CodeGen/amdgcn-control-constants.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX90A +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX1030 +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx908 -ffast-math -S -emit-llvm -o - %s | FileCheck %s --check-prefix=FAST +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx908 -ffinite-math-only -S -emit-llvm -o - %s | FileCheck %s --check-prefix=FINITE +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx703 -fgpu-flush-denormals-to-zero -S -emit-llvm -o - %s | FileCheck %s --check-prefix=DAZ +// RUN: %clang_cc1 -x c -triple amdgcn-amd-amdhsa -target-cpu gfx908 -funsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck %s --check-prefix=UNSAFE-MATH + +// GFX90A: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX90A: @__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// GFX90A: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX90A: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX90A: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// GFX90A: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 9010, align 4 +// GFX90A: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// GFX1030: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX1030: 
@__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX1030: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX1030: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// GFX1030: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// GFX1030: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 10048, align 4 +// GFX1030: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// FAST: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// FAST: @__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 9008, align 4 +// FAST: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// FINITE: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// FINITE: @__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FINITE: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FINITE: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// FINITE: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr 
addrspace(4) constant i8 1, align 1 +// FINITE: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 9008, align 4 +// FINITE: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// DAZ: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// DAZ: @__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// DAZ: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// DAZ: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// DAZ: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// DAZ: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 7003, align 4 +// DAZ: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// UNSAFE-MATH: @__oclc_daz_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// UNSAFE-MATH: @__oclc_wavefrontsize64 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// UNSAFE-MATH: @__oclc_finite_only_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// UNSAFE-MATH: @__oclc_unsafe_math_opt = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// UNSAFE-MATH: @__oclc_correctly_rounded_sqrt32 = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// UNSAFE-MATH: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 9008, align 4 +// UNSAFE-MATH: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- 
clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1562,6 +1562,12 @@ if (!Opts.EmitVersionIdentMetadata) GenerateArg(Args, OPT_Qn, SA); + if (Opts.AMDGPUDenormAtZero) + GenerateArg(Args, + *Opts.AMDGPUDenormAtZero ? OPT_fgpu_flush_denormals_to_zero + : OPT_fno_gpu_flush_denormals_to_zero, + SA); + switch (Opts.FiniteLoops) { case CodeGenOptions::FiniteLoopsKind::Language: break; @@ -1668,6 +1674,13 @@ Opts.setDebugInfo(codegenoptions::LimitedDebugInfo); } + // Forward the flag value to override the default target behaviour on AMDGPU + // targets. + if (Args.hasArg(OPT_fgpu_flush_denormals_to_zero)) + Opts.AMDGPUDenormAtZero = true; + else if (Args.hasArg(OPT_fno_gpu_flush_denormals_to_zero)) + Opts.AMDGPUDenormAtZero = false; + for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.DebugPrefixMap.insert( Index: clang/lib/CodeGen/TargetInfo.h =================================================================== --- clang/lib/CodeGen/TargetInfo.h +++ clang/lib/CodeGen/TargetInfo.h @@ -76,6 +76,9 @@ CodeGen::CodeGenModule &CGM, const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {} + /// Provides a convenient hook to handle extra target-specific globals. + virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {} + /// Any further codegen related checks that need to be done on a function call /// in a target specific manner. 
virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/Type.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -9288,6 +9289,8 @@ void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, CodeGenModule &CGM) const; + void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -9403,6 +9406,69 @@ } } +/// Emits control constants used to change per-architecture behaviour in the +/// AMDGPU ROCm device libraries. +void AMDGPUTargetCodeGenInfo::emitTargetGlobals( + CodeGen::CodeGenModule &CGM) const { + if (!CGM.getTriple().isAMDGCN()) + return; + StringRef CPU = CGM.getTarget().getTargetOpts().CPU; + llvm::AMDGPU::GPUKind Kind = llvm::AMDGPU::parseArchAMDGCN(CPU); + unsigned Features = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + if (Kind == llvm::AMDGPU::GK_NONE) + return; + + unsigned Minor; + unsigned Major; + StringRef Identifier = CPU.drop_while([](char C) { return !isDigit(C); }); + if (Identifier.take_back(2).getAsInteger(16, Minor) || + Identifier.drop_back(2).getAsInteger(10, Major)) + return; + + auto AddGlobal = [&](StringRef Name, unsigned Value, unsigned Size) { + if (CGM.getModule().getNamedGlobal(Name)) + return; + + auto *Type = + llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), Type, true, + llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(Type, Value), Name, nullptr, + 
llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); + GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Type)); + }; + + bool DenormAtZero = + CGM.getCodeGenOpts().AMDGPUDenormAtZero + ? *CGM.getCodeGenOpts().AMDGPUDenormAtZero + : !((Features & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && + (Features & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32)); + bool Wavefront64 = !(Features & llvm::AMDGPU::FEATURE_WAVE32); + bool FastRelaxedMath = CGM.getLangOpts().FastMath; + bool FiniteOnly = + CGM.getLangOpts().NoHonorInfs || CGM.getLangOpts().NoHonorNaNs; + bool UnsafeMath = CGM.getCodeGenOpts().UnsafeMathOptimizations; + bool CorrectSqrt = CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt; + + // Control constants for math operations. + AddGlobal("__oclc_daz_opt", DenormAtZero, /*Size=*/8); + AddGlobal("__oclc_wavefrontsize64", Wavefront64, /*Size=*/8); + AddGlobal("__oclc_finite_only_opt", FiniteOnly || FastRelaxedMath, + /*Size=*/8); + AddGlobal("__oclc_unsafe_math_opt", UnsafeMath || FastRelaxedMath, + /*Size=*/8); + AddGlobal("__oclc_correctly_rounded_sqrt32", CorrectSqrt, /*Size=*/8); + + // Control constants for the system. 
+ AddGlobal("__oclc_ISA_version", Minor + Major * 1000, /*Size=*/32); + AddGlobal("__oclc_ABI_version", + CGM.getTarget().getTargetOpts().CodeObjectVersion, /*Size=*/32); +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -936,6 +936,7 @@ if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); + getTargetCodeGenInfo().emitTargetGlobals(*this); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -963,9 +963,9 @@ HelpText<"Ignore environment variables to detect CUDA installation">; def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>, HelpText<"Path to ptxas (used for compiling CUDA code)">; -def fgpu_flush_denormals_to_zero : Flag<["-"], "fgpu-flush-denormals-to-zero">, +def fgpu_flush_denormals_to_zero : Flag<["-"], "fgpu-flush-denormals-to-zero">, Flags<[CC1Option]>, HelpText<"Flush denormal floating point values to zero in CUDA/HIP device mode.">; -def fno_gpu_flush_denormals_to_zero : Flag<["-"], "fno-gpu-flush-denormals-to-zero">; +def fno_gpu_flush_denormals_to_zero : Flag<["-"], "fno-gpu-flush-denormals-to-zero">, Flags<[CC1Option]>; def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">, Alias<fgpu_flush_denormals_to_zero>; def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">, @@ -1864,10 +1864,10 @@ } // end -f[no-]sanitize* flags -def funsafe_math_optimizations : 
Flag<["-"], "funsafe-math-optimizations">, - Group<f_Group>; -def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">, - Group<f_Group>; +def funsafe_math_optimizations : Flag<["-"], "funsafe-math-optimizations">, Flags<[CC1Option]>, + Group<f_Group>, MarshallingInfoFlag<CodeGenOpts<"UnsafeMathOptimizations">>; +def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">, Flags<[CC1Option]>, + Group<f_Group>, MarshallingInfoFlag<CodeGenOpts<"UnsafeMathOptimizations">>; def fassociative_math : Flag<["-"], "fassociative-math">, Group<f_Group>; def fno_associative_math : Flag<["-"], "fno-associative-math">, Group<f_Group>; defm reciprocal_math : BoolFOption<"reciprocal-math", Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -407,6 +407,9 @@ const char *Argv0 = nullptr; std::vector<std::string> CommandLineArgs; + /// Override the system default denormalization behavior for AMDGPU. + Optional<bool> AMDGPUDenormAtZero; + /// The minimum hotness value a diagnostic needs in order to be included in /// optimization diagnostics. /// Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -193,6 +193,7 @@ CODEGENOPT(HIPSaveKernelArgName, 1, 0) ///< Set when -fhip-kernel-arg-name is enabled. CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names. CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information. +CODEGENOPT(UnsafeMathOptimizations, 1, 0) ///< Set when -funsafe-math-optimizations. 
/// When false, this attempts to generate code as if the result of an /// overflowing conversion matches the overflowing behavior of a target's native
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits