@@ -1233,6 +1233,10 @@ def offload_compression_level_EQ : Joined<["--"],
"offload-compression-level=">,
Flags<[HelpHidden]>,
HelpText<"Compression level for offload device binaries (HIP only)">;
+def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
+ HelpText<"Set the
@@ -982,8 +982,9 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
case ADDRESS_SPACE_SHARED:
Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
break;
-case ADDRESS_SPACE_DSHARED:
- Opc = TM.is64Bit() ? NVPTX::cvta_dshared_64 :
@@ -1034,6 +1034,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned
BuiltinID,
case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
+ case NVPTX::BI__nvvm_abs_bf16
@@ -0,0 +1,35 @@
+// libstdc++ uses the non-constexpr function std::__glibcxx_assert_fail()
+// to trigger compilation errors when the __glibcxx_assert(cond) macro
+// is used in a constexpr context.
+// Compilation fails when using code from the libstdc++ (such as std::array) on
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/128222
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -25,6 +25,7 @@ enum AddressSpace : unsigned {
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
ADDRESS_SPACE_TENSOR = 6,
+ ADDRESS_SPACE_SHARED_CLUSTER = 7,
Artem-B wrote:
PTX docs say:
```
If no sub-qualifier is specified with the .shared state sp
@@ -109,3 +109,48 @@ void func2(void) {
void func3(void) {
float a[16][1] = {{0.}};
}
+
+// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca(
+// CL12-SAME: ) #[[ATTR0]] {
+// CL12-NEXT: [[ENTRY:.*:]]
+// CL12-NEXT:[[PLONG:%.*]] = alloca i64, al
@@ -109,3 +109,48 @@ void func2(void) {
void func3(void) {
float a[16][1] = {{0.}};
}
+
+// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca(
+// CL12-SAME: ) #[[ATTR0]] {
+// CL12-NEXT: [[ENTRY:.*:]]
+// CL12-NEXT:[[PLONG:%.*]] = alloca i64, al
https://github.com/Artem-B approved this pull request.
LGTM.
Do you need help merging the patch?
https://github.com/llvm/llvm-project/pull/127187
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/
@@ -300,6 +306,10 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions
&Opts,
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
if (GPU == OffloadArch::SM_100a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
+if (GPU == OffloadArch::SM_
@@ -32,22 +32,24 @@ public:
template class B;
}
-// The implicit host/device attrs of virtual dtor B::~B() is inferred to
-// have implicit device attr since dtors of its members and parent classes can
-// be executed on device. This causes a diagnostic since B::~B() must
-//
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/128926
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -32,22 +32,24 @@ public:
template class B;
}
-// The implicit host/device attrs of virtual dtor B::~B() is inferred to
-// have implicit device attr since dtors of its members and parent classes can
-// be executed on device. This causes a diagnostic since B::~B() must
-//
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/129117
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/129117
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter
Inherited::visitUsedDecl(Loc, D);
}
+ // Visitor member and parent dtors called by this dtor.
+ void VisitCalledDestructors(CXXDestructorDecl *DD) {
+const CXXRecordDecl *RD = DD->getParent();
+
+// Visi
https://github.com/Artem-B approved this pull request.
LGTM functionally, some style nits.
https://github.com/llvm/llvm-project/pull/129117
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-comm
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter
Inherited::visitUsedDecl(Loc, D);
}
+ // Visitor member and parent dtors called by this dtor.
+ void VisitCalledDestructors(CXXDestructorDecl *DD) {
+const CXXRecordDecl *RD = DD->getParent();
+
+// Visi
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter
Inherited::visitUsedDecl(Loc, D);
}
+ // Visitor member and parent dtors called by this dtor.
+ void VisitCalledDestructors(CXXDestructorDecl *DD) {
+const CXXRecordDecl *RD = DD->getParent();
+
+// Visi
@@ -179,8 +179,10 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t
__idx, uint64_t __x,
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
// Newer targets can use the dedicated CUDA support.
- if (__CUDA_ARCH__ >=
@@ -179,8 +179,10 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t
__idx, uint64_t __x,
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
// Newer targets can use the dedicated CUDA support.
- if (__CUDA_ARCH__ >=
@@ -96,6 +100,47 @@ the header file to conditionally make a
function constexpr whenever
the constant evaluation of the corresponding builtin (for example,
``std::fmax`` calls ``__builtin_fmax``) is supported in Clang.
+``__has_target_builtin``
+
+
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const
Function &F,
: std::nullopt;
}
+static SmallVector getFnAttrParsedVector(const Function &F,
+ StringRef Attr) {
+ SmallVector V;
+ auto &Ctx
@@ -506,24 +507,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const
Function &F,
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of Reqntid* is specified, don't output reqnti
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
@@ -932,9 +932,18 @@ def W_Joined : Joined<["-"], "W">, Group,
def Xanalyzer : Separate<["-"], "Xanalyzer">,
HelpText<"Pass to the static analyzer">, MetaVarName<"">,
Group;
-def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
- HelpText<"Pass to th
@@ -5059,6 +5092,18 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV,
StringRef K,
cast(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
return true;
}
+ if (K.consume_front("maxntid") && (K == "x" || K == "y" || K == "z")) {
Artem-B wrot
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/127890
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -932,9 +932,18 @@ def W_Joined : Joined<["-"], "W">, Group,
def Xanalyzer : Separate<["-"], "Xanalyzer">,
HelpText<"Pass to the static analyzer">, MetaVarName<"">,
Group;
-def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
- HelpText<"Pass to th
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/127890
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const
Function &F,
: std::nullopt;
}
+static SmallVector getFnAttrParsedVector(const Function &F,
+ StringRef Attr) {
+ SmallVector V;
+ auto &Ctx
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
Artem-B wrote:
> > I'd vote for fixing the CUDA on Arm case that failed in the meantime, then
> > make a decision as to whether or not we should go back to `__has_builtin`
> > only returning the current compilation target once that's gone.
>
> +1, if we can get away with it.
+1 to that.
http
@@ -27,6 +27,8 @@
extern "C" {
#endif
+#if !defined(__CUDA_ARCH__)
+
Artem-B wrote:
Unfortunately, this will be observable to any code that happens to need those
builtins, and they are actually not provided by the compiler (i.e.
`__has_builtin(__wfi)` would
@@ -27,6 +27,8 @@
extern "C" {
#endif
+#if !defined(__CUDA_ARCH__)
+
Artem-B wrote:
I'm actually wondering if the header needs `__has_builtin()`... They are coming
from the same compiler, and, if they are missing, they are declared in terms of
`__builtin*()
@@ -27,6 +27,8 @@
extern "C" {
#endif
+#if !defined(__CUDA_ARCH__)
+
Artem-B wrote:
```
#if __CUDA_ARCH__
// Make sure the declarations here are in sync with the functions provided
under #else.
#else
#endif
```
https://github.com/llvm/llvm-project/pull/1
Artem-B wrote:
Would it make sense to split the deferred diag fix (1a/1b on your list) and
the inference of destructor attributes into separate patches?
Deferred diags fix is straightforward, but destructor attribute inference may
need a longer discussion.
https://github.com/llvm/llvm-project/pul
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/133590
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,3329 @@
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -O3 -o - %s
-emit-llvm | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o -
%s -emit-llvm | FileCheck %s
+#include "../Headers/Inputs/include/cuda.h"
---
@@ -596,6 +605,28 @@ def __nvvm_e4m3x2_to_f16x2_rn_relu :
NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh
def __nvvm_e5m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2,
__fp16>(short)", SM_89, PTX81>;
def __nvvm_e5m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2,
__fp16>
@@ -1021,6 +1036,174 @@ __device__ void nvvm_cvt_sm89() {
__nvvm_e5m2x2_to_f16x2_rn(0x4c4c);
// CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16
19532)
__nvvm_e5m2x2_to_f16x2_rn_relu(0x4c4c);
+
+ // CHECK_PTX81_SM89: call i32 @llvm.nvvm.f2tf32.rn
@@ -703,6 +703,53 @@ let hasSideEffects = false in {
defm CVT_to_tf32_rz_satf : CVT_TO_TF32<"rz.satfinite", [hasPTX<86>,
hasSM<100>]>;
defm CVT_to_tf32_rn_relu_satf : CVT_TO_TF32<"rn.relu.satfinite",
[hasPTX<86>, hasSM<100>]>;
defm CVT_to_tf32_rz_relu_satf : CVT_TO_TF
@@ -580,6 +580,15 @@ def __nvvm_f2bf16_rz :
NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80,
PTX70>;
def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>;
+def __nvvm_f2tf32_
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/134459
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/139244
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Artem-B wrote:
@jhuber6 @ldionne One concern I have for this change is that it will break
folks who will use older libc++ with the new Clang + wrapper headers.
Is older libc++ expected to work with non-matching clang version? If the
expectation is that libc++ and clang are from the same versio
https://github.com/Artem-B closed
https://github.com/llvm/llvm-project/pull/139164
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Artem-B wrote:
> Right now this checks for `libc++` less than 14. Is that still relevant
> following that change?
That's a very good point. Looks like those `__constexpr_fmin/fmax` are gone now
and we do not need them any more.
https://github.com/llvm/llvm-project/pull/139164
https://github.com/Artem-B updated
https://github.com/llvm/llvm-project/pull/139164
>From a1d60feed11174b9d2106b57ee15ff6d9bc56fa4 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Thu, 8 May 2025 14:43:47 -0700
Subject: [PATCH] [CUDA] remove obsolete GPU-side __constexpr* wrappers
libc++ no
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/139164
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Artem-B wrote:
No wrappers -- no problems. :-)
https://github.com/llvm/llvm-project/pull/139164
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/138162
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Artem-B wrote:
@cgmb
> I would suggest that we should either (a) change the default GPU target to
> native and make the failure to detect the user’s GPU into a hard compiler
> error, or (b) change the default GPU target to SPIR-V so that it works on
> every machine.
The thing is that the se
Artem-B wrote:
@jhuber6 do you think we can use `native` instead? I think it would be a
somewhat better option here.
If we have to choose a GPU variant by default, we may as well choose the actual
GPU, rather than a conditional choice between generic SPIR-V or an old GPU,
which has the disadva
@@ -1399,19 +1399,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const
Function *F, raw_ostream &O) {
if (PTy) {
O << "\t.param .u" << PTySizeInBits << " .ptr";
+bool IsCUDA = static_cast(TM).getDrvInterface()
==
+ NVPTX::CUDA;
@@ -2927,6 +2928,20 @@ void Verifier::visitFunction(const Function &F) {
"Calling convention does not support varargs or "
"perfect forwarding!",
&F);
+if (F.getCallingConv() == CallingConv::PTX_Kernel &&
+TT.getOS() == Triple::CUDA) {
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/138706
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -170,6 +170,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public
TargetInfo {
Opts["cl_khr_global_int32_extended_atomics"] = true;
Opts["cl_khr_local_int32_base_atomics"] = true;
Opts["cl_khr_local_int32_extended_atomics"] = true;
+
+Opts["__opencl_c_
@@ -1349,6 +1349,10 @@ static bool upgradeIntrinsicFunction1(Function *F,
Function *&NewFn,
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
Name == "swap.lo.hi.b64")
Expand = true;
+ else if (Name == "barrier0" || Name == "b
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/141278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -177,6 +177,7 @@ let Attributes = [NoReturn] in {
}
let Attributes = [NoThrow] in {
def __nvvm_nanosleep : NVPTXBuiltinSMAndPTX<"void(unsigned int)", SM_70,
PTX63>;
+ def __nvvm_pm_event_mask : NVPTXBuiltin<"void(unsigned short)">;
Artem-B wrote:
The ar
https://github.com/Artem-B approved this pull request.
Builtin signature needs a fix, but LGTM otherwise.
https://github.com/llvm/llvm-project/pull/141278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/li
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/141036
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/Artem-B commented:
Being able to override a flag is a good thing to have, IMO. There are builds
where the owners of the leaf targets do not have much control over which options
are set by the "default" compilation, so they need to rely on being able to
override preceding opti
https://github.com/Artem-B edited
https://github.com/llvm/llvm-project/pull/140106
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5734,6 +5734,9 @@ def nobuiltininc : Flag<["-"], "nobuiltininc">,
def nogpuinc : Flag<["-"], "nogpuinc">, Group,
HelpText<"Do not add include paths for CUDA/HIP and"
" do not include the default CUDA/HIP wrapper headers">;
+def gpuinc : Flag<["-"], "gpuinc">, Group,
+
@@ -5734,6 +5734,9 @@ def nobuiltininc : Flag<["-"], "nobuiltininc">,
def nogpuinc : Flag<["-"], "nogpuinc">, Group,
HelpText<"Do not add include paths for CUDA/HIP and"
" do not include the default CUDA/HIP wrapper headers">;
+def gpuinc : Flag<["-"], "gpuinc">, Group,
+
https://github.com/Artem-B approved this pull request.
https://github.com/llvm/llvm-project/pull/141143
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
1201 - 1269 of 1269 matches
Mail list logo