tra created this revision.
tra added a reviewer: jlebar.
Herald added subscribers: sanjoy.google, bixia, yaxunl.
Herald added a project: clang.
tra requested review of this revision.
This is needed to compile some headers in CUDA-11 that assume that threadIdx is
implicitly convertible to dim3. With NVCC, threadIdx is a uint3 and there is a
dim3(uint3) constructor, so that works. Clang uses a special type for threadIdx,
so dim3 can't be constructed from it. Instead, this patch adds dim3 and uint3
conversion functions to the builtin variable classes.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D88250
Files:
clang/lib/Headers/__clang_cuda_builtin_vars.h
clang/lib/Headers/__clang_cuda_runtime_wrapper.h
Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h
===================================================================
--- clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -377,28 +377,36 @@
// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to
// come after we've pulled in the definition of uint3 and dim3.
+__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const {
+ return {x, y, z};
+}
+
__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
- uint3 ret;
- ret.x = x;
- ret.y = y;
- ret.z = z;
- return ret;
+ return {x, y, z};
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const {
+ return {x, y, z};
}
__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
- uint3 ret;
- ret.x = x;
- ret.y = y;
- ret.z = z;
- return ret;
+ return {x, y, z};
}
__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
- return dim3(x, y, z);
+ return {x, y, z};
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator uint3() const {
+ return {x, y, z};
}
__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
- return dim3(x, y, z);
+ return {x, y, z};
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
+ return {x, y, z};
}
#include <__clang_cuda_cmath.h>
Index: clang/lib/Headers/__clang_cuda_builtin_vars.h
===================================================================
--- clang/lib/Headers/__clang_cuda_builtin_vars.h
+++ clang/lib/Headers/__clang_cuda_builtin_vars.h
@@ -55,7 +55,9 @@
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
+
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
@@ -66,7 +68,9 @@
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
+
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
@@ -78,6 +82,8 @@
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
+ __attribute__((device)) operator uint3() const;
+
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
@@ -89,6 +95,8 @@
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
+ __attribute__((device)) operator uint3() const;
+
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};
@@ -108,5 +116,6 @@
#undef __CUDA_DEVICE_BUILTIN
#undef __CUDA_BUILTIN_VAR
#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
+#undef __DELETE
#endif /* __CUDA_BUILTIN_VARS_H */
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits