tra created this revision. tra added a reviewer: timshen. Herald added subscribers: sanjoy.google, bixia.
vote.ballot instruction is gone in recent CUDA versions and vote.sync.ballot can not be used because it needs a thread mask parameter. Fortunately PTX 6.2 (introduced with CUDA-9.2) provides activemask.b32 instruction for this. https://reviews.llvm.org/D66665 Files: clang/lib/Headers/__clang_cuda_intrinsics.h Index: clang/lib/Headers/__clang_cuda_intrinsics.h =================================================================== --- clang/lib/Headers/__clang_cuda_intrinsics.h +++ clang/lib/Headers/__clang_cuda_intrinsics.h @@ -211,7 +211,15 @@ return __nvvm_vote_ballot_sync(mask, pred); } -inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); } +inline __device__ unsigned int __activemask() { +#if CUDA_VERSION < 9020 + return __nvvm_vote_ballot(1); +#else + unsigned int mask; + asm volatile("activemask.b32 %0;" : "=r"(mask)); + return mask; +#endif +} inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { return __nvvm_fns(mask, base, offset);
Index: clang/lib/Headers/__clang_cuda_intrinsics.h =================================================================== --- clang/lib/Headers/__clang_cuda_intrinsics.h +++ clang/lib/Headers/__clang_cuda_intrinsics.h @@ -211,7 +211,15 @@ return __nvvm_vote_ballot_sync(mask, pred); } -inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); } +inline __device__ unsigned int __activemask() { +#if CUDA_VERSION < 9020 + return __nvvm_vote_ballot(1); +#else + unsigned int mask; + asm volatile("activemask.b32 %0;" : "=r"(mask)); + return mask; +#endif +} inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { return __nvvm_fns(mask, base, offset);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits