https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/129927
Summary: These require `+ptx` features to be set even though they're guarded by the `__nvvm_reflect`. Rather than figure out how to hack around that with the `target` attribute I'm just going to disable it for 'generic' builds and use the slow version for now.

From ceba17a5a18b0a34cedfbb625c3d9ba28079fd15 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Wed, 5 Mar 2025 14:05:11 -0600
Subject: [PATCH] [Clang] Fix 'gpuintrin.h' match when included with no arch set

Summary: These require `+ptx` features to be set even though they're guarded by the `__nvvm_reflect`. Rather than figure out how to hack around that with the `target` attribute I'm just going to disable it for 'generic' builds and use the slow version for now.
---
 clang/lib/Headers/nvptxintrin.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index 29d0adcabc82f..b2c3097a464fe 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -179,8 +179,10 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
   // Newer targets can use the dedicated CUDA support.
-  if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
+#if __CUDA_ARCH__ >= 700
+  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
     return __nvvm_match_any_sync_i32(__lane_mask, __x);
+#endif
 
   uint32_t __match_mask = 0;
   bool __done = 0;
@@ -200,8 +202,10 @@ __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
   // Newer targets can use the dedicated CUDA support.
+#if __CUDA_ARCH__ >= 700
   if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
     return __nvvm_match_any_sync_i64(__lane_mask, __x);
+#endif
 
   uint64_t __match_mask = 0;
 
@@ -223,9 +227,11 @@ __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
   // Newer targets can use the dedicated CUDA support.
+#if __CUDA_ARCH__ >= 700
   int predicate;
   if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
     return __nvvm_match_all_sync_i32p(__lane_mask, __x, &predicate);
+#endif
 
   uint32_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
   uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
@@ -236,9 +242,11 @@ __gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
   // Newer targets can use the dedicated CUDA support.
+#if __CUDA_ARCH__ >= 700
   int predicate;
   if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
     return __nvvm_match_all_sync_i64p(__lane_mask, __x, &predicate);
+#endif
 
   uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
   uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits