================ @@ -0,0 +1,86 @@ +//===-- gpuintrin.h - Generic GPU intrinsic functions ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides wrappers around the clang builtins for accessing GPU hardware +// features. The interface is intended to be portable between architectures, but +// some targets may provide different implementations. This header can be +// included for all the common GPU programming languages, namely OpenMP, HIP, +// CUDA, and OpenCL. +// +//===----------------------------------------------------------------------===// + +#ifndef __GPUINTRIN_H +#define __GPUINTRIN_H + +#if defined(__NVPTX__) +#include <nvptxintrin.h> +#elif defined(__AMDGPU__) +#include <amdgpuintrin.h> +#else +#error "This header is only meant to be used on GPU architectures." +#endif + +// Returns the total number of blocks / workgroups. +_DEFAULT_ATTRS static inline uint64_t __gpu_num_blocks() { + return __gpu_num_blocks_x() * __gpu_num_blocks_y() * __gpu_num_blocks_z(); +} + +// Returns the absolute id of the block / workgroup. +_DEFAULT_ATTRS static inline uint64_t __gpu_block_id() { + return __gpu_block_id_x() + + (uint64_t)__gpu_num_blocks_x() * __gpu_block_id_y() + + (uint64_t)__gpu_num_blocks_x() * __gpu_num_blocks_y() * + __gpu_block_id_z(); +} + +// Returns the total number of threads in the block / workgroup. +_DEFAULT_ATTRS static inline uint32_t __gpu_num_threads() { ---------------- jdoerfert wrote:
I still don't like the mix of 32-bit and 64-bit types. I think for users 64-bit everywhere is much nicer, and it avoids the explicit casts above. https://github.com/llvm/llvm-project/pull/110179 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits