https://github.com/clementval created https://github.com/llvm/llvm-project/pull/101216
Add allocators for CUDA Fortran allocation on the device. Three allocators are added for pinned, device and managed/unified memory allocation. `CUFRegisterAllocator()` is called to register the allocators in the allocator registry added in #100690. Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to conditionally build these. >From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Fri, 12 Jul 2024 15:20:12 -0700 Subject: [PATCH] [flang][cuda] Add CUF allocator --- flang/CMakeLists.txt | 7 ++ flang/include/flang/Runtime/CUDA/allocator.h | 43 +++++++++ flang/runtime/CMakeLists.txt | 3 + flang/runtime/CUDA/CMakeLists.txt | 18 ++++ flang/runtime/CUDA/allocator.cpp | 62 +++++++++++++ flang/unittests/Runtime/CMakeLists.txt | 2 + flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++++++++++++++++++ flang/unittests/Runtime/CUDA/CMakeLists.txt | 15 ++++ 8 files changed, 237 insertions(+) create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h create mode 100644 flang/runtime/CUDA/CMakeLists.txt create mode 100644 flang/runtime/CUDA/allocator.cpp create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 070c39eb6e9ab..971e5d5c93f23 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS if (FLANG_BUILD_TOOLS) add_subdirectory(tools) endif() + +option(FLANG_CUF_RUNTIME + "Compile CUDA Fortran runtime sources" OFF) +if (FLANG_CUF_RUNTIME) + find_package(CUDAToolkit REQUIRED) +endif() + add_subdirectory(runtime) if (LLVM_INCLUDE_EXAMPLES) diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h new file mode 100644 index 0000000000000..0738d1e3a8bf3 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -0,0 +1,43 @@ +//===-- 
include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ +#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ + +#include "flang/Runtime/descriptor.h" + +static constexpr unsigned kPinnedAllocatorPos = 1; +static constexpr unsigned kDeviceAllocatorPos = 2; +static constexpr unsigned kManagedAllocatorPos = 3; + +#define CUDA_REPORT_IF_ERROR(expr) \ + [](CUresult result) { \ + if (!result) \ + return; \ + const char *name = nullptr; \ + cuGetErrorName(result, &name); \ + if (!name) \ + name = "<unknown>"; \ + fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \ + }(expr) + +namespace Fortran::runtime::cuf { + +void CUFRegisterAllocator(); + +void *CUFAllocPinned(std::size_t); +void CUFFreePinned(void *); + +void *CUFAllocDevice(std::size_t); +void CUFFreeDevice(void *); + +void *CUFAllocManaged(std::size_t); +void CUFFreeManaged(void *); + +} // namespace Fortran::runtime::cuf +#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_ diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index 1f3ae23dcbf12..4537b2d059d65 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files) add_dependencies(FortranRuntime flang-new module_files) endif() +if (FLANG_CUF_RUNTIME) + add_subdirectory(CUDA) +endif() diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt new file mode 100644 index 0000000000000..e963b6062abc4 --- /dev/null +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -0,0 +1,18 @@ +#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===# +# +# Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# + +include_directories(${CUDAToolkit_INCLUDE_DIRS}) +find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) + +add_flang_library(CufRuntime + allocator.cpp +) +target_link_libraries(CufRuntime +PRIVATE +${CUDA_RUNTIME_LIBRARY} +) diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp new file mode 100644 index 0000000000000..3c913e344335b --- /dev/null +++ b/flang/runtime/CUDA/allocator.cpp @@ -0,0 +1,62 @@ +//===-- runtime/CUDA/allocator.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/allocator.h" +#include "../allocator-registry.h" +#include "../derived.h" +#include "../stat.h" +#include "../terminator.h" +#include "../type-info.h" +#include "flang/Common/Fortran.h" +#include "flang/ISO_Fortran_binding_wrapper.h" + +#include "cuda.h" + +namespace Fortran::runtime::cuf { + +void CUFRegisterAllocator() { + allocatorRegistry.Register( + kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned}); + allocatorRegistry.Register( + kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice}); + allocatorRegistry.Register( + kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged}); +} + +void *CUFAllocPinned(std::size_t sizeInBytes) { + void *p = nullptr; // stays null if cuMemAllocHost fails + CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes)); + return p; +} + +void CUFFreePinned(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); // pairs with cuMemAllocHost +} + +void *CUFAllocDevice(std::size_t 
sizeInBytes) { + CUdeviceptr p = 0; + CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes)); + return reinterpret_cast<void *>(p); +} + +void CUFFreeDevice(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); +} + +void *CUFAllocManaged(std::size_t sizeInBytes) { + CUdeviceptr p = 0; + CUDA_REPORT_IF_ERROR( + cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL)); + return reinterpret_cast<void *>(p); +} + +void CUFFreeManaged(void *p) { + CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); +} + +} // namespace Fortran::runtime::cuf diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt index ed047b08ada35..2c3f8c1a9e9ac 100644 --- a/flang/unittests/Runtime/CMakeLists.txt +++ b/flang/unittests/Runtime/CMakeLists.txt @@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests PRIVATE FortranRuntime ) + +add_subdirectory(CUDA) diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp new file mode 100644 index 0000000000000..204826d3f2a96 --- /dev/null +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -0,0 +1,87 @@ +//===-- flang/unittests/Runtime/AllocatableCUF.cpp ---------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "flang/Common/Fortran.h" +#include "flang/Runtime/CUDA/allocator.h" +#include "flang/Runtime/allocatable.h" + +#include "cuda.h" + +using namespace Fortran::runtime; + +static OwningPtr<Descriptor> createAllocatable( + Fortran::common::TypeCategory tc, int kind, int rank = 1) { + return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr, + CFI_attribute_allocatable); +} + +thread_local static int32_t defaultDevice = 0; + +CUdevice getDefaultCuDevice() { + CUdevice device; + CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); + return device; +} + +class ScopedContext { +public: + ScopedContext() { + // Static reference to CUDA primary context for device ordinal + // defaultDevice. + static CUcontext context = [] { + CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0)); + CUcontext ctx; + // Note: this does not affect the current context. 
+ CUDA_REPORT_IF_ERROR( + cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice())); + return ctx; + }(); + + CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context)); + } + + ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); } +}; + +TEST(AllocatableCUFTest, SimpleDeviceAllocate) { + using Fortran::common::TypeCategory; + Fortran::runtime::cuf::CUFRegisterAllocator(); + ScopedContext ctx; + // REAL(4), DEVICE, ALLOCATABLE :: a(:) + auto a{createAllocatable(TypeCategory::Real, 4)}; + a->raw().SetAllocIdx(kDeviceAllocatorPos); + EXPECT_EQ((int)kDeviceAllocatorPos, a->raw().GetAllocIdx()); + EXPECT_FALSE(a->raw().HasAddendum()); + RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); + RTNAME(AllocatableAllocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_TRUE(a->IsAllocated()); + RTNAME(AllocatableDeallocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_FALSE(a->IsAllocated()); +} + +TEST(AllocatableCUFTest, SimplePinnedAllocate) { + using Fortran::common::TypeCategory; + Fortran::runtime::cuf::CUFRegisterAllocator(); + ScopedContext ctx; + // INTEGER(4), PINNED, ALLOCATABLE :: a(:) + auto a{createAllocatable(TypeCategory::Integer, 4)}; + EXPECT_FALSE(a->raw().HasAddendum()); + a->raw().SetAllocIdx(kPinnedAllocatorPos); + EXPECT_EQ((int)kPinnedAllocatorPos, a->raw().GetAllocIdx()); + EXPECT_FALSE(a->raw().HasAddendum()); + RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); + RTNAME(AllocatableAllocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_TRUE(a->IsAllocated()); + RTNAME(AllocatableDeallocate) + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_FALSE(a->IsAllocated()); +} diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt new file mode 100644 index 0000000000000..14b5c788719b8 --- /dev/null +++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt @@ -0,0 +1,15 @@ +if (FLANG_CUF_RUNTIME) + 
+add_flang_unittest(FlangCufRuntimeTests + AllocatorCUF.cpp +) + +target_link_libraries(FlangCufRuntimeTests + PRIVATE + CufRuntime + FortranRuntime +) + +target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + +endif() _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits