https://github.com/clementval created https://github.com/llvm/llvm-project/pull/114302
Use the feature added in #114301 to perform data transfer between data having a descriptor. >From e4c7e31c77bbfda563e4e2c9b591fe2f5cb2c259 Mon Sep 17 00:00:00 2001 From: Valentin Clement <clement...@gmail.com> Date: Wed, 30 Oct 2024 11:53:12 -0700 Subject: [PATCH] [flang][cuda] Data transfer with descriptor --- flang/runtime/CUDA/memory.cpp | 34 +++++++++++++++++++-- flang/unittests/Runtime/CUDA/Memory.cpp | 40 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index 4778a4ae77683f..f25d3b531c84f0 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -9,10 +9,32 @@ #include "flang/Runtime/CUDA/memory.h" #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" +#include "flang/Runtime/assign.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { +static void *MemmoveHostToDevice( + void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); + return dst; +} + +static void *MemmoveDeviceToHost( + void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); + return dst; +} + +static void *MemmoveDeviceToDevice( + void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); + return dst; +} + extern "C" { void *RTDEF(CUFMemAlloc)( @@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; - terminator.Crash( - "not yet implemented: CUDA data transfer between two descriptors"); + MemmoveFct memmoveFct; + if (mode == kHostToDevice) { + memmoveFct = &MemmoveHostToDevice; + } else if (mode == kDeviceToHost) { + memmoveFct = &MemmoveDeviceToHost; + } else if (mode == kDeviceToDevice) { + memmoveFct = &MemmoveDeviceToDevice; + } + Fortran::runtime::Assign( + dstDesc, srcDesc, terminator, MaybeReallocate, memmoveFct); } } } // namespace Fortran::runtime::cuda diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp index 157d3cdb531def..ade05e21b70a89 100644 --- a/flang/unittests/Runtime/CUDA/Memory.cpp +++ b/flang/unittests/Runtime/CUDA/Memory.cpp @@ -9,11 +9,17 @@ #include "flang/Runtime/CUDA/memory.h" #include "gtest/gtest.h" #include "../../../runtime/terminator.h" +#include "../tools.h" #include "flang/Common/Fortran.h" +#include "flang/Runtime/CUDA/allocator.h" #include "flang/Runtime/CUDA/common.h" +#include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/allocatable.h" +#include "flang/Runtime/allocator-registry.h" #include "cuda_runtime.h" +using namespace Fortran::runtime; using namespace Fortran::runtime::cuda; TEST(MemoryCUFTest, SimpleAllocTramsferFree) { @@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) { EXPECT_EQ(42, host); RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__); } + +static OwningPtr<Descriptor> createAllocatable( + Fortran::common::TypeCategory tc, int kind, int rank = 1) { + return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr, + CFI_attribute_allocatable); +} + +TEST(MemoryCUFTest, CUFDataTransferDescDesc) { + using Fortran::common::TypeCategory; + RTNAME(CUFRegisterAllocator)(); + // INTEGER(4), DEVICE, ALLOCATABLE :: a(:) + auto dev{createAllocatable(TypeCategory::Integer, 4)}; + dev->SetAllocIdx(kDeviceAllocatorPos); + EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx()); + RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10); + RTNAME(AllocatableAllocate) + (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + EXPECT_TRUE(dev->IsAllocated()); + + // Create temp array to transfer to device. + auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10}, + std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})}; + RTNAME(CUFDataTransferDescDesc)(*dev, *x, kHostToDevice, __FILE__, __LINE__); + + // Retrieve data from device. + auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10}, + std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})}; + RTNAME(CUFDataTransferDescDesc)( + *host, *dev, kDeviceToHost, __FILE__, __LINE__); + + for (unsigned i = 0; i < 10; ++i) { + EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i); + } +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits