jz10 updated this revision to Diff 469772. jz10 added a comment. Thanks Johannes and Shilei
1. '385-387 are the same as in omp_target_memcpy_async. Can we also not duplicate those lines?' I put the common code (i.e.helper task creation) into another static function 2. 'In this code there are also various places with variables not named according to the style guide' fixed, please check if there's remaining issues 3. 'The problem with the int32 Flags I mentioned already. ' The flag variable was defined as 'kmp_int32', since its consumer '__kmpc_omp_target_task_alloc' needs 'kmp_int32' type as input. the type cast to 'kmp_tasking_flags_t' is to set the 'hidden_helper' bit. So it seems that there's no better option for us. Please let me know your suggestion. 4. "Can we put all KMP related code into a separate header" we used both kmp relevent data structure/types and APIs, so should I wrap all those relevant code into several tool functions and put them into separate header file? 5. 'aving a variable suffix with _ is generally not a good coding style' fixed CHANGES SINCE LAST ACTION https://reviews.llvm.org/D136103/new/ https://reviews.llvm.org/D136103 Files: openmp/libomptarget/include/interop.h openmp/libomptarget/src/api.cpp openmp/libomptarget/src/exports openmp/libomptarget/src/private.h
Index: openmp/libomptarget/src/private.h =================================================================== --- openmp/libomptarget/src/private.h +++ openmp/libomptarget/src/private.h @@ -98,7 +98,51 @@ * We maintain the same data structure for compatibility. */ typedef int kmp_int32; +typedef int64_t kmp_int64; typedef intptr_t kmp_intptr_t; + +typedef void *omp_depend_t; +struct kmp_task; +typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *); +typedef struct kmp_task { + void *shareds; + kmp_routine_entry_t routine; + kmp_int32 part_id; +} kmp_task_t; + +typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ + /* Compiler flags */ /* Total compiler flags must be 16 bits */ + unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ + unsigned final : 1; /* task is final(1) so execute immediately */ + unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 + code path */ + unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to + invoke destructors from the runtime */ + unsigned proxy : 1; /* task is a proxy task (it will be executed outside the + context of the RTL) */ + unsigned priority_specified : 1; /* set if the compiler provides priority + setting for the task */ + unsigned detachable : 1; /* 1 == can detach */ + unsigned hidden_helper : 1; /* 1 == hidden helper task */ + unsigned reserved : 8; /* reserved for compiler use */ + + /* Library flags */ /* Total library flags must be 16 bits */ + unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ + unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) + unsigned tasking_ser : 1; // all tasks in team are either executed immediately + // (1) or may be deferred (0) + unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel + // (0) [>= 2 threads] + /* If either team_serial or tasking_ser is set, task team may be NULL */ + /* Task State Flags: */ + unsigned started : 1; /* 1==started, 0==not started */ + unsigned executing : 1; /* 1==executing, 0==not executing */ + unsigned complete : 1; /* 1==complete, 0==not complete */ + unsigned freed : 1; /* 1==freed, 0==allocated */ + unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ + unsigned reserved31 : 7; /* reserved for library use */ +} kmp_tasking_flags_t; + // Compiler sends us this info: typedef struct kmp_depend_info { kmp_intptr_t base_addr; @@ -117,6 +161,88 @@ kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) __attribute__((weak)); + +kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 flags, size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry) + __attribute__((weak)); + +kmp_task_t * +__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry, + kmp_int64 device_id) __attribute__((weak)); + +kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task, kmp_int32 ndeps, + kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list) + __attribute__((weak)); + +/** + * The argument set that is passed from asynchronous memory copy to block + * version of memory copy invoked in helper task + */ +struct TargetMemcpyArgsTy { + /** + * Common attribuutes + */ + void *Dst; + const void *Src; + int DstDevice; + int SrcDevice; + + /** + * The flag that denotes single dimensional or rectangle dimensional copy + */ + bool IsRectMemcpy; + + /** + * Arguments for single dimensional copy + */ + size_t Length; + size_t DstOffset; + size_t SrcOffset; + + /** + * Arguments for rectangle dimensional copy + */ + size_t ElementSize; + int NumDims; + const size_t *Volume; + const size_t *DstOffsets; + const size_t *SrcOffsets; + const size_t *DstDimensions; + const size_t *SrcDimensions; + + /** + * Constructor for single dimensional copy + */ + TargetMemcpyArgsTy(void *Dst, const void *Src, size_t Length, + size_t DstOffset, size_t SrcOffset, int DstDevice, + int SrcDevice) + : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice), + IsRectMemcpy(false), Length(Length), DstOffset(DstOffset), + SrcOffset(SrcOffset), ElementSize(0), NumDims(0), Volume(0), + DstOffsets(0), SrcOffsets(0), DstDimensions(0), SrcDimensions(0){}; + + /** + * Constructor for rectangle dimensional copy + */ + TargetMemcpyArgsTy(void *Dst, const void *Src, size_t ElementSize, + int NumDims, const size_t *Volume, + const size_t *DstOffsets, const size_t *SrcOffsets, + const size_t *DstDimensions, const size_t *SrcDimensions, + int DstDevice, int SrcDevice) + : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice), + IsRectMemcpy(true), Length(0), DstOffset(0), SrcOffset(0), + ElementSize(ElementSize), NumDims(NumDims), Volume(Volume), + DstOffsets(DstOffsets), SrcOffsets(SrcOffsets), + DstDimensions(DstDimensions), SrcDimensions(SrcDimensions){}; +}; + #ifdef __cplusplus } #endif Index: openmp/libomptarget/src/exports =================================================================== --- openmp/libomptarget/src/exports +++ openmp/libomptarget/src/exports @@ -38,6 +38,8 @@ omp_target_is_present; omp_target_memcpy; omp_target_memcpy_rect; + omp_target_memcpy_async; + omp_target_memcpy_rect_async; omp_target_associate_ptr; omp_target_disassociate_ptr; llvm_omp_target_alloc_host; Index: openmp/libomptarget/src/api.cpp =================================================================== --- openmp/libomptarget/src/api.cpp +++ openmp/libomptarget/src/api.cpp @@ -15,6 +15,8 @@ #include "private.h" #include "rtl.h" +#include "llvm/ADT/SmallVector.h" + #include <climits> #include <cstdlib> #include <cstring> @@ -200,6 +202,99 @@ return Rc; } +// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect +static int __kmpc_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) { + if (Task == nullptr) + return OFFLOAD_FAIL; + + TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds; + + if (Args == nullptr) + return OFFLOAD_FAIL; + + // Call blocked version + int Rc = OFFLOAD_SUCCESS; + if (Args->IsRectMemcpy) { + Rc = omp_target_memcpy_rect( + Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume, + Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions, + Args->SrcDimensions, Args->DstDevice, Args->SrcDevice); + + DP("omp_target_memcpy_rect returns %d\n", Rc); + } else { + Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset, + Args->SrcOffset, Args->DstDevice, Args->SrcDevice); + + DP("omp_target_memcpy returns %d\n", Rc); + } + + // Release the arguments object + delete Args; + + return Rc; +} + +// Allocate helper task +static kmp_task_t *__kmpc_helper_task_alloc(kmp_int32 Gtid) { + // Create task + int (*Fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_async_helper; + + // Setup the hidden helper flags; + kmp_int32 Flags = 0; + kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags; + InputFlags->hidden_helper = 1; + + // Alloc helper task + kmp_task_t *Ptr = __kmpc_omp_target_task_alloc(nullptr, Gtid, Flags, + sizeof(kmp_task_t), 0, Fn, -1); + + return Ptr; +} + +EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, + size_t DstOffset, size_t SrcOffset, + int DstDevice, int SrcDevice, + int DepObjCount, omp_depend_t *DepObjList) { + TIMESCOPE(); + DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " + "src offset %zu, length %zu\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, + Length); + + // Check the source and dest address + if (Dst == nullptr || Src == nullptr) + return OFFLOAD_FAIL; + + // Create helper task + int Gtid = __kmpc_global_thread_num(nullptr); + kmp_task_t *Ptr = __kmpc_helper_task_alloc(Gtid); + if (Ptr == nullptr) + return OFFLOAD_FAIL; + + // Create task object + TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( + Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice); + Ptr->shareds = Args; + + // Convert the type of depend objects + llvm::SmallVector<kmp_depend_info_t> DepObjs; + if (DepObjCount > 0) { + for (int i = 0; i < DepObjCount; i++) { + omp_depend_t DepObj = DepObjList[i]; + DepObjs.push_back(*((kmp_depend_info_t *)DepObj)); + } + } + + int Rc = OFFLOAD_SUCCESS; + + Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount, + DepObjs.data(), 0, nullptr); + + DP("omp_target_memcpy_async returns %d\n", Rc); + return Rc; +} + EXTERN int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, int NumDims, const size_t *Volume, @@ -260,6 +355,53 @@ return Rc; } +EXTERN int omp_target_memcpy_rect_async( + void *Dst, const void *Src, size_t ElementSize, int NumDims, + const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, + const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, + int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { + TIMESCOPE(); + DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " + "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " + "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " + "volume " DPxMOD ", element size %zu, num_dims %d\n", + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), + DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), + DPxPTR(Volume), ElementSize, NumDims); + + // Check the source and dest address + if (Dst == nullptr || Src == nullptr) + return OFFLOAD_FAIL; + + // Create helper task + int Gtid = __kmpc_global_thread_num(nullptr); + kmp_task_t *Ptr = __kmpc_helper_task_alloc(Gtid); + if (Ptr == nullptr) + return OFFLOAD_FAIL; + + // Create task object + TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( + Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets, + DstDimensions, SrcDimensions, DstDevice, SrcDevice); + Ptr->shareds = Args; + + // Convert the type of depend objects + llvm::SmallVector<kmp_depend_info_t> DepObjs; + if (DepObjCount > 0) { + for (int i = 0; i < DepObjCount; i++) { + omp_depend_t DepObj = DepObjList[i]; + DepObjs.push_back(*((kmp_depend_info_t *)DepObj)); + } + } + + int Rc = OFFLOAD_SUCCESS; + Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount, + DepObjs.data(), 0, nullptr); + + DP("omp_target_memcpy_rect_async returns %d\n", Rc); + return Rc; +} + EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, size_t Size, size_t DeviceOffset, int DeviceNum) { Index: openmp/libomptarget/include/interop.h =================================================================== --- openmp/libomptarget/include/interop.h +++ openmp/libomptarget/include/interop.h @@ -116,30 +116,6 @@ extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ - /* Compiler flags */ /* Total compiler flags must be 16 bits */ - unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ - unsigned final : 1; /* task is final(1) so execute immediately */ - unsigned merged_if0 : 1; // no __kmpc_task_{begin/complete}_if0 calls in if0 - unsigned destructors_thunk : 1; // set if the compiler creates a thunk to - unsigned proxy : 1; // task is a proxy task (it will be executed outside the - unsigned priority_specified : 1; // set if the compiler provides priority - unsigned detachable : 1; // 1 == can detach */ - unsigned unshackled : 1; /* 1 == unshackled task */ - unsigned target : 1; /* 1 == target task */ - unsigned reserved : 7; /* reserved for compiler use */ - unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ - unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) - unsigned tasking_ser : 1; // all tasks in team are either executed immediately - unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel - unsigned started : 1; /* 1==started, 0==not started */ - unsigned executing : 1; /* 1==executing, 0==not executing */ - unsigned complete : 1; /* 1==complete, 0==not complete */ - unsigned freed : 1; /* 1==freed, 0==allocated */ - unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ - unsigned reserved31 : 7; /* reserved for library use */ -} kmp_tasking_flags_t; - typedef enum omp_interop_backend_type_t { // reserve 0 omp_interop_backend_type_cuda_1 = 1,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits