jz10 updated this revision to Diff 469810.
jz10 added a comment.

Thanks Johannes

1. 'I feel like I'm missing something. As said before, all but 3 lines are 
identical in these two functions.

Now you created a helper for 1/3 of those identical lines but left the other 
2/3 being duplicated. Could
you elaborate why?'
I put the common part into task_creation function


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136103/new/

https://reviews.llvm.org/D136103

Files:
  openmp/libomptarget/include/interop.h
  openmp/libomptarget/src/api.cpp
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/private.h

Index: openmp/libomptarget/src/private.h
===================================================================
--- openmp/libomptarget/src/private.h
+++ openmp/libomptarget/src/private.h
@@ -98,7 +98,51 @@
  * We maintain the same data structure for compatibility.
  */
 typedef int kmp_int32;
+typedef int64_t kmp_int64;
 typedef intptr_t kmp_intptr_t;
+
+typedef void *omp_depend_t;
+struct kmp_task;
+typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *);
+typedef struct kmp_task {
+  void *shareds;
+  kmp_routine_entry_t routine;
+  kmp_int32 part_id;
+} kmp_task_t;
+
+typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
+  /* Compiler flags */             /* Total compiler flags must be 16 bits */
+  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
+  unsigned final : 1;              /* task is final(1) so execute immediately */
+  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
+                              code path */
+  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
+                                     invoke destructors from the runtime */
+  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
+                         context of the RTL) */
+  unsigned priority_specified : 1; /* set if the compiler provides priority
+                                      setting for the task */
+  unsigned detachable : 1;         /* 1 == can detach */
+  unsigned hidden_helper : 1;      /* 1 == hidden helper task */
+  unsigned reserved : 8;           /* reserved for compiler use */
+
+  /* Library flags */       /* Total library flags must be 16 bits */
+  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
+  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
+  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
+  // (1) or may be deferred (0)
+  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
+  // (0) [>= 2 threads]
+  /* If either team_serial or tasking_ser is set, task team may be NULL */
+  /* Task State Flags: */
+  unsigned started : 1;    /* 1==started, 0==not started     */
+  unsigned executing : 1;  /* 1==executing, 0==not executing */
+  unsigned complete : 1;   /* 1==complete, 0==not complete   */
+  unsigned freed : 1;      /* 1==freed, 0==allocated        */
+  unsigned native : 1;     /* 1==gcc-compiled task, 0==intel */
+  unsigned reserved31 : 7; /* reserved for library use */
+} kmp_tasking_flags_t;
+
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
@@ -117,6 +161,88 @@
                           kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                           kmp_depend_info_t *noalias_dep_list)
     __attribute__((weak));
+
+kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
+                                  size_t sizeof_shareds,
+                                  kmp_routine_entry_t task_entry)
+    __attribute__((weak));
+
+kmp_task_t *
+__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                             kmp_routine_entry_t task_entry,
+                             kmp_int64 device_id) __attribute__((weak));
+
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list)
+    __attribute__((weak));
+
+/**
+ * The argument set that is passed from asynchronous memory copy to block
+ * version of memory copy invoked in helper task
+ */
+struct TargetMemcpyArgsTy {
+  /**
+   * Common attribuutes
+   */
+  void *Dst;
+  const void *Src;
+  int DstDevice;
+  int SrcDevice;
+
+  /**
+   * The flag that denotes single dimensional or rectangle dimensional copy
+   */
+  bool IsRectMemcpy;
+
+  /**
+   * Arguments for single dimensional copy
+   */
+  size_t Length;
+  size_t DstOffset;
+  size_t SrcOffset;
+
+  /**
+   * Arguments for rectangle dimensional copy
+   */
+  size_t ElementSize;
+  int NumDims;
+  const size_t *Volume;
+  const size_t *DstOffsets;
+  const size_t *SrcOffsets;
+  const size_t *DstDimensions;
+  const size_t *SrcDimensions;
+
+  /**
+   * Constructor for single dimensional copy
+   */
+  TargetMemcpyArgsTy(void *Dst, const void *Src, size_t Length,
+                     size_t DstOffset, size_t SrcOffset, int DstDevice,
+                     int SrcDevice)
+      : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice),
+        IsRectMemcpy(false), Length(Length), DstOffset(DstOffset),
+        SrcOffset(SrcOffset), ElementSize(0), NumDims(0), Volume(0),
+        DstOffsets(0), SrcOffsets(0), DstDimensions(0), SrcDimensions(0){};
+
+  /**
+   * Constructor for rectangle dimensional copy
+   */
+  TargetMemcpyArgsTy(void *Dst, const void *Src, size_t ElementSize,
+                     int NumDims, const size_t *Volume,
+                     const size_t *DstOffsets, const size_t *SrcOffsets,
+                     const size_t *DstDimensions, const size_t *SrcDimensions,
+                     int DstDevice, int SrcDevice)
+      : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice),
+        IsRectMemcpy(true), Length(0), DstOffset(0), SrcOffset(0),
+        ElementSize(ElementSize), NumDims(NumDims), Volume(Volume),
+        DstOffsets(DstOffsets), SrcOffsets(SrcOffsets),
+        DstDimensions(DstDimensions), SrcDimensions(SrcDimensions){};
+};
+
 #ifdef __cplusplus
 }
 #endif
Index: openmp/libomptarget/src/exports
===================================================================
--- openmp/libomptarget/src/exports
+++ openmp/libomptarget/src/exports
@@ -38,6 +38,8 @@
     omp_target_is_present;
     omp_target_memcpy;
     omp_target_memcpy_rect;
+    omp_target_memcpy_async;
+    omp_target_memcpy_rect_async;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
     llvm_omp_target_alloc_host;
Index: openmp/libomptarget/src/api.cpp
===================================================================
--- openmp/libomptarget/src/api.cpp
+++ openmp/libomptarget/src/api.cpp
@@ -15,6 +15,8 @@
 #include "private.h"
 #include "rtl.h"
 
+#include "llvm/ADT/SmallVector.h"
+
 #include <climits>
 #include <cstdlib>
 #include <cstring>
@@ -200,6 +202,109 @@
   return Rc;
 }
 
+// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect
+static int __kmpc_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
+  if (Task == nullptr)
+    return OFFLOAD_FAIL;
+
+  TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds;
+
+  if (Args == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Call blocked version
+  int Rc = OFFLOAD_SUCCESS;
+  if (Args->IsRectMemcpy) {
+    Rc = omp_target_memcpy_rect(
+        Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume,
+        Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions,
+        Args->SrcDimensions, Args->DstDevice, Args->SrcDevice);
+
+    DP("omp_target_memcpy_rect returns %d\n", Rc);
+  } else {
+    Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset,
+                           Args->SrcOffset, Args->DstDevice, Args->SrcDevice);
+
+    DP("omp_target_memcpy returns %d\n", Rc);
+  }
+
+  // Release the arguments object
+  delete Args;
+
+  return Rc;
+}
+
+// Allocate and launch helper task
+static int __kmpc_helper_task_creation(TargetMemcpyArgsTy *Args,
+                                       int DepObjCount,
+                                       omp_depend_t *DepObjList) {
+  // Create global thread ID
+  int Gtid = __kmpc_global_thread_num(nullptr);
+  int (*Fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_async_helper;
+
+  // Setup the hidden helper flags;
+  kmp_int32 Flags = 0;
+  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
+  InputFlags->hidden_helper = 1;
+
+  // Alloc helper task
+  kmp_task_t *Ptr = __kmpc_omp_target_task_alloc(nullptr, Gtid, Flags,
+                                                 sizeof(kmp_task_t), 0, Fn, -1);
+
+  if (Ptr == nullptr) {
+    // Task allocation failed, delete the argument object
+    if (Args != nullptr)
+      delete Args;
+
+    return OFFLOAD_FAIL;
+  }
+
+  // Setup the arguments passed to helper task
+  Ptr->shareds = Args;
+
+  // Convert the type of depend objects
+  llvm::SmallVector<kmp_depend_info_t> DepObjs;
+  if (DepObjCount > 0) {
+    for (int i = 0; i < DepObjCount; i++) {
+      omp_depend_t DepObj = DepObjList[i];
+      DepObjs.push_back(*((kmp_depend_info_t *)DepObj));
+    }
+  }
+
+  int Rc = OFFLOAD_SUCCESS;
+  // Launch the helper task
+  Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount,
+                                 DepObjs.data(), 0, nullptr);
+
+  return Rc;
+}
+
+EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
+                                   size_t DstOffset, size_t SrcOffset,
+                                   int DstDevice, int SrcDevice,
+                                   int DepObjCount, omp_depend_t *DepObjList) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
+     "src offset %zu, length %zu\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
+     Length);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Create task object
+  TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
+      Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);
+
+  // Create and launch helper task
+  int Rc = __kmpc_helper_task_creation(Args, DepObjCount, DepObjList);
+
+  DP("omp_target_memcpy_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int
 omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                        int NumDims, const size_t *Volume,
@@ -260,6 +365,36 @@
   return Rc;
 }
 
+EXTERN int omp_target_memcpy_rect_async(
+    void *Dst, const void *Src, size_t ElementSize, int NumDims,
+    const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets,
+    const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice,
+    int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
+     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
+     "volume " DPxMOD ", element size %zu, num_dims %d\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
+     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
+     DPxPTR(Volume), ElementSize, NumDims);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Create task object
+  TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
+      Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
+      DstDimensions, SrcDimensions, DstDevice, SrcDevice);
+
+  // Create and launch helper task
+  int Rc = __kmpc_helper_task_creation(Args, DepObjCount, DepObjList);
+
+  DP("omp_target_memcpy_rect_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                                     size_t Size, size_t DeviceOffset,
                                     int DeviceNum) {
Index: openmp/libomptarget/include/interop.h
===================================================================
--- openmp/libomptarget/include/interop.h
+++ openmp/libomptarget/include/interop.h
@@ -116,30 +116,6 @@
 extern const char *__KAI_KMPC_CONVENTION
 omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
 
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
-  /* Compiler flags */             /* Total compiler flags must be 16 bits */
-  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
-  unsigned final : 1;              /* task is final(1) so execute immediately */
-  unsigned merged_if0 : 1; // no __kmpc_task_{begin/complete}_if0 calls in if0
-  unsigned destructors_thunk : 1; // set if the compiler creates a thunk to
-  unsigned proxy : 1; // task is a proxy task (it will be executed outside the
-  unsigned priority_specified : 1; // set if the compiler provides priority
-  unsigned detachable : 1;         // 1 == can detach */
-  unsigned unshackled : 1;         /* 1 == unshackled task */
-  unsigned target : 1;             /* 1 == target task */
-  unsigned reserved : 7;           /* reserved for compiler use */
-  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
-  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
-  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
-  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
-  unsigned started : 1;     /* 1==started, 0==not started     */
-  unsigned executing : 1;   /* 1==executing, 0==not executing */
-  unsigned complete : 1;    /* 1==complete, 0==not complete   */
-  unsigned freed : 1;       /* 1==freed, 0==allocated        */
-  unsigned native : 1;      /* 1==gcc-compiled task, 0==intel */
-  unsigned reserved31 : 7;  /* reserved for library use */
-} kmp_tasking_flags_t;
-
 typedef enum omp_interop_backend_type_t {
   // reserve 0
   omp_interop_backend_type_cuda_1 = 1,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to