jz10 updated this revision to Diff 468695.
jz10 added a comment.

Thanks, Johannes, for your comments. I have replied to them below.

1. format issues

I ran clang-format to reformat the code; please check whether I missed anything.

2. replace '0' with 'nullptr'

fixed

3. proper return value for helper functions and async functions

fixed

4. Why can't we have a `kmp_tasking_flags_t` object?

I followed the same access approach used in openmp/runtime, so I didn't change
this part, but I can revise it if needed.

5. make helper function as 'static'

fixed

6. Why do you access args_ for some parts and not for others? That said, where 
does the hidden helper need access to the dependences anyway?

The depend objects are cast from 'omp_depend_t' to 'kmp_depend_info_t*', and
the array of cast depend objects is consumed by '__kmpc_omp_task_with_deps'.
To make this safe, the array of cast depend objects has to live longer, so I
attached it to the Args object.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136103/new/

https://reviews.llvm.org/D136103

Files:
  clang/docs/ReleaseNotes.rst
  openmp/libomptarget/include/interop.h
  openmp/libomptarget/src/api.cpp
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/private.h

Index: openmp/libomptarget/src/private.h
===================================================================
--- openmp/libomptarget/src/private.h
+++ openmp/libomptarget/src/private.h
@@ -98,7 +98,47 @@
  * We maintain the same data structure for compatibility.
  */
 typedef int kmp_int32;
+typedef int64_t kmp_int64;
 typedef intptr_t kmp_intptr_t;
+
+typedef void * omp_depend_t;
+struct kmp_task;
+typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, struct kmp_task * );
+typedef struct kmp_task {
+  void *              shareds;
+  kmp_routine_entry_t routine;
+  kmp_int32           part_id;
+} kmp_task_t;
+
+typedef struct kmp_tasking_flags { /* Bit-field view of a task flag word; total struct must be exactly 32 bits */
+  /* Compiler flags: 8 one-bit flags + 8 reserved bits = 16 bits */
+  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
+  unsigned final : 1; /* task is final (1), so execute immediately */
+  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
+  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
+  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
+  unsigned priority_specified : 1; /* set if the compiler provides priority setting for the task */
+  unsigned detachable : 1; /* 1 == can detach */
+  unsigned hidden_helper : 1; /* 1 == hidden helper task */
+  unsigned reserved : 8; /* reserved for compiler use */
+
+  /* Library flags: 9 one-bit flags + 7 reserved bits = 16 bits */
+  unsigned tasktype : 1; /* task is either explicit (1) or implicit (0) */
+  unsigned task_serial : 1; /* task is executed immediately (1) or deferred (0) */
+  unsigned tasking_ser : 1; /* all tasks in team are either executed immediately */
+  /* (1) or may be deferred (0) */
+  unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel */
+  /* (0) [>= 2 threads] */
+  /* If either team_serial or tasking_ser is set, task team may be NULL */
+  /* Task state flags: */
+  unsigned started : 1; /* 1 == started, 0 == not started */
+  unsigned executing : 1; /* 1 == executing, 0 == not executing */
+  unsigned complete : 1; /* 1 == complete, 0 == not complete */
+  unsigned freed : 1; /* 1 == freed, 0 == allocated */
+  unsigned native : 1; /* 1 == gcc-compiled task, 0 == intel */
+  unsigned reserved31 : 7; /* reserved for library use */
+} kmp_tasking_flags_t;
+  
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
@@ -117,6 +157,96 @@
                           kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                           kmp_depend_info_t *noalias_dep_list)
     __attribute__((weak));
+
+kmp_task_t* __kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+                                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                                  kmp_routine_entry_t task_entry)
+  __attribute__((weak));
+
+kmp_task_t* __kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+                                         size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                                         kmp_routine_entry_t task_entry, kmp_int64 device_id)
+  __attribute__((weak));
+
+void __kmpc_proxy_task_completed_ooo (kmp_task_t *ptask) __attribute__((weak));
+kmp_int32 __kmpc_omp_task_with_deps (ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
+                                     kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+                                     kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list)
+  __attribute__((weak));
+
+// Argument bundle handed to the asynchronous omp_target_memcpy helper task.
+class TargetMemcpyArgsTy {
+public:
+  // Stores the copy parameters and copies the dependence records referenced
+  // by Depobj_list into an array owned by this object.
+  TargetMemcpyArgsTy(void *Dst_, const void *Src_, size_t Length_,
+                     size_t DstOffset_, size_t SrcOffset_, int DstDevice_,
+                     int SrcDevice_, int Depobj_count,
+                     omp_depend_t *Depobj_list)
+      : Dst(Dst_), Src(Src_), Length(Length_), DstOffset(DstOffset_),
+        SrcOffset(SrcOffset_), DstDevice(DstDevice_), SrcDevice(SrcDevice_),
+        Depobjs(nullptr) {
+    if (Depobj_count > 0) {
+      Depobjs = new kmp_depend_info_t[Depobj_count];
+      for (int i = 0; i < Depobj_count; i++)
+        Depobjs[i] = *static_cast<kmp_depend_info_t *>(Depobj_list[i]);
+    }
+  }
+
+  // Depobjs comes from new[], so it must be released with delete[];
+  // delete[] on a null pointer is a no-op, so no guard is needed.
+  ~TargetMemcpyArgsTy() { delete[] Depobjs; }
+
+  void *Dst;
+  const void *Src;
+  size_t Length;
+  size_t DstOffset;
+  size_t SrcOffset;
+  int DstDevice;
+  int SrcDevice;
+  kmp_depend_info_t *Depobjs; // Owned copy of the dependence records.
+};
+
+// Argument bundle for the asynchronous omp_target_memcpy_rect helper task.
+class TargetMemcpyRectArgsTy {
+public:
+  // Stores the copy parameters (array arguments by pointer, not by value) and
+  // copies the dependence records referenced by Depobj_list.
+  TargetMemcpyRectArgsTy(void *Dst_, const void *Src_, size_t ElementSize_,
+                         int NumDims_, const size_t *Volume_,
+                         const size_t *DstOffsets_, const size_t *SrcOffsets_,
+                         const size_t *DstDimensions_,
+                         const size_t *SrcDimensions_, int DstDevice_,
+                         int SrcDevice_, int Depobj_count,
+                         omp_depend_t *Depobj_list)
+      : Dst(Dst_), Src(Src_), ElementSize(ElementSize_), NumDims(NumDims_),
+        Volume(Volume_), DstOffsets(DstOffsets_), SrcOffsets(SrcOffsets_),
+        DstDimensions(DstDimensions_), SrcDimensions(SrcDimensions_),
+        DstDevice(DstDevice_), SrcDevice(SrcDevice_), Depobjs(nullptr) {
+    if (Depobj_count > 0) {
+      Depobjs = new kmp_depend_info_t[Depobj_count];
+      for (int i = 0; i < Depobj_count; i++)
+        Depobjs[i] = *static_cast<kmp_depend_info_t *>(Depobj_list[i]);
+    }
+  }
+
+  // Depobjs comes from new[], so release it with delete[] (null is a no-op).
+  ~TargetMemcpyRectArgsTy() { delete[] Depobjs; }
+
+  void *Dst;
+  const void *Src;
+  size_t ElementSize;
+  int NumDims;
+  const size_t *Volume;
+  const size_t *DstOffsets;
+  const size_t *SrcOffsets;
+  const size_t *DstDimensions;
+  const size_t *SrcDimensions;
+  int DstDevice;
+  int SrcDevice;
+  kmp_depend_info_t *Depobjs; // Owned copy of the dependence records.
+};
+    
 #ifdef __cplusplus
 }
 #endif
Index: openmp/libomptarget/src/exports
===================================================================
--- openmp/libomptarget/src/exports
+++ openmp/libomptarget/src/exports
@@ -38,6 +38,8 @@
     omp_target_is_present;
     omp_target_memcpy;
     omp_target_memcpy_rect;
+    omp_target_memcpy_async;
+    omp_target_memcpy_rect_async;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
     llvm_omp_target_alloc_host;
Index: openmp/libomptarget/src/api.cpp
===================================================================
--- openmp/libomptarget/src/api.cpp
+++ openmp/libomptarget/src/api.cpp
@@ -200,6 +200,71 @@
   return Rc;
 }
 
+// Task routine behind omp_target_memcpy_async: performs the blocking copy
+// described by Task->shareds and then releases that argument object.
+static int __kmpc_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
+  if (Task == nullptr || Task->shareds == nullptr)
+    return OFFLOAD_FAIL;
+
+  TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds;
+
+  // Call blocked version
+  int Rc =
+      omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset,
+                        Args->SrcOffset, Args->DstDevice, Args->SrcDevice);
+  DP("omp_target_memcpy returns %d\n", Rc);
+  // The arguments were heap-allocated for this task only; free them here.
+  delete Args;
+  return Rc;
+}
+
+// Asynchronous omp_target_memcpy: packages the arguments into a hidden helper
+// task and submits it together with the caller's depend objects. Returns
+// OFFLOAD_FAIL on a null Dst/Src or failed task allocation, otherwise the
+// value returned by __kmpc_omp_task_with_deps.
+EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
+                                   size_t DstOffset, size_t SrcOffset,
+                                   int DstDevice, int SrcDevice,
+                                   int Depobj_count,
+                                   omp_depend_t *Depobj_list) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
+     "src offset %zu, length %zu\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
+     Length);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Mark the task as a hidden helper task in the 32-bit flag word.
+  kmp_int32 Flags = 0;
+  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
+  InputFlags->hidden_helper = 1;
+
+  // Alloc helper task; passes 0 for sizeof_shareds and -1 for device_id.
+  int Gtid = __kmpc_global_thread_num(NULL);
+  kmp_task_t *Ptr = __kmpc_omp_target_task_alloc(
+      NULL, Gtid, Flags, sizeof(kmp_task_t), 0,
+      &__kmpc_target_memcpy_async_helper, -1);
+  if (Ptr == nullptr)
+    return OFFLOAD_FAIL;
+
+  // The task reads its arguments (and the converted depend records) from this
+  // heap object via Ptr->shareds.
+  TargetMemcpyArgsTy *Args_ =
+      new TargetMemcpyArgsTy(Dst, Src, Length, DstOffset, SrcOffset, DstDevice,
+                             SrcDevice, Depobj_count, Depobj_list);
+  Ptr->shareds = Args_;
+
+  int Rc = __kmpc_omp_task_with_deps(NULL, Gtid, Ptr, Depobj_count,
+                                     Args_->Depobjs, 0, NULL);
+
+  DP("omp_target_memcpy_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int
 omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                        int NumDims, const size_t *Volume,
@@ -260,6 +325,74 @@
   return Rc;
 }
 
+// Task routine behind omp_target_memcpy_rect_async: performs the blocking
+// copy described by Task->shareds and then releases that argument object.
+static int __kmpc_target_memcpy_rect_async_helper(kmp_int32 Gtid,
+                                                  kmp_task_t *Task) {
+  if (Task == nullptr || Task->shareds == nullptr)
+    return OFFLOAD_FAIL;
+
+  TargetMemcpyRectArgsTy *Args = (TargetMemcpyRectArgsTy *)Task->shareds;
+
+  // Call blocked version
+  int Rc = omp_target_memcpy_rect(
+      Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume,
+      Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions,
+      Args->SrcDimensions, Args->DstDevice, Args->SrcDevice);
+  DP("omp_target_memcpy_rect returns %d\n", Rc);
+  // The arguments were heap-allocated for this task only; free them here.
+  delete Args;
+  return Rc;
+}
+
+// Asynchronous omp_target_memcpy_rect: packages the arguments into a hidden
+// helper task and submits it together with the caller's depend objects.
+// Returns OFFLOAD_FAIL on a null Dst/Src or failed task allocation, otherwise
+// the value returned by __kmpc_omp_task_with_deps.
+EXTERN int omp_target_memcpy_rect_async(
+    void *Dst, const void *Src, size_t ElementSize, int NumDims,
+    const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets,
+    const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice,
+    int SrcDevice, int Depobj_count, omp_depend_t *Depobj_list) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
+     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
+     "volume " DPxMOD ", element size %zu, num_dims %d\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
+     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
+     DPxPTR(Volume), ElementSize, NumDims);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Mark the task as a hidden helper task in the 32-bit flag word.
+  kmp_int32 Flags = 0;
+  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
+  InputFlags->hidden_helper = 1;
+
+  // Alloc helper task; passes 0 for sizeof_shareds and -1 for device_id.
+  int Gtid = __kmpc_global_thread_num(NULL);
+  kmp_task_t *Ptr = __kmpc_omp_target_task_alloc(
+      NULL, Gtid, Flags, sizeof(kmp_task_t), 0,
+      &__kmpc_target_memcpy_rect_async_helper, -1);
+  if (Ptr == nullptr)
+    return OFFLOAD_FAIL;
+
+  // The task reads its arguments from this heap object via Ptr->shareds.
+  TargetMemcpyRectArgsTy *Args_ = new TargetMemcpyRectArgsTy(
+      Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
+      DstDimensions, SrcDimensions, DstDevice, SrcDevice, Depobj_count,
+      Depobj_list);
+  Ptr->shareds = Args_;
+
+  int Rc = __kmpc_omp_task_with_deps(NULL, Gtid, Ptr, Depobj_count,
+                                     Args_->Depobjs, 0, NULL);
+  DP("omp_target_memcpy_rect_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                                     size_t Size, size_t DeviceOffset,
                                     int DeviceNum) {
Index: openmp/libomptarget/include/interop.h
===================================================================
--- openmp/libomptarget/include/interop.h
+++ openmp/libomptarget/include/interop.h
@@ -116,30 +116,6 @@
 extern const char *__KAI_KMPC_CONVENTION
 omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
 
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
-  /* Compiler flags */             /* Total compiler flags must be 16 bits */
-  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
-  unsigned final : 1;              /* task is final(1) so execute immediately */
-  unsigned merged_if0 : 1; // no __kmpc_task_{begin/complete}_if0 calls in if0
-  unsigned destructors_thunk : 1; // set if the compiler creates a thunk to
-  unsigned proxy : 1; // task is a proxy task (it will be executed outside the
-  unsigned priority_specified : 1; // set if the compiler provides priority
-  unsigned detachable : 1;         // 1 == can detach */
-  unsigned unshackled : 1;         /* 1 == unshackled task */
-  unsigned target : 1;             /* 1 == target task */
-  unsigned reserved : 7;           /* reserved for compiler use */
-  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
-  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
-  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
-  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
-  unsigned started : 1;     /* 1==started, 0==not started     */
-  unsigned executing : 1;   /* 1==executing, 0==not executing */
-  unsigned complete : 1;    /* 1==complete, 0==not complete   */
-  unsigned freed : 1;       /* 1==freed, 0==allocated        */
-  unsigned native : 1;      /* 1==gcc-compiled task, 0==intel */
-  unsigned reserved31 : 7;  /* reserved for library use */
-} kmp_tasking_flags_t;
-
 typedef enum omp_interop_backend_type_t {
   // reserve 0
   omp_interop_backend_type_cuda_1 = 1,
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -248,6 +248,9 @@
   not satisfied in the event of an instantiation failures in a requires expression's
   parameter list. We previously handled this correctly in a constraint evaluation
   context, but not in a requires clause evaluated as a boolean.
+- Address the thread identification problems in coroutines.
+  `Issue 47177 <https://github.com/llvm/llvm-project/issues/47177>`_
+  `Issue 47179 <https://github.com/llvm/llvm-project/issues/47179>`_
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to