jz10 updated this revision to Diff 469110.
jz10 added a comment.

Thanks Johannes

1. use SmallVector

fixed

2. "module 5 characters"

Ran clang-format; please check that it works.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136103/new/

https://reviews.llvm.org/D136103

Files:
  clang/docs/ReleaseNotes.rst
  openmp/libomptarget/include/interop.h
  openmp/libomptarget/src/api.cpp
  openmp/libomptarget/src/exports
  openmp/libomptarget/src/private.h

Index: openmp/libomptarget/src/private.h
===================================================================
--- openmp/libomptarget/src/private.h
+++ openmp/libomptarget/src/private.h
@@ -98,7 +98,51 @@
  * We maintain the same data structure for compatibility.
  */
 typedef int kmp_int32;
+typedef int64_t kmp_int64;
 typedef intptr_t kmp_intptr_t;
+
+typedef void *omp_depend_t;
+struct kmp_task;
+typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *);
+typedef struct kmp_task {
+  void *shareds;
+  kmp_routine_entry_t routine;
+  kmp_int32 part_id;
+} kmp_task_t;
+
+typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
+  /* Compiler flags */             /* Total compiler flags must be 16 bits */
+  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
+  unsigned final : 1;              /* task is final(1) so execute immediately */
+  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
+                              code path */
+  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
+                                     invoke destructors from the runtime */
+  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
+                         context of the RTL) */
+  unsigned priority_specified : 1; /* set if the compiler provides priority
+                                      setting for the task */
+  unsigned detachable : 1;         /* 1 == can detach */
+  unsigned hidden_helper : 1;      /* 1 == hidden helper task */
+  unsigned reserved : 8;           /* reserved for compiler use */
+
+  /* Library flags */       /* Total library flags must be 16 bits */
+  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
+  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
+  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
+  // (1) or may be deferred (0)
+  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
+  // (0) [>= 2 threads]
+  /* If either team_serial or tasking_ser is set, task team may be NULL */
+  /* Task State Flags: */
+  unsigned started : 1;    /* 1==started, 0==not started     */
+  unsigned executing : 1;  /* 1==executing, 0==not executing */
+  unsigned complete : 1;   /* 1==complete, 0==not complete   */
+  unsigned freed : 1;      /* 1==freed, 0==allocated        */
+  unsigned native : 1;     /* 1==gcc-compiled task, 0==intel */
+  unsigned reserved31 : 7; /* reserved for library use */
+} kmp_tasking_flags_t;
+
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
@@ -117,6 +161,70 @@
                           kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                           kmp_depend_info_t *noalias_dep_list)
     __attribute__((weak));
+
+kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
+                                  size_t sizeof_shareds,
+                                  kmp_routine_entry_t task_entry)
+    __attribute__((weak));
+
+kmp_task_t *
+__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
+                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                             kmp_routine_entry_t task_entry,
+                             kmp_int64 device_id) __attribute__((weak));
+
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list)
+    __attribute__((weak));
+
+struct TargetMemcpyArgsTy {
+  void *Dst;
+  const void *Src;
+  size_t Length;
+  size_t DstOffset;
+  size_t SrcOffset;
+  int DstDevice;
+  int SrcDevice;
+
+  TargetMemcpyArgsTy(void *Dst_, const void *Src_, size_t Length_,
+                     size_t DstOffset_, size_t SrcOffset_, int DstDevice_,
+                     int SrcDevice_)
+      : Dst(Dst_), Src(Src_), Length(Length_), DstOffset(DstOffset_),
+        SrcOffset(SrcOffset_), DstDevice(DstDevice_), SrcDevice(SrcDevice_){};
+};
+
+// Arguments for a deferred omp_target_memcpy_rect call; arrays are not copied.
+struct TargetMemcpyRectArgsTy {
+  void *Dst;
+  const void *Src;
+  size_t ElementSize;
+  int NumDims;
+  const size_t *Volume;
+  const size_t *DstOffsets;
+  const size_t *SrcOffsets;
+  const size_t *DstDimensions;
+  const size_t *SrcDimensions;
+  int DstDevice;
+  int SrcDevice;
+  // The buffer for depend objects; null until explicitly assigned.
+  kmp_depend_info_t *Depobjs = nullptr;
+
+  TargetMemcpyRectArgsTy(void *Dst_, const void *Src_, size_t ElementSize_,
+                         int NumDims_, const size_t *Volume_,
+                         const size_t *DstOffsets_, const size_t *SrcOffsets_,
+                         const size_t *DstDimensions_,
+                         const size_t *SrcDimensions_, int DstDevice_,
+                         int SrcDevice_)
+      : Dst(Dst_), Src(Src_), ElementSize(ElementSize_), NumDims(NumDims_),
+        Volume(Volume_), DstOffsets(DstOffsets_), SrcOffsets(SrcOffsets_),
+        DstDimensions(DstDimensions_), SrcDimensions(SrcDimensions_),
+        DstDevice(DstDevice_), SrcDevice(SrcDevice_) {}
+};
+
 #ifdef __cplusplus
 }
 #endif
Index: openmp/libomptarget/src/exports
===================================================================
--- openmp/libomptarget/src/exports
+++ openmp/libomptarget/src/exports
@@ -38,6 +38,8 @@
     omp_target_is_present;
     omp_target_memcpy;
     omp_target_memcpy_rect;
+    omp_target_memcpy_async;
+    omp_target_memcpy_rect_async;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
     llvm_omp_target_alloc_host;
Index: openmp/libomptarget/src/api.cpp
===================================================================
--- openmp/libomptarget/src/api.cpp
+++ openmp/libomptarget/src/api.cpp
@@ -200,6 +200,87 @@
   return Rc;
 }
 
+// Task entry for omp_target_memcpy_async: does the copy, then frees its args
+static int __kmpc_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
+  if (Task == nullptr)
+    return OFFLOAD_FAIL;
+
+  TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds;
+
+  if (Args == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Call the blocking version with the captured arguments
+  int Rc =
+      omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset,
+                        Args->SrcOffset, Args->DstDevice, Args->SrcDevice);
+
+  // Release the arguments object allocated by omp_target_memcpy_async
+  delete Args;
+
+  DP("omp_target_memcpy returns %d\n", Rc);
+  return Rc;
+}
+
+// Asynchronous variant of omp_target_memcpy: wraps the arguments in a hidden
+// helper task that is submitted with the given depend objects.
+EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
+                                   size_t DstOffset, size_t SrcOffset,
+                                   int DstDevice, int SrcDevice,
+                                   int DepObj_Count,
+                                   omp_depend_t *DepObj_List) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
+     "src offset %zu, length %zu\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
+     Length);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Create task
+  int (*Fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_async_helper;
+  int Errsz = sizeof(kmp_task_t);
+  int Errhr = 0;
+  int Gtid = __kmpc_global_thread_num(NULL);
+
+  // Set up the hidden helper flag
+  kmp_int32 Flags = 0;
+  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
+  InputFlags->hidden_helper = 1;
+
+  // Alloc helper task
+  kmp_task_t *Ptr =
+      __kmpc_omp_target_task_alloc(NULL, Gtid, Flags, Errsz, Errhr, Fn, -1);
+
+  // Create the arguments object; the helper task deletes it after the copy
+  TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
+      Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);
+  Ptr->shareds = Args;
+
+  // Convert the type of depend objects
+  kmp_depend_info_t *DepObjs = nullptr;
+  if (DepObj_Count > 0) {
+    DepObjs = new kmp_depend_info_t[DepObj_Count];
+    for (int i = 0; i < DepObj_Count; i++) {
+      omp_depend_t DepObj = DepObj_List[i];
+      DepObjs[i] = *((kmp_depend_info_t *)DepObj);
+    }
+  }
+
+  int Rc = __kmpc_omp_task_with_deps(NULL, Gtid, Ptr, DepObj_Count, DepObjs, 0,
+                                     NULL);
+
+  // Release the temporary dependence array; it was allocated with new[], so
+  // it must be freed with delete[] (deleting a null pointer is a no-op).
+  delete[] DepObjs;
+
+  DP("omp_target_memcpy_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int
 omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                        int NumDims, const size_t *Volume,
@@ -260,6 +341,91 @@
   return Rc;
 }
 
+// Task entry for omp_target_memcpy_rect_async: does the copy, frees its args
+static int __kmpc_target_memcpy_rect_async_helper(kmp_int32 Gtid,
+                                                  kmp_task_t *Task) {
+  if (Task == nullptr)
+    return OFFLOAD_FAIL;
+
+  TargetMemcpyRectArgsTy *Args = (TargetMemcpyRectArgsTy *)Task->shareds;
+
+  if (Args == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Call the blocking version with the captured arguments
+  int Rc = omp_target_memcpy_rect(
+      Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume,
+      Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions,
+      Args->SrcDimensions, Args->DstDevice, Args->SrcDevice);
+
+  // Release the arguments object allocated by omp_target_memcpy_rect_async
+  delete Args;
+
+  DP("omp_target_memcpy_rect returns %d\n", Rc);
+  return Rc;
+}
+
+// Asynchronous variant of omp_target_memcpy_rect: wraps the arguments in a
+// hidden helper task that is submitted with the given depend objects.
+EXTERN int omp_target_memcpy_rect_async(
+    void *Dst, const void *Src, size_t ElementSize, int NumDims,
+    const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets,
+    const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice,
+    int SrcDevice, int DepObj_Count, omp_depend_t *DepObj_List) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
+     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
+     "volume " DPxMOD ", element size %zu, num_dims %d\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
+     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
+     DPxPTR(Volume), ElementSize, NumDims);
+
+  // Check the source and dest address
+  if (Dst == nullptr || Src == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Create task
+  int (*Fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_rect_async_helper;
+  int Errsz = sizeof(kmp_task_t);
+  int Errhr = 0;
+  int Gtid = __kmpc_global_thread_num(NULL);
+
+  // Set up the hidden helper flag
+  kmp_int32 Flags = 0;
+  kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
+  InputFlags->hidden_helper = 1;
+
+  // Alloc helper task
+  kmp_task_t *Ptr =
+      __kmpc_omp_target_task_alloc(NULL, Gtid, Flags, Errsz, Errhr, Fn, -1);
+
+  // Create the arguments object; the helper task deletes it after the copy
+  TargetMemcpyRectArgsTy *Args = new TargetMemcpyRectArgsTy(
+      Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
+      DstDimensions, SrcDimensions, DstDevice, SrcDevice);
+  Ptr->shareds = Args;
+
+  // Convert the type of depend objects
+  kmp_depend_info_t *DepObjs = nullptr;
+  if (DepObj_Count > 0) {
+    DepObjs = new kmp_depend_info_t[DepObj_Count];
+    for (int i = 0; i < DepObj_Count; i++) {
+      omp_depend_t DepObj = DepObj_List[i];
+      DepObjs[i] = *((kmp_depend_info_t *)DepObj);
+    }
+  }
+
+  int Rc = __kmpc_omp_task_with_deps(NULL, Gtid, Ptr, DepObj_Count, DepObjs, 0,
+                                     NULL);
+
+  // Release the temporary array (new[] requires delete[]; nullptr is fine)
+  delete[] DepObjs;
+
+  DP("omp_target_memcpy_rect_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                                     size_t Size, size_t DeviceOffset,
                                     int DeviceNum) {
Index: openmp/libomptarget/include/interop.h
===================================================================
--- openmp/libomptarget/include/interop.h
+++ openmp/libomptarget/include/interop.h
@@ -116,30 +116,6 @@
 extern const char *__KAI_KMPC_CONVENTION
 omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t);
 
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
-  /* Compiler flags */             /* Total compiler flags must be 16 bits */
-  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
-  unsigned final : 1;              /* task is final(1) so execute immediately */
-  unsigned merged_if0 : 1; // no __kmpc_task_{begin/complete}_if0 calls in if0
-  unsigned destructors_thunk : 1; // set if the compiler creates a thunk to
-  unsigned proxy : 1; // task is a proxy task (it will be executed outside the
-  unsigned priority_specified : 1; // set if the compiler provides priority
-  unsigned detachable : 1;         // 1 == can detach */
-  unsigned unshackled : 1;         /* 1 == unshackled task */
-  unsigned target : 1;             /* 1 == target task */
-  unsigned reserved : 7;           /* reserved for compiler use */
-  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
-  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
-  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
-  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
-  unsigned started : 1;     /* 1==started, 0==not started     */
-  unsigned executing : 1;   /* 1==executing, 0==not executing */
-  unsigned complete : 1;    /* 1==complete, 0==not complete   */
-  unsigned freed : 1;       /* 1==freed, 0==allocated        */
-  unsigned native : 1;      /* 1==gcc-compiled task, 0==intel */
-  unsigned reserved31 : 7;  /* reserved for library use */
-} kmp_tasking_flags_t;
-
 typedef enum omp_interop_backend_type_t {
   // reserve 0
   omp_interop_backend_type_cuda_1 = 1,
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -248,6 +248,9 @@
   not satisfied in the event of an instantiation failures in a requires expression's
   parameter list. We previously handled this correctly in a constraint evaluation
   context, but not in a requires clause evaluated as a boolean.
+- Address the thread identification problems in coroutines.
+  `Issue 47177 <https://github.com/llvm/llvm-project/issues/47177>`_
+  `Issue 47179 <https://github.com/llvm/llvm-project/issues/47179>`_
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to