AndreyChurbanov updated this revision to Diff 329461.
AndreyChurbanov added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed the size of the task dependence flags from 8 bits to 32 bits, so that
the runtime does not get garbage data in the unused bits of the structure and
can use the flags as an integer where that is more convenient than looking at
particular bits.

Also adjusted some tests to clear the unused bits of the task dependence structure.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97085/new/

https://reviews.llvm.org/D97085

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  openmp/runtime/src/kmp.h
  openmp/runtime/src/kmp_taskdeps.cpp
  openmp/runtime/src/kmp_taskdeps.h
  openmp/runtime/test/tasking/bug_nested_proxy_task.c
  openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
  openmp/runtime/test/tasking/hidden_helper_task/common.h
  openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
  openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
  openmp/runtime/test/tasking/omp51_task_dep_inoutset.c

Index: openmp/runtime/test/tasking/omp51_task_dep_inoutset.c
===================================================================
--- /dev/null
+++ openmp/runtime/test/tasking/omp51_task_dep_inoutset.c
@@ -0,0 +1,258 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-cxx-compile-and-run
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.0 task dependences "mutexinoutset" and 5.1 "inoutset",
+// emulates compiler codegen for new dep kinds
+// Mutually exclusive tasks get same input dependency info array
+//
+// Task tree created:
+//      task0 - task1 (in)
+//             \
+//        task2 - task3 (inoutset)
+//             /
+//      task4 - task5 (in)
+//           /
+//  task6 <-->task7  (mutexinoutset)
+//       \    /
+//       task8 (in)
+//
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int volatile checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP { // one dependence record; arrays of these go to __kmpc_omp_task_with_deps
+  size_t addr; // address of the dependence object (main stores &i1, &i2, &i3)
+  size_t len; // main always stores 0 here and marks it "not used"
+  int flags; // dependence kind; main uses 1 (IN), 4 (MUTEXINOUTSET), 8 (INOUTSET)
+} dep;
+typedef struct task { // task object as returned by __kmpc_omp_task_alloc
+  void** shareds;
+  void* entry;
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv; // single first-private value; set in main, printed by the thunks
+} task_t;
+#define TIED 1 // flags argument main passes to __kmpc_omp_task_alloc
+typedef int(*entry_t)(int, task_t*); // task entry routine, called as (gtid, task)
+typedef struct ID { // type of the `loc` argument taken by the __kmpc_* entry points
+  int reserved_1;
+  int flags; // the `loc` instance below sets this to 2
+  int reserved_2;
+  int reserved_3;
+  char *psource; // location string; `loc` uses ";file;func;0;0;;"
+} id;
+// thunk routine for tasks with MTX dependency (entry point of task6 and task7)
+int thunk_m(int gtid, task_t* ptask) { // gtid is not used in the body
+  int th = omp_get_thread_num();
+  #pragma omp atomic
+    ++checker; // one more task is running
+  printf("task _%d, th %d\n", ptask->f_priv, th);
+  if (checker != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error1, checker %d != 1\n", checker);
+  }
+  mysleep(DELAY); // stay inside the task for DELAY ms, then check again
+  if (checker != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error2, checker %d != 1\n", checker);
+  }
+  #pragma omp atomic
+    --checker; // this task is done
+  return 0;
+}
+// thunk routine for tasks with inoutset dependency (entry point of task2, task3)
+int thunk_s(int gtid, task_t* ptask) { // gtid is not used in the body
+  int th = omp_get_thread_num();
+  #pragma omp atomic
+    ++checker; // one more task is running
+  printf("task _%d, th %d\n", ptask->f_priv, th);
+  if (checker > 2) { // no more than 2 tasks concurrently
+    err++;
+    printf("Error1, checker %d > 2\n", checker);
+  }
+  mysleep(DELAY); // stay inside the task for DELAY ms, then check again
+  if (checker > 2) { // no more than 2 tasks concurrently
+    err++;
+    printf("Error2, checker %d > 2\n", checker);
+  }
+  #pragma omp atomic
+    --checker; // this task is done
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" { // the test is also built as C++ (see the cxx RUN line)
+#endif
+int __kmpc_global_thread_num(id*); // main uses the result as gtid for the calls below
+extern task_t* __kmpc_omp_task_alloc(id *loc, int gtid, int flags,
+                                     size_t sz, size_t shar, entry_t rtn); // rtn: task entry thunk
+int
+__kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int nd, dep *dep_lst,
+                          int nd_noalias, dep *noalias_dep_lst); // main passes nd = 2, no noalias deps
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; // passed as `loc` to every __kmpc_* call in main
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main()
+{ // creates tasks 0..8 (see the task tree above); returns 0 on "passed", 1 on "failed"
+  int i1,i2,i3; // only used as dependence objects: named in depend() or by address
+  omp_set_num_threads(4);
+  omp_set_dynamic(0);
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      dep sdep[2]; // filled once per emulated pair and reused by both tasks of the pair
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 0_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error2, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 1_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error2, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0); // same sdep as task2
+// compiler codegen end
+      t = omp_get_thread_num();
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 4_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error2, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 5_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error2, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 4; // MUTEXINOUTSET
+      sdep[1].addr = (size_t)&i3;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0); // same sdep as task6
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { int th = omp_get_thread_num();
+        printf("task 8_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker != 1) { // last task should run exclusively
+          err++;
+          printf("Error1, checker %d != 1\n", checker); }
+        mysleep(DELAY);
+        if (checker != 1) { // last task should run exclusively
+          err++;
+          printf("Error2, checker %d != 1\n", checker); }
+        #pragma omp atomic
+          --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed\n");
+    return 1;
+  }
+}
Index: openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
===================================================================
--- openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
+++ openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
@@ -81,6 +81,7 @@
 
     kmp_depend_info_t depinfo1;
     depinfo1.base_addr = reinterpret_cast<intptr_t>(&depvar);
+    depinfo1.flag = 0;
     depinfo1.flags.in = 1;
     depinfo1.flags.out = 1;
     depinfo1.len = 4;
@@ -96,6 +97,7 @@
 
     kmp_depend_info_t depinfo2;
     depinfo2.base_addr = reinterpret_cast<intptr_t>(&depvar);
+    depinfo2.flag = 0;
     depinfo2.flags.in = 1;
     depinfo2.flags.out = 1;
     depinfo2.len = 4;
@@ -111,6 +113,7 @@
 
     kmp_depend_info_t depinfo3;
     depinfo3.base_addr = reinterpret_cast<intptr_t>(&depvar);
+    depinfo3.flag = 0;
     depinfo3.flags.in = 1;
     depinfo3.flags.out = 1;
     depinfo3.len = 4;
Index: openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
===================================================================
--- openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
+++ openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
@@ -65,6 +65,7 @@
 
     kmp_depend_info_t depinfo1;
     depinfo1.base_addr = reinterpret_cast<intptr_t>(&data);
+    depinfo1.flag = 0;
     depinfo1.flags.out = 1;
     depinfo1.len = 4;
 
@@ -80,6 +81,7 @@
 
     kmp_depend_info_t depinfo2;
     depinfo2.base_addr = reinterpret_cast<intptr_t>(&data);
+    depinfo2.flag = 0;
     depinfo2.flags.in = 1;
     depinfo2.flags.out = 1;
     depinfo2.len = 4;
@@ -96,6 +98,7 @@
 
     kmp_depend_info_t depinfo3;
     depinfo3.base_addr = reinterpret_cast<intptr_t>(&data);
+    depinfo3.flag = 0;
     depinfo3.flags.in = 1;
     depinfo3.flags.out = 1;
     depinfo3.len = 4;
@@ -112,6 +115,7 @@
 
     kmp_depend_info_t depinfo4;
     depinfo4.base_addr = reinterpret_cast<intptr_t>(&data);
+    depinfo4.flag = 0;
     depinfo4.flags.in = 1;
     depinfo4.flags.out = 1;
     depinfo4.len = 4;
Index: openmp/runtime/test/tasking/hidden_helper_task/common.h
===================================================================
--- openmp/runtime/test/tasking/hidden_helper_task/common.h
+++ openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -13,11 +13,15 @@
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
+  union {
+    int flag;
+    struct {
+      unsigned in : 1;
+      unsigned out : 1;
+      unsigned mtx : 1;
+      unsigned reserved : 29; // 32 bits in total
+    } flags;
+  };
 } kmp_depend_info_t;
 
 typedef union kmp_cmplrdata {
Index: openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
===================================================================
--- openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
+++ openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -38,12 +38,12 @@
 } ident_t;
 
 typedef struct kmp_depend_info {
-     kmp_intptr_t               base_addr;
-     size_t                     len;
-     struct {
-         bool                   in:1;
-         bool                   out:1;
-     } flags;
+  kmp_intptr_t base_addr;
+  size_t len;
+//  struct {
+//    bool in:1;
+//    bool out:1; }
+  int flags;
 } kmp_depend_info_t;
 
 struct kmp_task;
@@ -104,8 +104,7 @@
     dep_info.base_addr = (long) &dep;
     dep_info.len = sizeof(int);
     // out = inout per spec and runtime expects this
-    dep_info.flags.in = 1;
-    dep_info.flags.out = 1;
+    dep_info.flags = 3; // inout
 
     kmp_int32 gtid = __kmpc_global_thread_num(NULL);
     kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
Index: openmp/runtime/test/tasking/bug_nested_proxy_task.c
===================================================================
--- openmp/runtime/test/tasking/bug_nested_proxy_task.c
+++ openmp/runtime/test/tasking/bug_nested_proxy_task.c
@@ -41,12 +41,12 @@
 } ident_t;
 
 typedef struct kmp_depend_info {
-     kmp_intptr_t               base_addr;
-     size_t                     len;
-     struct {
-         bool                   in:1;
-         bool                   out:1;
-     } flags;
+  kmp_intptr_t base_addr;
+  size_t len;
+//  struct {
+//    bool in:1;
+//    bool out:1; }
+  int flags;
 } kmp_depend_info_t;
 
 struct kmp_task;
@@ -109,8 +109,7 @@
     dep_info.base_addr = (long) &dep;
     dep_info.len = sizeof(int);
     // out = inout per spec and runtime expects this
-    dep_info.flags.in = 1;
-    dep_info.flags.out = 1;
+    dep_info.flags = 3; // inout
 
     kmp_int32 gtid = __kmpc_global_thread_num(NULL);
     kmp_task_t *proxy_task = __kmpc_omp_task_alloc(NULL,gtid,17,sizeof(kmp_task_t),0,&task_entry);
Index: openmp/runtime/src/kmp_taskdeps.h
===================================================================
--- openmp/runtime/src/kmp_taskdeps.h
+++ openmp/runtime/src/kmp_taskdeps.h
@@ -58,8 +58,8 @@
       kmp_dephash_entry_t *next;
       for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
         next = entry->next_in_bucket;
-        __kmp_depnode_list_free(thread, entry->last_ins);
-        __kmp_depnode_list_free(thread, entry->last_mtxs);
+        __kmp_depnode_list_free(thread, entry->last_set);
+        __kmp_depnode_list_free(thread, entry->prev_set);
         __kmp_node_deref(thread, entry->last_out);
         if (entry->mtx_lock) {
           __kmp_destroy_lock(entry->mtx_lock);
Index: openmp/runtime/src/kmp_taskdeps.cpp
===================================================================
--- openmp/runtime/src/kmp_taskdeps.cpp
+++ openmp/runtime/src/kmp_taskdeps.cpp
@@ -149,9 +149,6 @@
   return h;
 }
 
-#define ENTRY_LAST_INS 0
-#define ENTRY_LAST_MTXS 1
-
 static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                              kmp_dephash_t **hash,
                                              kmp_intptr_t addr) {
@@ -178,9 +175,9 @@
 #endif
     entry->addr = addr;
     entry->last_out = NULL;
-    entry->last_ins = NULL;
-    entry->last_mtxs = NULL;
-    entry->last_flag = ENTRY_LAST_INS;
+    entry->last_set = NULL;
+    entry->prev_set = NULL;
+    entry->last_flag = 0;
     entry->mtx_lock = NULL;
     entry->next_in_bucket = h->buckets[bucket];
     h->buckets[bucket] = entry;
@@ -313,96 +310,81 @@
     kmp_dephash_entry_t *info =
         __kmp_dephash_find(thread, hash, dep->base_addr);
     kmp_depnode_t *last_out = info->last_out;
-    kmp_depnode_list_t *last_ins = info->last_ins;
-    kmp_depnode_list_t *last_mtxs = info->last_mtxs;
-
-    if (dep->flags.out) { // out --> clean lists of ins and mtxs if any
-      if (last_ins || last_mtxs) {
-        if (info->last_flag == ENTRY_LAST_INS) { // INS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
-        } else { // MTXS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        }
-        __kmp_depnode_list_free(thread, last_ins);
-        __kmp_depnode_list_free(thread, last_mtxs);
-        info->last_ins = NULL;
-        info->last_mtxs = NULL;
+    kmp_depnode_list_t *last_set = info->last_set;
+    kmp_depnode_list_t *prev_set = info->prev_set;
+
+    if (dep->flags.out) { // out or inout --> clean lists if any
+      if (last_set) {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        __kmp_depnode_list_free(thread, last_set);
+        __kmp_depnode_list_free(thread, prev_set);
+        info->last_set = NULL;
+        info->prev_set = NULL;
+        info->last_flag = 0; // no sets in this dephash entry
       } else {
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
       }
       __kmp_node_deref(thread, last_out);
-      if (dep_barrier) {
+      if (!dep_barrier) {
+        info->last_out = __kmp_node_ref(node);
+      } else {
         // if this is a sync point in the serial sequence, then the previous
         // outputs are guaranteed to be completed after the execution of this
         // task so the previous output nodes can be cleared.
         info->last_out = NULL;
-      } else {
-        info->last_out = __kmp_node_ref(node);
       }
-    } else if (dep->flags.in) {
-      // in --> link node to either last_out or last_mtxs, clean earlier deps
-      if (last_mtxs) {
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        __kmp_node_deref(thread, last_out);
-        info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last
-          // clean old INS before creating new list
-          __kmp_depnode_list_free(thread, last_ins);
-          info->last_ins = NULL;
-        }
-      } else {
+    } else { // either IN or MTX or SET
+      if (info->last_flag == 0 || info->last_flag == dep->flag) {
+        // last_set either didn't exist or of same dep kind
         // link node as successor of the last_out if any
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_INS;
-      info->last_ins = __kmp_add_node(thread, info->last_ins, node);
-    } else {
-      KMP_DEBUG_ASSERT(dep->flags.mtx == 1);
-      // mtx --> link node to either last_out or last_ins, clean earlier deps
-      if (last_ins) {
+        // link node as successor of all nodes in the prev_set if any
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+      } else { // last_set is of different dep kind, make it prev_set
+        // link node as successor of all nodes in the last_set
         npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        // clean last_out if any
         __kmp_node_deref(thread, last_out);
         info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last
-          // clean old MTXS before creating new list
-          __kmp_depnode_list_free(thread, last_mtxs);
-          info->last_mtxs = NULL;
-        }
-      } else {
-        // link node as successor of the last_out if any
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_MTXS;
-      info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node);
-      if (info->mtx_lock == NULL) {
-        info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
-        __kmp_init_lock(info->mtx_lock);
+        // clean prev_set if any
+        __kmp_depnode_list_free(thread, prev_set);
+        // move last_set to prev_set, new last_set will be allocated
+        info->prev_set = last_set;
+        info->last_set = NULL;
       }
-      KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
-      kmp_int32 m;
-      // Save lock in node's array
-      for (m = 0; m < MAX_MTX_DEPS; ++m) {
-        // sort pointers in decreasing order to avoid potential livelock
-        if (node->dn.mtx_locks[m] < info->mtx_lock) {
-          KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL);
-          for (int n = node->dn.mtx_num_locks; n > m; --n) {
-            // shift right all lesser non-NULL pointers
-            KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
-            node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+      info->last_flag = dep->flag; // store dep kind of the last_set
+      info->last_set = __kmp_add_node(thread, info->last_set, node);
+
+      // check if we are processing MTX dependency
+      if (dep->flag == KMP_DEP_MTX) {
+        if (info->mtx_lock == NULL) {
+          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
+          __kmp_init_lock(info->mtx_lock);
+        }
+        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
+        kmp_int32 m;
+        // Save lock in node's array
+        for (m = 0; m < MAX_MTX_DEPS; ++m) {
+          // sort pointers in decreasing order to avoid potential livelock
+          if (node->dn.mtx_locks[m] < info->mtx_lock) {
+            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
+            for (int n = node->dn.mtx_num_locks; n > m; --n) {
+              // shift right all lesser non-NULL pointers
+              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
+              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+            }
+            node->dn.mtx_locks[m] = info->mtx_lock;
+            break;
           }
-          node->dn.mtx_locks[m] = info->mtx_lock;
-          break;
         }
+        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
+        node->dn.mtx_num_locks++;
       }
-      KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
-      node->dn.mtx_num_locks++;
     }
   }
   KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
@@ -433,27 +415,25 @@
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for (i = 0; i < ndeps; i++) {
     if (dep_list[i].base_addr != 0) {
+      KMP_DEBUG_ASSERT(
+          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
+          dep_list[i].flag == KMP_DEP_INOUT ||
+          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
       for (int j = i + 1; j < ndeps; j++) {
         if (dep_list[i].base_addr == dep_list[j].base_addr) {
-          dep_list[i].flags.in |= dep_list[j].flags.in;
-          dep_list[i].flags.out |=
-              (dep_list[j].flags.out ||
-               (dep_list[i].flags.in && dep_list[j].flags.mtx) ||
-               (dep_list[i].flags.mtx && dep_list[j].flags.in));
-          dep_list[i].flags.mtx =
-              dep_list[i].flags.mtx | dep_list[j].flags.mtx &&
-              !dep_list[i].flags.out;
+          if (dep_list[i].flag != dep_list[j].flag) {
+            // two different dependences on same address work identical to OUT
+            dep_list[i].flag = KMP_DEP_OUT;
+          }
           dep_list[j].base_addr = 0; // Mark j element as void
         }
       }
-      if (dep_list[i].flags.mtx) {
+      if (dep_list[i].flag == KMP_DEP_MTX) {
         // limit number of mtx deps to MAX_MTX_DEPS per node
         if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
           ++n_mtxs;
         } else {
-          dep_list[i].flags.in = 1; // downgrade mutexinoutset to inout
-          dep_list[i].flags.out = 1;
-          dep_list[i].flags.mtx = 0;
+          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
         }
       }
     }
Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -2260,15 +2260,24 @@
 typedef struct kmp_depnode_list kmp_depnode_list_t;
 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
 
+#define KMP_DEP_IN 0x1
+#define KMP_DEP_OUT 0x2
+#define KMP_DEP_INOUT 0x3
+#define KMP_DEP_MTX 0x4
+#define KMP_DEP_SET 0x8
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
+  union {
+    kmp_uint32 flag;
+    struct {
+      unsigned in : 1;
+      unsigned out : 1;
+      unsigned mtx : 1;
+      unsigned set : 1;
+    } flags;
+  };
 } kmp_depend_info_t;
 
 // Internal structures to work with task dependencies:
@@ -2302,8 +2311,8 @@
 struct kmp_dephash_entry {
   kmp_intptr_t addr;
   kmp_depnode_t *last_out;
-  kmp_depnode_list_t *last_ins;
-  kmp_depnode_list_t *last_mtxs;
+  kmp_depnode_list_t *last_set;
+  kmp_depnode_list_t *prev_set;
   kmp_int32 last_flag;
   kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
   kmp_dephash_entry_t *next_in_bucket;
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4573,7 +4573,7 @@
 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                            QualType &FlagsTy) {
-  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
+  FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
   if (KmpDependInfoTy.isNull()) {
     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
     KmpDependInfoRD->startDefinition();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to