Hi Jakub,
This patch was reduced a bit and most of your comments were considered in the
last submission of the environment variable syntax extension patch
(https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html). This patch
also builds on that envvar patch version.
The nteams-var related content was moved from this patch to the envvar patch as
that is closely connected. However, additional testing and testing of copy back
device-specific nteams-var ICV values is still included in this patch together
with the teams-thread-limit-var content.
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
if (teams == NULL_TREE)
- num_teams_upper = integer_one_node;
+ num_teams_upper = integer_minus_two_node;
No, please don't introduce this, it is quite costly to have a GC trees
like integer_one_node, so they should stay for the most commonly used
numbers, -2 isn't like that. Just build_int_cst (integer_type_node, -2).
integer_minus_two_node was replaced by "build_int_cst (integer_type_node, -2)".
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -642,6 +642,7 @@ enum tree_index {
TI_INTEGER_ONE,
TI_INTEGER_THREE,
TI_INTEGER_MINUS_ONE,
+ TI_INTEGER_MINUS_TWO,
TI_NULL_POINTER,
TI_SIZE_ZERO,
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 8f83ea1..8cb474d 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char)
integer_one_node = build_int_cst (integer_type_node, 1);
integer_three_node = build_int_cst (integer_type_node, 3);
integer_minus_one_node = build_int_cst (integer_type_node, -1);
+ integer_minus_two_node = build_int_cst (integer_type_node, -2);
size_zero_node = size_int (0);
size_one_node = size_int (1);
diff --git a/gcc/tree.h b/gcc/tree.h
index cea49a5..1aeb009 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp)
#define integer_one_node global_trees[TI_INTEGER_ONE]
#define integer_three_node global_trees[TI_INTEGER_THREE]
#define integer_minus_one_node global_trees[TI_INTEGER_MINUS_ONE]
+#define integer_minus_two_node global_trees[TI_INTEGER_MINUS_TWO]
#define size_zero_node global_trees[TI_SIZE_ZERO]
#define size_one_node global_trees[TI_SIZE_ONE]
#define bitsize_zero_node global_trees[TI_BITSIZE_ZERO]
And drop the above 3 hunks.
Removed.
--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR;
volatile int GOMP_MAX_ACTIVE_LEVELS_VAR;
volatile omp_proc_bind_t GOMP_BIND_VAR;
volatile int GOMP_NTEAMS_VAR;
+volatile int GOMP_TEAMS_THREAD_LIMIT_VAR;
I really don't like this copying of individual ICVs one by one to the
device, copy a struct containing them and access fields in that struct.
I recently changed this in
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html. So there is
one struct containing all ICVs that are copied from host to the device and back.
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -116,6 +116,7 @@ struct addr_pair
#define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels
#define GOMP_BIND_VAR __gomp_bind
#define GOMP_NTEAMS_VAR __gomp_nteams
+#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var
Likewise here.
Those were all removed.
@@ -527,13 +538,19 @@ struct gomp_icv_list {
extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list,
int device_num);
-extern struct gomp_icv_list *gomp_run_sched_var_dev_list;
-extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list;
+extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num,
+ size_t size,
+ struct gomp_icv_list
**list);
+extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list;
+extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list;
+extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list;
+
extern struct gomp_icv_list *gomp_nteams_var_dev_list;
-extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list;
+extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list;
Nor these per-var lists. For a specific device, walk the list with
all the vars in it, start with the most specific (matching dev number),
then just dev and then all and fill in from it what is going to be copied.
The above lists were removed and instead one list for device-specific ICV
structs was introduced in the above mentioned patch.
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -572,7 +572,8 @@ static char *GOMP_ICV_STRINGS[] =
XSTRING (GOMP_DYN_VAR),
XSTRING (GOMP_MAX_ACTIVE_LEVELS_VAR),
XSTRING (GOMP_BIND_VAR),
- XSTRING (GOMP_NTEAMS_VAR)
+ XSTRING (GOMP_NTEAMS_VAR),
+ XSTRING (GOMP_TEAMS_THREAD_LIMIT_VAR)
Then you don't need to e.g. track the names of the individual vars, just
one for the whole ICV block.
That array was also removed.
The patch was tested on x86_64-linux with nvptx and
amdgcn offloading without regression.
Marcel
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht
München, HRB 106955
This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of
device-specific ICV values (specified as environment variables or changed on a
device). In order to reuse device-specific ICV values, a copy back mechanism is
implemented that copies ICV values back from device to the host.
gcc/ChangeLog:
* gimplify.cc (optimize_target_teams): Set initial num_teams_upper
to "-2" instead of "1" for non-existing num_teams clause in order to
disambiguate from the case of an existing num_teams clause with value 1.
libgomp/ChangeLog:
* config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to
allow processing of device-specific values.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* icv-device.c (omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
(omp_set_teams_thread_limit): Likewise.
* icv.c (omp_set_teams_thread_limit): Removed.
(omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
* target.c (get_gomp_offload_icvs): Added teams_thread_limit_var
handling.
(gomp_load_image_to_device): Added a size check for the ICVs struct
variable.
(gomp_copy_back_icvs): New function that is used in GOMP_target_ext to
copy back the ICV values from device to host.
(GOMP_target_ext): Update the number of teams and threads in the kernel
args also considering device-specific values.
* testsuite/libgomp.c-c++-common/icv-4.c: Bugfix.
* testsuite/libgomp.c-c++-common/icv-5.c: Extended.
* testsuite/libgomp.c-c++-common/icv-6.c: Extended.
* testsuite/libgomp.c-c++-common/icv-7.c: Extended.
* testsuite/libgomp.c-c++-common/icv-8.c: Extended.
* testsuite/libgomp.c-c++-common/icv-9.c: New test.
* testsuite/libgomp.fortran/icv-5.f90: New test.
* testsuite/libgomp.fortran/icv-6.f90: New test.
gcc/testsuite/ChangeLog:
* c-c++-common/gomp/target-teams-1.c: Adapt expected values for
num_teams from "1" to "-2" in cases without num_teams clause.
* g++.dg/gomp/target-teams-1.C: Likewise.
* gfortran.dg/gomp/defaultmap-4.f90: Likewise.
* gfortran.dg/gomp/defaultmap-5.f90: Likewise.
* gfortran.dg/gomp/defaultmap-6.f90: Likewise.
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 2ac7ca0..468fc2b 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -14103,7 +14103,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
if (teams == NULL_TREE)
- num_teams_upper = integer_one_node;
+ num_teams_upper = build_int_cst (integer_type_node, -2);
else
for (c = OMP_TEAMS_CLAUSES (teams); c; c = OMP_CLAUSE_CHAIN (c))
{
diff --git a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
index 51b8d48..74d60e1 100644
--- a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
@@ -81,5 +81,5 @@ foo (int a, int b, long c, long d)
/* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
/* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/target-teams-1.C
b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
index f78a608..29e5597 100644
--- a/gcc/testsuite/g++.dg/gomp/target-teams-1.C
+++ b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
@@ -88,5 +88,5 @@ foo (int a, int b, long c, long d)
/* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
/* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
index 7b182b5..9081159 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90
@@ -141,5 +141,5 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 2 "gimple"
} }
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:"
2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\)
thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\)
thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\)
defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1
"gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\)
thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\)
thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\)
defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1
"gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
index 1391274..91566ed 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90
@@ -141,5 +141,5 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:"
2 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\*strxp \\\[len:" 1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\)
thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\)
thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\)
defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1
"gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\)
thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\)
thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\)
defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1
"gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
index 9a81d0f..867e41a 100644
--- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90
@@ -101,4 +101,4 @@ end
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 1 "gimple"
} }
! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:"
1 "gimple" } }
! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } }
-! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\)
thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\)
thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } }
diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c
index bf757ba..eb68881 100644
--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams)
GOMP_ADDITIONAL_ICVS.nteams = num_teams;
}
+int
+omp_get_teams_thread_limit (void)
+{
+ return GOMP_ADDITIONAL_ICVS.teams_thread_limit;
+}
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
@@ -89,3 +102,5 @@ ialias (omp_is_initial_device)
ialias (omp_get_device_num)
ialias (omp_get_max_teams)
ialias (omp_set_num_teams)
+ialias (omp_get_teams_thread_limit)
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/config/nvptx/icv-device.c
b/libgomp/config/nvptx/icv-device.c
index 6f869be..a3f00cf 100644
--- a/libgomp/config/nvptx/icv-device.c
+++ b/libgomp/config/nvptx/icv-device.c
@@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams)
GOMP_ADDITIONAL_ICVS.nteams = num_teams;
}
+int
+omp_get_teams_thread_limit (void)
+{
+ return GOMP_ADDITIONAL_ICVS.teams_thread_limit;
+}
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
@@ -89,3 +102,5 @@ ialias (omp_is_initial_device)
ialias (omp_get_device_num)
ialias (omp_get_max_teams)
ialias (omp_set_num_teams)
+ialias (omp_get_teams_thread_limit)
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c
index d8acf0e..48607ce 100644
--- a/libgomp/icv-device.c
+++ b/libgomp/icv-device.c
@@ -97,3 +97,20 @@ omp_set_num_teams (int num_teams)
}
ialias (omp_set_num_teams)
+
+int
+omp_get_teams_thread_limit (void)
+{
+ return gomp_teams_thread_limit_var;
+}
+
+ialias (omp_get_teams_thread_limit)
+
+void
+omp_set_teams_thread_limit (int thread_limit)
+{
+ if (thread_limit >= 0)
+ gomp_teams_thread_limit_var = thread_limit;
+}
+
+ialias (omp_set_teams_thread_limit)
diff --git a/libgomp/icv.c b/libgomp/icv.c
index df423c0..9aef91c 100644
--- a/libgomp/icv.c
+++ b/libgomp/icv.c
@@ -148,19 +148,6 @@ omp_get_supported_active_levels (void)
return gomp_supported_active_levels;
}
-void
-omp_set_teams_thread_limit (int thread_limit)
-{
- if (thread_limit >= 0)
- gomp_teams_thread_limit_var = thread_limit;
-}
-
-int
-omp_get_teams_thread_limit (void)
-{
- return gomp_teams_thread_limit_var;
-}
-
int
omp_get_cancellation (void)
{
@@ -261,8 +248,6 @@ ialias (omp_get_thread_limit)
ialias (omp_set_max_active_levels)
ialias (omp_get_max_active_levels)
ialias (omp_get_supported_active_levels)
-ialias (omp_set_teams_thread_limit)
-ialias (omp_get_teams_thread_limit)
ialias (omp_get_cancellation)
ialias (omp_get_proc_bind)
ialias (omp_get_max_task_priority)
diff --git a/libgomp/target.c b/libgomp/target.c
index 1624938..6160deb 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2143,6 +2143,19 @@ get_gomp_offload_icvs (int dev_num)
new->icvs.nteams = gomp_default_icv_values.nteams_var;
if (dev_x != NULL
+ && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
+ else if (dev != NULL
+ && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
+ else if (all != NULL
+ && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
+ new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
+ else
+ new->icvs.teams_thread_limit
+ = gomp_default_icv_values.teams_thread_limit_var;
+
+ if (dev_x != NULL
&& gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
new->icvs.default_device = dev_x->icvs.default_device_var;
else if (dev != NULL
@@ -2278,24 +2291,31 @@ gomp_load_image_to_device (struct gomp_device_descr
*devicep, unsigned version,
int dev_num = (int) (devicep - &devices[0]);
struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
size_t var_size = var->end - var->start;
-
+ if (var_size != sizeof (struct gomp_offload_icvs))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ if (is_register_lock)
+ gomp_mutex_unlock (®ister_lock);
+ gomp_fatal ("offload plugin managed 'icv struct' not of expected "
+ "format");
+ }
/* Copy the ICVs variable to place on device memory, hereby
actually designating its device number into effect. */
gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
var_size, false, NULL);
- splay_tree_key k = &array->key;
- k->host_start = (uintptr_t) icvs;
- k->host_end =
- k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
- k->tgt = tgt;
- k->tgt_offset = var->start;
- k->refcount = REFCOUNT_INFINITY;
- k->dynamic_refcount = 0;
- k->aux = NULL;
- array->left = NULL;
- array->right = NULL;
- splay_tree_insert (&devicep->mem_map, array);
- array++;
+ splay_tree_key k = &array->key;
+ k->host_start = (uintptr_t) icvs;
+ k->host_end =
+ k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
+ k->tgt = tgt;
+ k->tgt_offset = var->start;
+ k->refcount = REFCOUNT_INFINITY;
+ k->dynamic_refcount = 0;
+ k->aux = NULL;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ array++;
}
}
@@ -2757,6 +2777,20 @@ clear_unsupported_flags (struct gomp_device_descr
*devicep, unsigned int flags)
return flags;
}
+static void
+gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
+{
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item == NULL)
+ return;
+
+ void *host_ptr = &item->icvs;
+ void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
+ if (dev_ptr != NULL)
+ gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
+ sizeof (struct gomp_offload_icvs));
+}
+
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
@@ -2789,6 +2823,142 @@ GOMP_target_ext (int device, void (*fn) (void *),
size_t mapnum,
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
+ /* Obtain the original TEAMS and THREADS values from ARGS. */
+ intptr_t orig_teams = 1, orig_threads = 0;
+ size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
+ void **tmpargs = args;
+ while (*tmpargs)
+ {
+ intptr_t id = (intptr_t) *tmpargs++, val;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ {
+ val = (intptr_t) *tmpargs++;
+ len = 2;
+ }
+ else
+ {
+ val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
+ len = 1;
+ }
+ num_args += len;
+ if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
+ continue;
+ val = val > INT_MAX ? INT_MAX : val;
+ if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
+ {
+ orig_teams = val;
+ teams_len = len;
+ }
+ else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
+ {
+ orig_threads = val;
+ threads_len = len;
+ }
+ }
+
+ intptr_t new_teams = orig_teams, new_threads = orig_threads;
+ /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
+ ORIG_TEAMS == -1: Teams construct with NUM_TEAMS clause specified, but the
+ value could not be specified. No Change.
+ ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
+ Set device-specific value.
+ ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
+ No change. */
+ if (orig_teams == -2)
+ new_teams = 1;
+ else if (orig_teams == 0)
+ {
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item != NULL)
+ new_teams = item->icvs.nteams;
+ }
+ /* The device-specific teams-thread-limit is only set if (a) an explicit
TEAMS
+ region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set
by
+ e.g. a THREAD_LIMIT clause. */
+ if (orig_teams >= -2 && orig_threads == 0)
+ {
+ struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
+ if (item != NULL)
+ new_threads = item->icvs.teams_thread_limit;
+ }
+
+ /* Copy and change the arguments list only if TEAMS or THREADS need to be
+ updated. */
+ void **new_args = args;
+ if (orig_teams != new_teams || orig_threads != new_threads)
+ {
+ size_t tms_len = (orig_teams == new_teams
+ ? teams_len
+ : (new_teams > -(1 << 15) && new_teams < (1 << 15)
+ ? 1 : 2));
+ size_t ths_len = (orig_threads == new_threads
+ ? threads_len
+ : (new_threads > -(1 << 15) && new_threads < (1 << 15)
+ ? 1 : 2));
+ /* One additional item after the last arg must be NULL. */
+ size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
+ + ths_len + 1;
+ new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
+
+ tmpargs = args;
+ void **tmp_new_args = new_args;
+ while (*tmpargs)
+ {
+ intptr_t id = (intptr_t) *tmpargs;
+ if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
+ && orig_teams != new_teams)
+ || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
+ && orig_threads != new_threads))
+ {
+ tmpargs++;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ tmpargs++;
+ }
+ else
+ {
+ *tmp_new_args++ = *tmpargs++;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ *tmp_new_args++ = *tmpargs++;
+ }
+ }
+
+ if (orig_teams != new_teams)
+ {
+ intptr_t new_val = new_teams;
+ if (tms_len == 1)
+ {
+ new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
+ | GOMP_TARGET_ARG_NUM_TEAMS;
+ *tmp_new_args++ = (void *) new_val;
+ }
+ else
+ {
+ *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
+ | GOMP_TARGET_ARG_NUM_TEAMS);
+ *tmp_new_args++ = (void *) new_val;
+ }
+ }
+
+ if (orig_threads != new_threads)
+ {
+ intptr_t new_val = new_threads;
+ if (ths_len == 1)
+ {
+ new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
+ | GOMP_TARGET_ARG_THREAD_LIMIT;
+ *tmp_new_args++ = (void *) new_val;
+ }
+ else
+ {
+ *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
+ | GOMP_TARGET_ARG_THREAD_LIMIT);
+ *tmp_new_args++ = (void *) new_val;
+ }
+ }
+
+ *tmp_new_args = NULL;
+ }
+
flags = clear_unsupported_flags (devicep, flags);
if (flags & GOMP_TARGET_FLAG_NOWAIT)
@@ -2827,7 +2997,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t
mapnum,
&& !thr->task->final_task)
{
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
- sizes, kinds, flags, depend, args,
+ sizes, kinds, flags, depend, new_args,
GOMP_TARGET_TASK_BEFORE_MAP);
return;
}
@@ -2873,7 +3043,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t
mapnum,
tgt_align, tgt_size);
}
}
- gomp_target_fallback (fn, hostaddrs, devicep, args);
+ gomp_target_fallback (fn, hostaddrs, devicep, new_args);
return;
}
@@ -2903,7 +3073,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t
mapnum,
}
devicep->run_func (devicep->target_id, fn_addr,
tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
- args);
+ new_args);
if (tgt_vars)
{
htab_clear (refcount_set);
@@ -2911,6 +3081,12 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t
mapnum,
}
if (refcount_set)
htab_free (refcount_set);
+
+ /* Copy back ICVs from device to host.
+ HOST_PTR is expected to exist since it was added in
+ gomp_load_image_to_device if not already available. */
+ gomp_copy_back_icvs (devicep, device);
+
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
index b987a33..9da0d63 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c
@@ -16,7 +16,7 @@ main ()
}
else
omp_set_num_teams (6);
- if (getenv ("OMP_TEAMS_THREAD_LIMIT") == NULL
+ if (getenv ("OMP_TEAMS_THREAD_LIMIT") != NULL
&& strcmp (getenv ("OMP_TEAMS_THREAD_LIMIT"), "12") == 0)
{
if (omp_get_teams_thread_limit () != 12)
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
index 82108bce..82d8e76 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c
@@ -1,25 +1,205 @@
+/* { dg-additional-options "-DAMD" { target offload_target_amdgcn } } */
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "42" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "43" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "44" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "45" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "46" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS "47" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS "5" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } */
#include <omp.h>
#include <stdlib.h>
+#include <unistd.h>
int
main (int argc, char *const *argv)
{
- if (omp_get_max_teams () != 47)
+ if (omp_get_max_teams () != 5
+ || omp_get_teams_thread_limit () != 4)
abort ();
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4
+ || omp_get_thread_num () >= 4)
+ abort ();
+ }
+
+ omp_set_num_teams (4);
+ omp_set_teams_thread_limit (3);
+ if (omp_get_max_teams () != 4
+ || omp_get_teams_thread_limit () != 3)
+ abort ();
+
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 4
+ || omp_get_team_num () >= 4)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3
+ || omp_get_thread_num () >= 3)
+ abort ();
+ }
+
+ #pragma omp teams num_teams(3) thread_limit(2)
+ {
+ if (omp_get_num_teams () != 3
+ || omp_get_team_num () >= 3)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 2
+ || omp_get_thread_num () >= 2)
+ abort ();
+ }
+
+ #pragma omp teams num_teams(5) thread_limit(4)
+ {
+ if (omp_get_num_teams () != 5
+ || omp_get_team_num () >= 5)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4
+ || omp_get_thread_num () >= 4)
+ abort ();
+ }
+
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
+ if (num_devices <= 0)
+ return 0;
+
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 42 + i)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 6 + i
+ || omp_get_teams_thread_limit () != 5 + i)
abort ();
+ #pragma omp target device (i)
+ #pragma omp teams
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 5 + i
+ || omp_get_thread_num () >= 5 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (5 + i);
+ omp_set_teams_thread_limit (4 + i);
+ if (omp_get_max_teams () != 5 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams and omp_set_teams_thread_limit above set the value
+ of nteams-var and teams-thread-limit-var ICVs on device 'i', which has
+ scope 'device' and should be avaible in subsequent target regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 5 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 5 + i
+ || omp_get_team_num () >= 5 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 4 + i
+ || omp_get_thread_num () >= 4 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(6 + i) thread_limit(5 + i)
+ {
+ if (omp_get_num_teams () > 6 + i
+ || omp_get_team_num () >= 6 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 5 + i
+ || omp_get_thread_num () >= 5 + i
+ || omp_get_num_teams () > 6 + i
+ || omp_get_team_num () >= 6 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(4 + i) thread_limit(3 + i)
+ {
+ if (omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3 + i
+ || omp_get_thread_num () >= 3 + i
+ || omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ #pragma omp teams thread_limit(3 + i) num_teams(4 + i)
+ {
+ if (omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3 + i
+ || omp_get_thread_num () >= 3 + i
+ || omp_get_num_teams () > 4 + i
+ || omp_get_team_num () >= 4 + i)
+ abort ();
+ }
+
+ /* This tests a large number of teams and threads. If it is larger than
+ 2^15+1 then the according argument in the kernels arguments list
+ is encoded with two items instead of one. On NVIDIA there is an
+ adjustment for too large teams and threads. For AMD such adjustment
+ exists only for threads and will cause runtime errors with a two large
+ number of teams. */
+ intptr_t large_num_teams = 66000;
+#ifdef AMD
+ large_num_teams = 8;
+#endif
+ intptr_t large_threads_limit = 67000;
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (large_num_teams + i);
+ omp_set_teams_thread_limit (large_threads_limit + i);
+ if (omp_get_max_teams () != large_num_teams + i
+ || omp_get_teams_thread_limit () != large_threads_limit + i)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != large_num_teams + i
+ || omp_get_teams_thread_limit () != large_threads_limit + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > large_num_teams + i
+ || omp_get_team_num () >= large_num_teams + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > large_threads_limit + i
+ || omp_get_thread_num () >= large_threads_limit + i)
+ abort ();
+ }
+
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
index 05f07c7..7fdcaf0 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c
@@ -1,9 +1,10 @@
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "43" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */
/* { dg-set-target-env-var OMP_SCHEDULE_ALL "guided,4" } */
/* { dg-set-target-env-var OMP_DYNAMIC_ALL "true" } */
-/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "44" } */
/* { dg-set-target-env-var OMP_THREAD_LIMIT_ALL "45" } */
/* { dg-set-target-env-var OMP_NUM_THREADS_ALL "46,3,2" } */
/* { dg-set-target-env-var OMP_MAX_ACTIVE_LEVELS_ALL "47" } */
@@ -12,7 +13,8 @@
/* This tests the hierarchical usage of ICVs on the device, i.e. if
OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of
- OMP_NUM_TEAMS_DEV should be used. And if */
+ OMP_NUM_TEAMS_DEV should be used. And if OMP_NUM_TEAMS (without suffix) is
+ not defined, then OMP_NUM_TEAMS_ALL should be used for the host. */
#include <omp.h>
#include <stdlib.h>
@@ -24,10 +26,10 @@ main (int argc, char *const *argv)
int chunk_size;
omp_get_schedule(&kind, &chunk_size);
- if (omp_get_max_teams () != 42
+ if (omp_get_max_teams () != 3
|| !omp_get_dynamic ()
|| kind != 3 || chunk_size != 4
- || omp_get_teams_thread_limit () != 44
+ || omp_get_teams_thread_limit () != 2
|| omp_get_thread_limit () != 45
|| omp_get_max_threads () != 46
|| omp_get_proc_bind () != omp_proc_bind_spread
@@ -36,9 +38,52 @@ main (int argc, char *const *argv)
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 43)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 4
+ || omp_get_teams_thread_limit () != 3)
abort ();
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 4
+ || omp_get_team_num () >= 4)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 3
+ || omp_get_thread_num () >= 3)
+ abort ();
+ }
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (3 + i);
+ omp_set_teams_thread_limit (2 + i);
+ if (omp_get_max_teams () != 3 + i
+ || omp_get_teams_thread_limit () != 2 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 3 + i
+ || omp_get_teams_thread_limit () != 2 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ {
+ if (omp_get_num_teams () > 3 + i
+ || omp_get_team_num () >= 3 + i)
+ abort ();
+ #pragma omp parallel
+ if (omp_get_thread_limit () > 2 + i
+ || omp_get_thread_num () >= 2 + i)
+ abort ();
+ }
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
index 67081dc..ffceaf3 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
-/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */
+/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "7" } */
+/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */
/* This tests the hierarchical usage of ICVs on the host and on devices, i.e.
if
OMP_NUM_TEAMS_DEV_<device_num>, OMP_NUM_TEAMS_DEV, and
@@ -13,14 +14,73 @@
int
main (int argc, char *const *argv)
{
- if (omp_get_max_teams () != 42)
+ if (omp_get_max_teams () != 7
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 7
+ || omp_get_team_num () >= 7)
+ abort ();
+
+ omp_set_num_teams (9);
+ omp_set_teams_thread_limit (3);
+ if (omp_get_max_teams () != 9
+ || omp_get_teams_thread_limit () != 3)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 9
+ || omp_get_team_num () >= 9)
+ abort ();
+
+ #pragma omp teams num_teams(5)
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
abort ();
int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
for (int i=0; i < num_devices; i++)
- #pragma omp target device (i)
- if (omp_get_max_teams () != 42)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 7
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 7
+ || omp_get_team_num () >= 7)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (8 + i);
+ omp_set_teams_thread_limit (4 + i);
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 4 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 8 + i
+ || omp_get_team_num () >= 8 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(5 + i)
+ if (omp_get_num_teams () != 5 + i)
abort ();
+ }
return 0;
}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
index adaff5a..ad1dbfc 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-8.c
@@ -19,4 +19,4 @@ main (int argc, char *const *argv)
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_01=44 (leading
zero).*" } */
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_a=45.*" } */
/* { dg-output ".*Invalid device number in OMP_NUM_TEAMS_DEV_12345678901=46
(too long).*" } */
-/* { dg-output ".*Non-negative device number expected in
OMP_NUM_TEAMS_DEV_-1=47.*" } */
+/* { dg-output ".*Non-negative device number expected in
OMP_NUM_TEAMS_DEV_-1=47.*" } */
\ No newline at end of file
diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-9.c
b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c
new file mode 100644
index 0000000..c850342
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+
+/* This tests usage of ICVs on the host and on devices if no corresponding
+ environment variables are configured. */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *const *argv)
+{
+ if (omp_get_max_teams () != 0
+ || omp_get_teams_thread_limit () != 0)
+ abort ();
+
+ omp_set_num_teams (9);
+ omp_set_teams_thread_limit (2);
+ if (omp_get_max_teams () != 9
+ || omp_get_teams_thread_limit () != 2)
+ abort ();
+
+ #pragma omp teams
+ if (omp_get_num_teams () > 9
+ || omp_get_team_num () >= 9)
+ abort ();
+
+ #pragma omp teams num_teams(5)
+ if (omp_get_num_teams () > 5
+ || omp_get_team_num () >= 5)
+ abort ();
+
+ int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices ();
+ for (int i=0; i < num_devices; i++)
+ {
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 0
+ || omp_get_teams_thread_limit () != 0)
+ abort ();
+
+ #pragma omp target device (i)
+ {
+ omp_set_num_teams (8 + i);
+ omp_set_teams_thread_limit (3 + i);
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 3 + i)
+ abort ();
+ }
+
+ /* omp_set_num_teams above set the value of nteams-var ICV on device 'i',
+ which has scope 'device' and should be avaible in subsequent target
+ regions. */
+ #pragma omp target device (i)
+ if (omp_get_max_teams () != 8 + i
+ || omp_get_teams_thread_limit () != 3 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams
+ if (omp_get_num_teams () > 8 + i
+ || omp_get_team_num () >= 8 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ #pragma omp teams num_teams(5 + i)
+ if (omp_get_num_teams () > 5 + i
+ || omp_get_team_num () >= 5 + i)
+ abort ();
+
+ #pragma omp target device (i)
+ ;
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/icv-5.f90
b/libgomp/testsuite/libgomp.fortran/icv-5.f90
new file mode 100644
index 0000000..05a35fa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/icv-5.f90
@@ -0,0 +1,226 @@
+! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" }
+! { dg-set-target-env-var OMP_NUM_TEAMS "5" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" }
+
+use omp_lib
+implicit none (type, external)
+ integer :: num_devices, i, large_num_teams, large_threads_limit
+ logical :: err
+
+ if (omp_get_num_devices () > 3) then
+ num_devices = 3
+ else
+ num_devices = omp_get_num_devices ()
+ end if
+
+ do i=0,num_devices-1
+
+ ! Testing NUM_TEAMS.
+ if (env_is_set_dev ("OMP_NUM_TEAMS_DEV_", i, 6 + i)) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 6 + i) err = .true.
+ !$omp end target
+ if (err) stop 1
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 2
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (5 + i)
+ if (omp_get_max_teams () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 3
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 4
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 5 + i .or. omp_get_team_num () >= 5 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 5
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams num_teams(6 + i)
+ if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 6
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams num_teams(4 + i)
+ if (omp_get_num_teams () > 4 + i .or. omp_get_team_num () >= 4 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 7
+
+ large_num_teams = 66000
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (large_num_teams + i)
+ if (omp_get_max_teams () /= large_num_teams + i) err = .true.
+ !$omp end target
+ if (err) stop 8
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= large_num_teams + i) err = .true.
+ !$omp end target
+ if (err) stop 9
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > large_num_teams + i &
+ .or. omp_get_team_num () >= large_num_teams + i) err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 10
+ end if
+
+ ! Testing TEAMS-THREAD-LIMIT
+ if (env_is_set_dev ("OMP_TEAMS_THREAD_LIMIT_DEV_", i, 5 + i)) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 5 + i) err = .true.
+ !$omp end target
+ if (err) stop 11
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i)
&
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 12
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (4 + i)
+ if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
+ !$omp end target
+ if (err) stop 13
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 4 + i) err = .true.
+ !$omp end target
+ if (err) stop 14
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 4 + i .or. omp_get_thread_num () >= 4 + i)
&
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 15
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams thread_limit(5 + i)
+ !$omp parallel
+ if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i)
&
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 16
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams thread_limit(3 + i)
+ !$omp parallel
+ if (omp_get_thread_limit () > 3 + i .or. omp_get_thread_num () >= 3 + i)
&
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 17
+
+ large_threads_limit = 67000
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (large_threads_limit + i)
+ if (omp_get_teams_thread_limit () /= large_threads_limit + i) err =
.true.
+ !$omp end target
+ if (err) stop 18
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= large_threads_limit + i) err =
.true.
+ !$omp end target
+ if (err) stop 19
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > large_threads_limit + i &
+ .or. omp_get_thread_num () >= large_threads_limit + i) err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 20
+ end if
+
+ end do
+
+contains
+ logical function env_is_set (name, val)
+ character(len=*) :: name, val
+ character(len=40) :: val2
+ integer :: stat
+ call get_environment_variable (name, val2, status=stat)
+ if (stat == 0) then
+ if (val == val2) then
+ env_is_set = .true.
+ return
+ end if
+ else if (stat /= 1) then
+ error stop 30
+ endif
+ env_is_set = .false.
+ end
+ logical function env_is_set_dev (name, dev_num, val)
+ character(len=*) :: name
+ integer :: dev_num, val
+ character(len=64) :: dev_num_str, env_var, val_str
+ dev_num_str = ADJUSTL(dev_num_str)
+ env_var = name // dev_num_str
+ val_str = ADJUSTL(val_str)
+ env_is_set_dev = env_is_set (TRIM(env_var), TRIM(val_str))
+ end
+end
diff --git a/libgomp/testsuite/libgomp.fortran/icv-6.f90
b/libgomp/testsuite/libgomp.fortran/icv-6.f90
new file mode 100644
index 0000000..c8e6a0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/icv-6.f90
@@ -0,0 +1,140 @@
+! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" }
+! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" }
+! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" }
+
+! This test considers the hierarchical usage of ICVs on the device, i.e. if
+! e.g. OMP_NUM_TEAMS_DEV_<device_num> is not configured, then the value of
+! OMP_NUM_TEAMS_DEV should be used for the targets.
+
+use omp_lib
+implicit none (type, external)
+ integer :: num_devices, i, stat, tmp
+ logical :: err
+ character(len=40) :: val
+
+ ! The following environment variables should not be set.
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_0", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_1", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_NUM_TEAMS_DEV_2", val, status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_0", val,
status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_1", val,
status=stat)
+ if (stat /= 1) return
+ call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_2", val,
status=stat)
+ if (stat /= 1) return
+
+ if (omp_get_num_devices () > 3) then
+ num_devices = 3
+ else
+ num_devices = omp_get_num_devices ()
+ end if
+
+ do i=0,num_devices-1
+
+ ! Testing NUM_TEAMS.
+ if (env_is_set ("OMP_NUM_TEAMS_DEV", "4")) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 4) err = .true.
+ !$omp end target
+ if (err) stop 1
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 4 .or. omp_get_team_num () >= 4) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 2
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_num_teams (3 + i)
+ if (omp_get_max_teams () /= 3 + i) err = .true.
+ !$omp end target
+ if (err) stop 3
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_max_teams () /= 3 + i) err = .true.
+ !$omp end target
+ if (err) stop 4
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ if (omp_get_num_teams () > 3 + i .or. omp_get_team_num () >= 3 + i) &
+ err = .true.
+ !$omp end teams
+ !$omp end target
+ if (err) stop 5
+ end if
+
+ ! Testing TEAMS-THREAD-LIMIT
+ if (env_is_set ("OMP_TEAMS_THREAD_LIMIT_DEV", "3")) then
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 3) err = .true.
+ !$omp end target
+ if (err) stop 6
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 3 .or. omp_get_thread_num () >= 3) &
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 7
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ call omp_set_teams_thread_limit (2 + i)
+ if (omp_get_teams_thread_limit () /= 2 + i) err = .true.
+ !$omp end target
+ if (err) stop 8
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ if (omp_get_teams_thread_limit () /= 2 + i) err = .true.
+ !$omp end target
+ if (err) stop 9
+
+ err = .false.
+ !$omp target device(i) map(tofrom: err)
+ !$omp teams
+ !$omp parallel
+ if (omp_get_thread_limit () > 2 + i .or. omp_get_thread_num () >= 2 + i)
&
+ err = .true.
+ !$omp end parallel
+ !$omp end teams
+ !$omp end target
+ if (err) stop 10
+ end if
+
+ end do
+
+contains
+ logical function env_is_set (name, val)
+ character(len=*) :: name, val
+ character(len=40) :: val2
+ integer :: stat
+ call get_environment_variable (name, val2, status=stat)
+ if (stat == 0) then
+ if (val == val2) then
+ env_is_set = .true.
+ return
+ end if
+ else if (stat /= 1) then
+ error stop 10
+ endif
+ env_is_set = .false.
+ end
+end