On 06/11/2020 8:33 pm, Tobias Burnus wrote:
Hello Kwok, hi Jakub,
On 06.11.20 21:13, Kwok Cheung Yeung wrote:
In addition to deprecating the omp_(get|set)_nested() functions and OMP_NESTED
environment variable, OpenMP 5.0 also removes the nest-var ICV altogether,
defining it in terms of the max-active-levels-var ICV instead. [...]
Shouldn't libgomp/libgomp.texi be also updated?
Tobias
I have added some documentation regarding the relationship between the nesting
setting and the current maximum number of active levels. The documentation does
not detail ICVs though, so we probably don't need to explicitly state that one is
defined in terms of another?
Is this version okay for trunk?
Thanks
Kwok
commit b4feb16f3c84b8f82163a4cbba6a31d55fbb8e5b
Author: Kwok Cheung Yeung <k...@codesourcery.com>
Date: Mon Nov 9 09:34:39 2020 -0800
openmp: Retire nest-var ICV for OpenMP 5.0
This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.
2020-11-09 Kwok Cheung Yeung <k...@codesourcery.com>
libgomp/
* env.c (gomp_global_icv): Remove nest_var field.
(gomp_max_active_levels_var): Initialize to 1.
(parse_boolean): Return true on success.
(handle_omp_display_env): Express OMP_NESTED in terms of
gomp_max_active_levels_var.
(initialize_env): Set gomp_max_active_levels_var from
OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
OMP_PROC_BIND.
* icv.c (omp_set_nested): Express in terms of
gomp_max_active_levels_var.
(omp_get_nested): Likewise.
* libgomp.h (struct gomp_task_icv): Remove nest_var field.
* libgomp.texi (omp_get_nested): Update documentation.
(omp_set_nested): Likewise.
(OMP_MAX_ACTIVE_LEVELS): Likewise.
(OMP_NESTED): Likewise.
(OMP_NUM_THREADS): Likewise.
(OMP_PROC_BIND): Likewise.
* parallel.c (gomp_resolve_num_threads): Replace reference
to nest_var with gomp_max_active_levels_var.
* testsuite/libgomp.c/target-5.c: Remove additional options.
(main): Remove references to omp_get_nested and omp_set_nested.
diff --git a/libgomp/env.c b/libgomp/env.c
index ab22525..75d0fe2 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
.run_sched_chunk_size = 1,
.default_device_var = 0,
.dyn_var = false,
- .nest_var = false,
.bind_var = omp_proc_bind_false,
.target_data = NULL
};
-unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
+unsigned long gomp_max_active_levels_var = 1;
bool gomp_cancel_var = false;
enum gomp_target_offload_t gomp_target_offload_var
= GOMP_TARGET_OFFLOAD_DEFAULT;
@@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue)
}
/* Parse a boolean value for environment variable NAME and store the
- result in VALUE. */
+ result in VALUE. Return true if one was present and it was
+ successfully parsed. */
-static void
+static bool
parse_boolean (const char *name, bool *value)
{
const char *env;
env = getenv (name);
if (env == NULL)
- return;
+ return false;
while (isspace ((unsigned char) *env))
++env;
@@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value)
while (isspace ((unsigned char) *env))
++env;
if (*env != '\0')
- gomp_error ("Invalid value for environment variable %s", name);
+ {
+ gomp_error ("Invalid value for environment variable %s", name);
+ return false;
+ }
+ return true;
}
/* Parse the OMP_WAIT_POLICY environment variable and return the value. */
@@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
fprintf (stderr, " OMP_DYNAMIC = '%s'\n",
gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_NESTED = '%s'\n",
- gomp_global_icv.nest_var ? "TRUE" : "FALSE");
+ gomp_max_active_levels_var > 1 ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
for (i = 1; i < gomp_nthreads_var_list_len; i++)
@@ -1417,16 +1421,11 @@ initialize_env (void)
parse_schedule ();
parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
- parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var);
parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
- parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
- true);
- if (gomp_max_active_levels_var > gomp_supported_active_levels)
- gomp_max_active_levels_var = gomp_supported_active_levels;
gomp_def_allocator = parse_allocator ();
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
{
@@ -1451,6 +1450,23 @@ initialize_env (void)
&gomp_bind_var_list_len)
&& gomp_global_icv.bind_var == omp_proc_bind_false)
ignore = true;
+ if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS",
+ &gomp_max_active_levels_var, true))
+ {
+ if (gomp_max_active_levels_var > gomp_supported_active_levels)
+ gomp_max_active_levels_var = gomp_supported_active_levels;
+ }
+ else
+ {
+ bool nested = true;
+
+ /* OMP_NESTED is deprecated in OpenMP 5.0. */
+ if (parse_boolean ("OMP_NESTED", &nested))
+ gomp_max_active_levels_var = nested ? gomp_supported_active_levels
+ : 1;
+ else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1)
+ gomp_max_active_levels_var = gomp_supported_active_levels;
+ }
/* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always
parsed if present in the environment. If OMP_PROC_BIND was set
explicitly to false, don't populate places list though. If places
diff --git a/libgomp/icv.c b/libgomp/icv.c
index 8df15e3..f54ccb1 100644
--- a/libgomp/icv.c
+++ b/libgomp/icv.c
@@ -56,15 +56,16 @@ omp_get_dynamic (void)
void
omp_set_nested (int val)
{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->nest_var = val;
+ if (val)
+ gomp_max_active_levels_var = gomp_supported_active_levels;
+ else if (gomp_max_active_levels_var > 1)
+ gomp_max_active_levels_var = 1;
}
int
omp_get_nested (void)
{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->nest_var;
+ return gomp_max_active_levels_var > 1;
}
#pragma GCC diagnostic pop
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index da7ac03..05aa1e4 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -428,7 +428,6 @@ struct gomp_task_icv
int default_device_var;
unsigned int thread_limit_var;
bool dyn_var;
- bool nest_var;
char bind_var;
/* Internal ICV. */
struct target_mem_desc *target_data;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6937063..9ad871b 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -489,8 +489,11 @@ represent their language-specific counterparts.
Nested parallel regions may be initialized at startup by the
@env{OMP_NESTED} environment variable or at runtime using
-@code{omp_set_nested}. If undefined, nested parallel regions are
-disabled by default.
+@code{omp_set_nested}. Setting the maximum number of nested
+regions to above one using the @env{OMP_MAX_ACTIVE_LEVELS}
+environment variable or @code{omp_set_max_active_levels} will
+also enable nesting. If undefined, nested parallel regions
+are disabled by default.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@@ -503,7 +506,8 @@ disabled by default.
@end multitable
@item @emph{See also}:
-@ref{omp_set_nested}, @ref{OMP_NESTED}
+@ref{omp_set_max_active_levels}, @ref{omp_set_nested},
+@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11.
@@ -964,6 +968,10 @@ are allowed to create new teams. The function takes the language-specific
equivalent of @code{true} and @code{false}, where @code{true} enables
dynamic adjustment of team sizes and @code{false} disables it.
+Enabling nested parallel regions will also set the maximum number of
+active nested regions to the maximum supported. Disabling nested parallel
+regions will set the maximum number of active nested regions to one.
+
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);}
@@ -976,7 +984,8 @@ dynamic adjustment of team sizes and @code{false} disables it.
@end multitable
@item @emph{See also}:
-@ref{OMP_NESTED}, @ref{omp_get_nested}
+@ref{omp_get_nested}, @ref{omp_set_max_active_levels},
+@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10.
@@ -1502,10 +1511,11 @@ disabled by default.
@item @emph{Description}:
Specifies the initial value for the maximum number of nested parallel
regions. The value of this variable shall be a positive integer.
-If undefined, the number of active levels is unlimited.
+If undefined, the number of active levels is one, which effectively
+disables nested regions.
@item @emph{See also}:
-@ref{omp_set_max_active_levels}
+@ref{omp_set_max_active_levels}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9
@@ -1541,11 +1551,13 @@ integer, and zero is allowed. If undefined, the default priority is
@item @emph{Description}:
Enable or disable nested parallel regions, i.e., whether team members
are allowed to create new teams. The value of this environment variable
-shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel
-regions are disabled by default.
+shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the maximum
+number of active nested regions will by default be set to the maximum
+supported; otherwise it will be set to one. If undefined, nested
+parallel regions are disabled by default.
@item @emph{See also}:
-@ref{omp_set_nested}
+@ref{omp_set_max_active_levels}, @ref{omp_set_nested}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6
@@ -1561,11 +1573,12 @@ regions are disabled by default.
@item @emph{Description}:
Specifies the default number of threads to use in parallel regions. The
value of this variable shall be a comma-separated list of positive integers;
-the value specified the number of threads to use for the corresponding nested
-level. If undefined one thread per CPU is used.
+the value specifies the number of threads to use for the corresponding nested
+level. Specifying more than one item in the list will automatically enable
+nesting by default. If undefined one thread per CPU is used.
@item @emph{See also}:
-@ref{omp_set_num_threads}
+@ref{omp_set_num_threads}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2
@@ -1586,13 +1599,15 @@ the thread affinity policy for the corresponding nesting level. With
@code{MASTER} the worker threads are in the same place partition as the
master thread. With @code{CLOSE} those are kept close to the master thread
in contiguous place partitions. And with @code{SPREAD} a sparse distribution
-across the place partitions is used.
+across the place partitions is used. Specifying more than one item in the
+list will automatically enable nesting by default.
When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when
@env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise.
@item @emph{See also}:
-@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind}
+@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY},
+@ref{OMP_NESTED}, @ref{OMP_PLACES}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4
diff --git a/libgomp/parallel.c b/libgomp/parallel.c
index 2fe4f573..9629470 100644
--- a/libgomp/parallel.c
+++ b/libgomp/parallel.c
@@ -53,7 +53,7 @@ gomp_resolve_num_threads (unsigned specified, unsigned count)
/* Accelerators with fixed thread counts require this to return 1 for
nested parallel regions. */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
- && !icv->nest_var
+ && gomp_max_active_levels_var <= 1
#endif
)
return 1;
diff --git a/libgomp/testsuite/libgomp.c/target-5.c b/libgomp/testsuite/libgomp.c/target-5.c
index 21a69ea..ec31f89 100644
--- a/libgomp/testsuite/libgomp.c/target-5.c
+++ b/libgomp/testsuite/libgomp.c/target-5.c
@@ -1,5 +1,3 @@
-/* { dg-additional-options "-Wno-deprecated-declarations" } */
-
#include <omp.h>
#include <stdlib.h>
@@ -7,17 +5,14 @@ int
main ()
{
int d_o = omp_get_dynamic ();
- int n_o = omp_get_nested ();
omp_sched_t s_o;
int c_o;
omp_get_schedule (&s_o, &c_o);
int m_o = omp_get_max_threads ();
omp_set_dynamic (1);
- omp_set_nested (1);
omp_set_schedule (omp_sched_static, 2);
omp_set_num_threads (4);
int d = omp_get_dynamic ();
- int n = omp_get_nested ();
omp_sched_t s;
int c;
omp_get_schedule (&s, &c);
@@ -30,13 +25,11 @@ main ()
int c_c;
omp_get_schedule (&s_c, &c_c);
if (d_o != omp_get_dynamic ()
- || n_o != omp_get_nested ()
|| s_o != s_c
|| c_o != c_c
|| m_o != omp_get_max_threads ())
abort ();
omp_set_dynamic (0);
- omp_set_nested (0);
omp_set_schedule (omp_sched_dynamic, 4);
omp_set_num_threads (2);
if (!omp_is_initial_device ())
@@ -48,7 +41,6 @@ main ()
int c_c;
omp_get_schedule (&s_c, &c_c);
if (d != omp_get_dynamic ()
- || n != omp_get_nested ()
|| s != s_c
|| c != c_c
|| m != omp_get_max_threads ())
@@ -60,13 +52,11 @@ main ()
int c_c;
omp_get_schedule (&s_c, &c_c);
if (d_o != omp_get_dynamic ()
- || n_o != omp_get_nested ()
|| s_o != s_c
|| c_o != c_c
|| m_o != omp_get_max_threads ())
abort ();
omp_set_dynamic (0);
- omp_set_nested (0);
omp_set_schedule (omp_sched_dynamic, 4);
omp_set_num_threads (2);
if (!omp_is_initial_device ())
@@ -76,7 +66,6 @@ main ()
abort ();
omp_get_schedule (&s_c, &c_c);
if (d != omp_get_dynamic ()
- || n != omp_get_nested ()
|| s != s_c
|| c != c_c
|| m != omp_get_max_threads ())