Hi PA,
sorry for the slow review. Looks mostly fine, but I stumbled over a few
minor points. Some are only RFC items (some of the 'I wonder …').
Paul-Antoine Arras wrote:
This patch adds middle-end support for the `dispatch` construct and the
`adjust_args` clause. The heavy lifting is done in `gimplify_omp_dispatch` and
`gimplify_call_expr` respectively. For `adjust_args`, this mostly consists in
emitting a call to `gomp_get_mapped_ptr` for the adequate device.
omp_get_… not gomp_get_…
For dispatch, the following steps are performed:
* Handle the device clause, if any: set the default-device ICV at the top of the
dispatch region and restore its previous value at the end.
* Handle novariants and nocontext clauses, if any. Evaluate compile-time
constants and select a variant, if possible. Otherwise, emit code to handle all
possible cases at run time.
* If depend clauses are present, add a taskwait construct before the dispatch
region and move them there.
The latter is not done here – but already in the front ends, i.e.
OMP_TASK are handled in part 3 (C), 4 (C++) and 6 (Fortran) of this series.
...
--- a/gcc/gimple.cc
+++ b/gcc/gimple.cc
...
+/* Build a GIMPLE_OMP_DISPATCH statement.
+
+ BODY is the target function call to be dispatched.
+ CLAUSES are any of the OMP dispatch construct's clauses: ... */
Looks as if you planned to add something here. How about:
s/: ..././ ?
@@ -4067,23 +4069,125 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p,
bool want_value)
+ if (flag_openmp && EXPR_P (CALL_EXPR_FN (*expr_p))
+ && DECL_P (TREE_OPERAND (CALL_EXPR_FN (*expr_p), 0))
+ && (adjust_args_list = lookup_attribute (
+ "omp declare variant variant adjust_args",
+ DECL_ATTRIBUTES (
+ TREE_OPERAND (CALL_EXPR_FN (*expr_p), 0))))
+ != NULL_TREE
+ && gimplify_omp_ctxp != NULL
+ && gimplify_omp_ctxp->code == OMP_DISPATCH
+ && !gimplify_omp_ctxp->in_call_args)
+ {
!= should be under 'a' of 'adjust (remove one space)
And I wonder whether it is a bit more readable and a tiny bit faster if
you move the gimplify_omp_ctx checks directly after flag_openmp
and only if successfull ('&&') check for the attributes.
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
+ {
+ tree decl1 = DECL_NAME (OMP_CLAUSE_DECL (c));
+ tree decl2 = tree_strip_nop_conversions (*arg_p);
+ if (TREE_CODE (decl2) == ADDR_EXPR)
+ decl2 = TREE_OPERAND (decl2, 0);
+ gcc_assert (TREE_CODE (decl2) == VAR_DECL
+ || TREE_CODE (decl2) == PARM_DECL);
The first one can be 'VAR_P (decl2)'. I keep wondering whether there
can be cases where that's not true (e.g. VAR_DECL) or some indirect ref.
For Fortran, I could imagine that array descriptors make problems, e.g.
subroutine f(x)
integer, pointer :: x(:)
where 'x->data' is the device pointer and not 'x'.
(TODO: something to check + possibly to revisite when handling the
Fortran part; for now (including C/C++), I think we can leave it as is.)
Or something with reference types (→ C++, Fortran), albeit that's more
for need_device_addr / has_device_addr, which is not yet implemeted.
+ bool need_device_ptr = false;
+ for (tree arg
+ = TREE_PURPOSE (TREE_VALUE (adjust_args_list));
+ arg != NULL; arg = TREE_CHAIN (arg))
+ {
...
+ }
+
+ if (need_device_ptr && !is_device_ptr)
Actually, the is_device_ptr loop is only needed when need_device_ptr
(or, later, need_device_addr) is true; I wonder whether it should be
swapped and is_device_ptr only be checked conditionally?
+ *arg_p = (TREE_CODE (*arg_p) == NOP_EXPR)
+ ? TREE_OPERAND (*arg_p, 0)
+ : *arg_p;
Use tree_strip_nop_conversions or STRIP_NOPS ? However, it is not clear
why it is needed here ...
+ gimplify_arg (arg_p, pre_p, loc);
+ gimplify_arg (&device_num, pre_p, loc);
+ call = gimple_build_call (fn, 2, *arg_p, device_num);
+ tree mapped_arg
+ = create_tmp_var (gimple_call_return_type (call));
+ gimple_call_set_lhs (call, mapped_arg);
+ gimplify_seq_add_stmt (pre_p, call);
+
+ *arg_p = mapped_arg;
This line causes the following to attempt to fail:
+ // Mark mapped argument as device pointer to ensure
+ // idempotency in gimplification
+ gcc_assert (gimplify_omp_ctxp->code == OMP_DISPATCH);
+ tree c = build_omp_clause (input_location,
+ OMP_CLAUSE_IS_DEVICE_PTR);
+ OMP_CLAUSE_DECL (c) = *arg_p;
and, to continue the previous topic, I wonder whether it would be
sufficient to add the STRIP_NOPS on the RHS.
* * *
Testcase (could be a candidate for a scan-tree-dump-times test):
void foobar(int *x, int *y);
#pragma omp declare variant(foobar) match(construct={dispatch})
adjust_args(need_device_ptr: x,y)
void bar(int *x, int *y);
void sub(int *y, int *z)
{
#pragma omp dispatch //is_device_ptr(y)
bar(y, y);
}
yields:
_3 = __builtin_omp_get_default_device ();
_6 = __builtin_omp_get_mapped_ptr (y_4(D), _3);
_8 = __builtin_omp_get_mapped_ptr (y_4(D), _3);
foobar (_8, _6);
+/* Callback for walk_tree to find a IFN_GOMP_DISPATCH. */
+
+static tree
+find_ifn_gomp_dispatch (tree *tp, int *, void *modify)
Typo: "... an IFN_..." (a -> an)
+/* Try to evaluate a novariants clause. Return 1 if true, 0 if false or absent,
+ * -1 if run-time evaluation is needed. */
+
+int
+omp_has_novariants (void)
+{
+ for (struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; ctx;
+ ctx = ctx->outer_context)
+ {
+ if (ctx->code == OMP_DISPATCH && !ctx->in_call_args)
Shouldn't dispatch not always be the innermost OpenMP context?
Likewise for nocontext.
+ while (TREE_CODE (base_call_expr) == FLOAT_EXPR
+ || TREE_CODE (base_call_expr) == CONVERT_EXPR
+ || TREE_CODE (base_call_expr) == COMPLEX_EXPR
+ || TREE_CODE (base_call_expr) == INDIRECT_REF)
+ base_call_expr = TREE_OPERAND (base_call_expr, 0);
Is there a reason that NOP_EXPR is not in that list? Especially as you
use STRIP_NOPS in the next line?
+ tree base_fndecl = get_callee_fndecl (STRIP_NOPS (base_call_expr));
+ if (base_fndecl != NULL_TREE)
...
+ if (base_fndecl != variant_fndecl
+ && (omp_has_novariants () == -1 || omp_has_nocontext () == -1))
+ {
+ tree novariants_clause = NULL_TREE, nocontext_clause = NULL_TREE,
+ novariants_cond = NULL_TREE, nocontext_cond = NULL_TREE;
+ for (tree c = OMP_DISPATCH_CLAUSES (expr); c; c = TREE_CHAIN (c))
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NOVARIANTS)
&& !integer_zerop (OMP_CLAUSE_NOVARIANTS_EXPR (c)) ?
+ {
+ gcc_assert (novariants_cond == NULL_TREE);
+ novariants_clause = c;
+ novariants_cond = OMP_CLAUSE_NOVARIANTS_EXPR (c);
+ }
+ else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NOCONTEXT)
&& !integer_zerop (OMP_CLAUSE_NOCONTEXT_EXPR (c)) ?
Just to avoid some extra gimplification / optimization work later on?
+ {
+ gcc_assert (nocontext_cond == NULL_TREE);
+ nocontext_clause = c;
+ nocontext_cond = OMP_CLAUSE_NOCONTEXT_EXPR (c);
+ }
+ }
+ gcc_assert (novariants_cond != NULL_TREE
+ || nocontext_cond != NULL_TREE);
+
+ enum gimplify_status ret
+ = gimplify_expr (&novariants_cond, &body, NULL, is_gimple_val,
+ fb_rvalue);
+ if (ret == GS_ERROR || ret == GS_UNHANDLED)
+ return ret;
+ ret = gimplify_expr (&nocontext_cond, &body, NULL, is_gimple_val,
+ fb_rvalue);
+ if (ret == GS_ERROR || ret == GS_UNHANDLED)
+ return ret;
+
+ tree base_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree variant1_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree cond_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree variant2_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree end_label = create_artificial_label (UNKNOWN_LOCATION);
+
+ if (novariants_cond != NULL_TREE)
+ {
+ gcond *novariants_cond_stmt
+ = gimple_build_cond_from_tree (novariants_cond, base_label,
+ cond_label);
+ gimplify_seq_add_stmt (&body, novariants_cond_stmt);
+
+ gimplify_seq_add_stmt (&body, gimple_build_label (base_label));
+ tree base_call_expr2 = copy_node (base_call_expr);
+ if (TREE_CODE (dispatch_body) == MODIFY_EXPR)
+ {
+ base_call_expr2 = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst,
+ base_call_expr2);
+ }
+ OMP_CLAUSE_NOVARIANTS_EXPR (novariants_clause)
+ = boolean_true_node;
+ gimplify_and_add (base_call_expr2, &body);
+ gimplify_seq_add_stmt (&body, gimple_build_goto (end_label));
+
+ OMP_CLAUSE_NOVARIANTS_EXPR (novariants_clause)
+ = boolean_false_node;
+ }
+
+ gimplify_seq_add_stmt (&body, gimple_build_label (cond_label));
You can move the cond_label into the 'if' - and do likewise with the
first two label creations - those are not used outside of the 'if'.
(except for end_label, of course)
Likewise for:
+ if (nocontext_cond != NULL_TREE)
+ {
...
+ }
+
+ gimplify_seq_add_stmt (&body, gimple_build_label (variant2_label));
Those.
+ tree variant_call_expr = copy_node (base_call_expr);
Is there a reason to copy the node? I understand that you copy them for
the variant calls - but I think one of the nodes can be used without
copying (e.g. this one), or do I miss something?
+ if (TREE_CODE (dispatch_body) == MODIFY_EXPR)
+ {
+ variant_call_expr
+ = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, variant_call_expr);
+ }
+ gimplify_and_add (variant_call_expr, &body);
+ gimplify_seq_add_stmt (&body, gimple_build_goto (end_label));
+ gimplify_seq_add_stmt (&body, gimple_build_label (end_label));
+ }
I think the last goto is superflous as it just jumps to the next line.
+ else
+ gimplify_and_add (OMP_DISPATCH_BODY (expr), &body);
+ }
+ else
+ gimplify_and_add (OMP_DISPATCH_BODY (expr), &body);
+
+ // Restore default-device-var ICV
+ if (device)
+ {
+ tree fn = builtin_decl_explicit (BUILT_IN_OMP_SET_DEFAULT_DEVICE);
+ gcall *call = gimple_build_call (fn, 1, saved_device_icv);
+ gimplify_seq_add_stmt (&body, call);
+ }
+
+ // Wrap dispatch body into a bind
+ gimple *bind = gimple_build_bind (NULL_TREE, body, NULL_TREE);
+ pop_gimplify_context (bind);
+
+ // Manually tear down context created by gimplify_scan_omp_clauses to avoid a
+ // call to gimplify_adjust_omp_clauses
+ gimplify_omp_ctx *ctx = gimplify_omp_ctxp;
+ if (ctx != NULL)
+ {
+ gcc_assert (ctx->code == OMP_DISPATCH);
+ gimplify_omp_ctxp = ctx->outer_context;
+ delete_omp_context (ctx);
+ }
+
+ // Remove nowait as it has no effect on dispatch (OpenMP 5.2), and depend and
+ // device clauses as they have already been handled
"Remove ..., device as it has been handled above, and depend as the
front end handled it by inserting taskwait."
Or something like that - to avoid searching for OMP_TASK/OMP_CLAUSE_DEPEND.
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1049,7 +1049,7 @@ omp_construct_traits_to_codes (tree ctx, int nconstructs,
/* Order must match the OMP_TRAIT_CONSTRUCT_* enumerators in
enum omp_ts_code. */
static enum tree_code code_map[]
- = { OMP_TARGET, OMP_TEAMS, OMP_PARALLEL, OMP_FOR, OMP_SIMD };
+ = { OMP_TARGET, OMP_TEAMS, OMP_PARALLEL, OMP_FOR, OMP_SIMD, OMP_DISPATCH };
for (tree ts = ctx; ts; ts = TREE_CHAIN (ts), i--)
{
@@ -1142,6 +1142,7 @@ const char *omp_tss_map[] =
"target_device",
"implementation",
"user",
+ "need_device_ptr",
NULL
};
This one looks bogus - and is also effectively unused.
Tobias