Hi! On Mon, Oct 26, 2015 at 20:49:40 +0100, Jakub Jelinek wrote: > On Mon, Oct 26, 2015 at 10:39:04PM +0300, Ilya Verbin wrote: > > > Without declare target link or to, you can't use the global variables > > > in orphaned accelerated routines (unless you e.g. take the address of the > > > mapped variable in the region and pass it around). > > > The to variables (non-deferred) are always mapped and are initialized with > > > the original initializer, refcount is infinity. link (deferred) work more > > > like the normal mapping, referencing those vars when they aren't > > > explicitly > > > (or implicitly) mapped is unspecified behavior, if it is e.g. mapped > > > freshly > > > with to kind, it gets the current value of the host var rather than the > > > original one. But, beyond the mapping the compiler needs to ensure that > > > all uses of the link global var (or perhaps just all uses of the link > > > global > > > var outside of the target construct body where it is mapped, because you > > > could use there the pointer you got from GOMP_target) are replaced by > > > dereference of some artificial pointer, so a becomes *a_tmp and &a becomes > > > &*a_tmp, and that the runtime library during registration of the tables is > > > told about the address of this artificial pointer. During registration, > > > I'd expect it would stick an entry for this range into the table, with > > > some > > > special flag or something similar, indicating that it is deferred mapping > > > and where the offloading device pointer is. During mapping, it would map > > > it > > > as any other not yet mapped object, but additionally would also set this > > > device pointer to the device address of the mapped object. We also need > > > to > > > ensure that when we drop the refcount of that mapping back to 0, we get it > > > back to the state where it is described as a range with registered > > > deferred > > > mapping and where the device pointer is. > > > > Ok, got it, I'll try implement this... > > Thanks. > > > > > > we actually replace the variables with pointers to variables, then > > > > > need > > > > > to somehow also mark those in the offloading tables, so that the > > > > > library > > > > > > > > I see 2 possible options: use the MSB of the size, or introduce the > > > > third field > > > > for flags. > > > > > > Well, it can be either recorded in the host variable tables (which contain > > > address and size pair, right), or in corresponding offloading device table > > > (which contains the pointer, something else?). > > > > It contains a size too, which is checked in libgomp: > > gomp_fatal ("Can't map target variables (size mismatch)"); > > Yes, we can remove this check, and use second field in device table for > > flags. > > Yeah, or e.g. just use MSB of that size (so check that either the size is > the same (then it is target to) or it is MSB | size (then it is target link). > Objects larger than half of the address space aren't really supportable > anyway.
Here is WIP patch, not for check-in. There are still many FIXMEs, which I am going to resolve, however target-link-1.c testcase pass. Is this approach correct? Any comments on FIXMEs? diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 23d0107..58771c0 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -15895,7 +15895,10 @@ c_parser_omp_declare_target (c_parser *parser) g->have_offload = true; if (is_a <varpool_node *> (node)) { - vec_safe_push (offload_vars, t); + omp_offload_var var; + var.decl = t; + var.link_ptr_decl = NULL_TREE; + vec_safe_push (offload_vars, var); node->force_output = 1; } #endif diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index d1f4970..b890f6d 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -34999,7 +34999,10 @@ cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok) g->have_offload = true; if (is_a <varpool_node *> (node)) { - vec_safe_push (offload_vars, t); + omp_offload_var var; + var.decl = t; + var.link_ptr_decl = NULL_TREE; + vec_safe_push (offload_vars, var); node->force_output = 1; } #endif diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 67a9024..878a9c5 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -1106,7 +1106,7 @@ output_offload_tables (void) streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag, LTO_symtab_variable); lto_output_var_decl_index (ob->decl_state, ob->main_stream, - (*offload_vars)[i]); + (*offload_vars)[i].decl); } streamer_write_uhwi_stream (ob->main_stream, 0); @@ -1902,7 +1902,10 @@ input_offload_tables (void) int decl_index = streamer_read_uhwi (ib); tree var_decl = lto_file_decl_data_get_var_decl (file_data, decl_index); - vec_safe_push (offload_vars, var_decl); + omp_offload_var var; + var.decl = var_decl; + var.link_ptr_decl = NULL_TREE; + vec_safe_push (offload_vars, var); } else fatal_error (input_location, diff --git a/gcc/omp-low.c b/gcc/omp-low.c index ee33551..5900f1a 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -373,7 +373,8 @@ unshare_and_remap (tree x, tree from, tree to) } /* Holds offload tables with decls. */ -vec<tree, va_gc> *offload_funcs, *offload_vars; +vec<tree, va_gc> *offload_funcs; +vec<omp_offload_var, va_gc> *offload_vars; /* Convenience function for calling scan_omp_1_op on tree operands. */ @@ -2009,7 +2010,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) decl = OMP_CLAUSE_DECL (c); /* Global variables with "omp declare target" attribute don't need to be copied, the receiver side will use them - directly. */ + directly. However, global variables with "omp declare target link" + attribute need to be copied. */ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && DECL_P (decl) && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER @@ -2017,7 +2019,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) != GOMP_MAP_FIRSTPRIVATE_REFERENCE)) || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) - && varpool_node::get_create (decl)->offloadable) + && varpool_node::get_create (decl)->offloadable + && !lookup_attribute ("omp declare target link", + DECL_ATTRIBUTES (decl))) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -18331,23 +18335,50 @@ make_pass_omp_simd_clone (gcc::context *ctxt) return new pass_omp_simd_clone (ctxt); } -/* Helper function for omp_finish_file routine. Takes decls from V_DECLS and - adds their addresses and sizes to constructor-vector V_CTOR. */ +/* Helper function for omp_finish_file routine. Takes func decls from V_DECLS + and adds their addresses to constructor-vector V_CTOR. */ static void -add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls, - vec<constructor_elt, va_gc> *v_ctor) +add_funcs_to_decl_constructor (vec<tree, va_gc> *v_decls, + vec<constructor_elt, va_gc> *v_ctor) { unsigned len = vec_safe_length (v_decls); for (unsigned i = 0; i < len; i++) { tree it = (*v_decls)[i]; - bool is_function = TREE_CODE (it) != VAR_DECL; - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, build_fold_addr_expr (it)); - if (!is_function) - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, - fold_convert (const_ptr_type_node, - DECL_SIZE_UNIT (it))); + } +} + +/* Helper function for omp_finish_file routine. Takes var decls from V_DECLS + and adds their addresses and sizes to constructor-vector V_CTOR. */ +static void +add_vars_to_decl_constructor (vec<omp_offload_var, va_gc> *v_decls, + vec<constructor_elt, va_gc> *v_ctor) +{ + unsigned len = vec_safe_length (v_decls); + for (unsigned i = 0; i < len; i++) + { + omp_offload_var var = (*v_decls)[i]; + tree addr; + tree size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (var.decl)); + + if (var.link_ptr_decl == NULL_TREE) + addr = build_fold_addr_expr (var.decl); + else + { + /* For "omp declare target link" var use address of the pointer + instead of address of the var. */ + addr = build_fold_addr_expr (var.link_ptr_decl); + /* Most significant bit of the size marks such vars. */ + unsigned HOST_WIDE_INT isize = tree_to_uhwi (size); + isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node) * 8 - 1); + size = wide_int_to_tree (const_ptr_type_node, isize); + + /* FIXME: Remove varpool node of var? */ + } + + CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr); + CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size); } } @@ -18369,8 +18400,8 @@ omp_finish_file (void) vec_alloc (v_f, num_funcs); vec_alloc (v_v, num_vars * 2); - add_decls_addresses_to_decl_constructor (offload_funcs, v_f); - add_decls_addresses_to_decl_constructor (offload_vars, v_v); + add_funcs_to_decl_constructor (offload_funcs, v_f); + add_vars_to_decl_constructor (offload_vars, v_v); tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, num_vars * 2); @@ -18412,7 +18443,7 @@ omp_finish_file (void) } for (unsigned i = 0; i < num_vars; i++) { - tree it = (*offload_vars)[i]; + tree it = (*offload_vars)[i].decl; targetm.record_offload_symbol (it); } } @@ -19538,4 +19569,145 @@ make_pass_oacc_device_lower (gcc::context *ctxt) return new pass_oacc_device_lower (ctxt); } +/* "omp declare target link" handling pass. */ + +namespace { + +const pass_data pass_data_omp_target_link = +{ + GIMPLE_PASS, /* type */ + "omptargetlink", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_omp_target_link : public gimple_opt_pass +{ +public: + pass_omp_target_link (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_target_link, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { +#ifdef ACCEL_COMPILER + /* FIXME: Replace globals in target regions too or not? */ + return lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (fun->decl)); +#else + (void) fun; + return false; +#endif + } + + virtual unsigned execute (function *); +}; + +unsigned +pass_omp_target_link::execute (function *fun) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + unsigned i; + gimple *stmt = gsi_stmt (gsi); + for (i = 0; i < gimple_num_ops (stmt); i++) + { + tree op = gimple_op (stmt, i); + tree var = NULL_TREE; + + if (!op) + continue; + if (TREE_CODE (op) == VAR_DECL) + var = op; + else if (TREE_CODE (op) == ADDR_EXPR) + { + tree op1 = TREE_OPERAND (op, 0); + if (TREE_CODE (op1) == VAR_DECL) + var = op1; + } + /* FIXME: Support arrays. What else? */ + + if (var && lookup_attribute ("omp declare target link", + DECL_ATTRIBUTES (var))) + { + tree type = TREE_TYPE (var); + tree ptype = build_pointer_type (type); + + /* Find var in offload table. */ + omp_offload_var *table_entry = NULL; + for (unsigned j = 0; j < vec_safe_length (offload_vars); j++) + if ((*offload_vars)[j].decl == var) + { + table_entry = &(*offload_vars)[j]; + break; + } + gcc_assert (table_entry); + + /* Get or create artificial pointer for the var. */ + tree ptr_decl; + if (table_entry->link_ptr_decl != NULL_TREE) + ptr_decl = table_entry->link_ptr_decl; + else + { + /* FIXME: Create a new node instead of copying? + Which info to preserve? */ + ptr_decl = copy_node (var); + TREE_TYPE (ptr_decl) = ptype; + DECL_MODE (ptr_decl) = TYPE_MODE (ptype); + DECL_SIZE (ptr_decl) = TYPE_SIZE (ptype); + DECL_SIZE_UNIT (ptr_decl) = TYPE_SIZE_UNIT (ptype); + DECL_ARTIFICIAL (ptr_decl) = 1; + /* FIXME: Add new function clone_variable_name? + clone_function_name adds dots into the name, which are + bad for vars. */ + DECL_NAME (ptr_decl) + = clone_function_name (var, "linkptr"); + SET_DECL_ASSEMBLER_NAME (ptr_decl, DECL_NAME (ptr_decl)); + SET_DECL_RTL (ptr_decl, NULL); + varpool_node::finalize_decl (ptr_decl); + table_entry->link_ptr_decl = ptr_decl; + } + + /* Replace the use of var with dereference of ptr_decl. */ + tree tmp_ssa = make_temp_ssa_name (ptype, NULL, "linkptr"); + gimple *new_stmt = gimple_build_assign (tmp_ssa, ptr_decl); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + tree mem_ref = build_simple_mem_ref (tmp_ssa); + + if (TREE_CODE (op) == VAR_DECL) + *gimple_op_ptr (stmt, i) = mem_ref; + else if (TREE_CODE (op) == ADDR_EXPR) + { + tree op1 = TREE_OPERAND (op, 0); + if (TREE_CODE (op1) == VAR_DECL) + TREE_OPERAND (op, 0) = mem_ref; + recompute_tree_invariant_for_addr_expr (op); + } + update_stmt (stmt); + } + } + } + } + + return 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_target_link (gcc::context *ctxt) +{ + return new pass_omp_target_link (ctxt); +} + #include "gt-omp-low.h" diff --git a/gcc/omp-low.h b/gcc/omp-low.h index ee0f8ac..c6e4d5a 100644 --- a/gcc/omp-low.h +++ b/gcc/omp-low.h @@ -34,7 +34,16 @@ extern tree get_oacc_fn_attrib (tree); extern int get_oacc_ifn_dim_arg (const gimple *); extern int get_oacc_fn_dim_size (tree, int); +struct omp_offload_var +{ + /* Declaration representing global variable. */ + tree decl; + + /* Artificial pointer for "omp declare target link" variables. */ + tree link_ptr_decl; +}; + extern GTY(()) vec<tree, va_gc> *offload_funcs; -extern GTY(()) vec<tree, va_gc> *offload_vars; +extern GTY(()) vec<omp_offload_var, va_gc> *offload_vars; #endif /* GCC_OMP_LOW_H */ diff --git a/gcc/passes.def b/gcc/passes.def index c0ab6b9..b32a5e5 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -151,6 +151,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_lower_eh_dispatch); NEXT_PASS (pass_oacc_device_lower); + NEXT_PASS (pass_omp_target_link); NEXT_PASS (pass_all_optimizations); PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations) NEXT_PASS (pass_remove_cgraph_callee_edges); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 49e22a9..554f3d2 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -413,6 +413,7 @@ extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt); extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt); diff --git a/gcc/varpool.c b/gcc/varpool.c index 478f365..ca8457d 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -156,7 +156,12 @@ varpool_node::get_create (tree decl) #ifdef ENABLE_OFFLOADING g->have_offload = true; if (!in_lto_p) - vec_safe_push (offload_vars, decl); + { + omp_offload_var var; + var.decl = decl; + var.link_ptr_decl = NULL_TREE; + vec_safe_push (offload_vars, var); + } node->force_output = 1; #endif } diff --git a/libgomp/target.c b/libgomp/target.c index ef22329..195be43 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -78,6 +78,17 @@ static int num_devices; /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ static int num_devices_openmp; +/* FIXME: Quick and dirty prototype of keeping correspondence between host + address of the object and target address of the artificial link pointer. + Move it to gomp_device_descr, or where? */ +struct link_struct +{ + uintptr_t host_start; + uintptr_t tgt_link_ptr; +}; +static struct link_struct links[100]; +static int link_num; + /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ static void * @@ -763,6 +774,21 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } + /* Set pointers to "omp declare target link" variables. */ + for (i = 0; i < mapnum; i++) + /* FIXME: Remove this ugly loop. */ + for (int j = 0; j < link_num; j++) + if (links[j].host_start == (uintptr_t) hostaddrs[i]) + { + cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); + /* Set link pointer on target to the device address of the mapped + object. */ + devicep->host2dev_func (devicep->target_id, + (void *) links[j].tgt_link_ptr, + (void *) &cur_node.tgt_offset, + sizeof (void *)); + } + /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ @@ -981,6 +1007,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + /* FIXME: Do not allocate space for link vars. */ tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; @@ -1009,26 +1036,44 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, for (i = 0; i < num_vars; i++) { struct addr_pair *target_var = &target_table[num_funcs + i]; - if (target_var->end - target_var->start - != (uintptr_t) host_var_table[i * 2 + 1]) + uintptr_t target_size = target_var->end - target_var->start; + + /* Most significant bit of the size marks "omp declare target link" + variables. */ + bool is_link = target_size & (1ULL << (sizeof (uintptr_t) * 8 - 1)); + + if (!is_link) { - gomp_mutex_unlock (&devicep->lock); - if (is_register_lock) - gomp_mutex_unlock (®ister_lock); - gomp_fatal ("Can't map target variables (size mismatch)"); - } + if ((uintptr_t) host_var_table[i * 2 + 1] != target_size) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("Can't map target variables (size mismatch)"); + } - splay_tree_key k = &array->key; - k->host_start = (uintptr_t) host_var_table[i * 2]; - k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; - k->tgt = tgt; - k->tgt_offset = target_var->start; - k->refcount = REFCOUNT_INFINITY; - k->async_refcount = 0; - array->left = NULL; - array->right = NULL; - splay_tree_insert (&devicep->mem_map, array); - array++; + splay_tree_key k = &array->key; + k->host_start = (uintptr_t) host_var_table[i * 2]; + k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; + k->tgt = tgt; + k->tgt_offset = target_var->start; + k->refcount = REFCOUNT_INFINITY; + k->async_refcount = 0; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&devicep->mem_map, array); + array++; + } + else + { + /* Do not map "omp declare target link" variables, only keep target + address of the artificial pointer. */ + /* FIXME: Where to keep it? */ + struct link_struct l; + l.host_start = (uintptr_t) host_var_table[i * 2]; + l.tgt_link_ptr = target_var->start; + links[link_num++] = l; + } } free (target_table); diff --git a/libgomp/testsuite/libgomp.c/target-link-1.c b/libgomp/testsuite/libgomp.c/target-link-1.c new file mode 100644 index 0000000..332bc14 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-link-1.c @@ -0,0 +1,56 @@ +int a = 1, b = 1; +double c = 1.0; +long long d[27]; +#pragma omp declare target link (a) to (b) link (c, d) + +/* FIXME: When the function is inlined, it gets the wrong values. */ +__attribute__((noinline, noclone)) int +foo (void) +{ + return a++ + b++; +} + +/* FIXME: When the function is inlined, it gets the wrong values. */ +__attribute__((noinline, noclone)) int +bar (void) +{ + int *p1 = &a; + int *p2 = &b; + c += 0.1; + d[10]++; /* FIXME: Support arrays in pass_omp_target_link::execute. */ + return *p1 + *p2; +} + +#pragma omp declare target (foo, bar) + +int +main () +{ + int res; + a = b = 2; + #pragma omp target map (to: a, b, c, d) map (from: res) + { + a; c; d; /* FIXME: Do not remove map(a,c,d) during gimplification. */ + res = foo () + foo (); + res += bar (); + } + + int shared_mem = 0; + #pragma omp target map (alloc: shared_mem) + shared_mem = 1; + + if ((shared_mem && res != (2 + 2) + (3 + 3) + (4 + 4)) + || (!shared_mem && res != (2 + 1) + (3 + 2) + (4 + 3))) + __builtin_abort (); + + #pragma omp target map (to: a) map (from: res) + { + a; /* FIXME: Do not remove map(a) during gimplification. */ + res = foo (); + } + + if ((shared_mem && res != 4 + 4) || (!shared_mem && res != 2 + 3)) + __builtin_abort (); + + return 0; +} -- Ilya