Hello. The following patch adds a new IPA pass that creates function clones intended to be expanded to HSAIL. The pass also works in LTO mode.
Thanks, Martin
>From 8cbddf693f93328f117dc48588deee924d2df6cd Mon Sep 17 00:00:00 2001 From: mliska <mli...@suse.cz> Date: Tue, 1 Sep 2015 14:10:24 +0200 Subject: [PATCH 1/4] HSA: create HSA clones. gcc/c-family/ChangeLog: 2015-09-03 Martin Liska <mli...@suse.cz> * c-common.c (handle_hsa_attribute): Do not handle hsakernel attribute. gcc/lto/ChangeLog: 2015-09-03 Martin Liska <mli...@suse.cz> * lto-partition.c (add_symbol_to_partition_1): For an HSA clone, append also all dependencies to a LTO partition. libgomp/ChangeLog: 2015-09-03 Martin Liska <mli...@suse.cz> * plugin/plugin-hsa.c (GOMP_OFFLOAD_load_image): Enable having a module without kernels (can contain HSA functions). gcc/ChangeLog: 2015-09-03 Martin Liska <mli...@suse.cz> * Makefile.in: Add new source file and remove hsa-gen.c from list of GT files. * cgraph.h: Remove hsa_imp_of property of cgraph_node. * hsa-brig.c (brig_init): Append LTRANS name to a BRIG module name. (emit_function_directives): Add new argument. (emit_function_declaration): Use it. (emit_call_insn): Fill up offsets of functions that should be filled before a BRIG module is done. (hsa_brig_emit_function): Emit declarations before a function is defined/declared. (hsa_output_kernel_mapping): An HSA brig module can have zero kernels. (hsa_output_brig): Process functions linkage that fills up correct code list references. * hsa-dump.c: Add new include files due to function_summary. * hsa-gen.c (hsa_get_gpu_function): New function. (hsa_get_host_function): New function. (gen_hsa_insns_for_direct_call): Small refactoring. (gen_hsa_insns_for_known_library_call): Likewise. (hsa_generate_function_declaration): Sanitize function name. (generate_hsa): Remove unused return value. (init_hsa_functions): Remove. (insert_store_range_dim): Likewise. (wrap_hsa_kernel_call): Likewise. (wrap_all_hsa_calls): Likewise. (pass_gen_hsail::execute): Emit code just for cgraph_nodes that is hsa_summaries. * hsa-regalloc.c: Include additional header files. 
* hsa.c (hsa_get_declaration_name): Use asm_name as name of function. (hsa_register_kernel): New function. * hsa.h (enum hsa_function_kind): New enum. (struct hsa_function_summary): New. (hsa_summary_t::link_functions): Likewise. * ipa-hsa.c: New file. * lto-section-in.c: Add new section name. * lto-streamer.h (enum lto_section_type): Likewise. * omp-low.c (expand_parallel_call): Fill up HSA function summary. (expand_target_kernel_body): Likewise. * passes.c (execute_one_pass): Terminate pass queue if stop execution TODO is returned. (execute_pass_list_1): Likewise. (execute_ipa_pass_list): Likewise. * passes.def: Add new IPA pass. * timevar.def: Likewise. * tree-pass.h: Likewise. --- gcc/Makefile.in | 2 +- gcc/c-family/c-common.c | 9 -- gcc/cgraph.h | 4 - gcc/hsa-brig.c | 89 +++++++++--- gcc/hsa-dump.c | 40 +++++- gcc/hsa-gen.c | 267 +++++++---------------------------- gcc/hsa-regalloc.c | 27 +++- gcc/hsa.c | 28 ++++ gcc/hsa.h | 62 +++++++++ gcc/ipa-hsa.c | 330 ++++++++++++++++++++++++++++++++++++++++++++ gcc/lto-section-in.c | 3 +- gcc/lto-streamer.h | 1 + gcc/lto/lto-partition.c | 48 +++++++ gcc/omp-low.c | 16 ++- gcc/passes.c | 18 ++- gcc/passes.def | 1 + gcc/timevar.def | 1 + gcc/tree-pass.h | 2 + libgomp/plugin/plugin-hsa.c | 2 - 19 files changed, 681 insertions(+), 269 deletions(-) create mode 100644 gcc/ipa-hsa.c diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 1a37630..ea8750b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1314,6 +1314,7 @@ OBJS = \ ipa-icf.o \ ipa-icf-gimple.o \ ipa-reference.o \ + ipa-hsa.o \ ipa-ref.o \ ipa-utils.o \ ipa.o \ @@ -2371,7 +2372,6 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/ipa-devirt.c \ $(srcdir)/internal-fn.h \ $(srcdir)/hsa.c \ - $(srcdir)/hsa-gen.c \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index a8775ab..df7819f 100644 --- a/gcc/c-family/c-common.c +++ 
b/gcc/c-family/c-common.c @@ -667,10 +667,6 @@ const struct attribute_spec c_common_attribute_table[] = handle_noinline_attribute, false }, { "noclone", 0, 0, true, false, false, handle_noclone_attribute, false }, - { "hsa", 0, 0, true, false, false, - handle_hsa_attribute, false }, - { "hsakernel", 0, 0, true, false, false, - handle_hsa_attribute, false }, { "hsafunc", 0, 0, true, false, false, handle_hsa_attribute, false }, { "no_icf", 0, 0, true, false, false, @@ -7369,11 +7365,6 @@ handle_hsa_attribute (tree *node, tree name, TREE_USED (*node) = 1; DECL_UNINLINABLE (*node) = 1; - if (strcmp ("hsakernel", IDENTIFIER_POINTER (name)) == 0 - && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (TREE_TYPE (*node)))) - == void_type_node)) - warning (OPT_Wattributes, "%qE attribute on a function with fixed number " - "of argument makes no sense", name); return NULL_TREE; } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index c487f10..b742c8c 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -524,10 +524,6 @@ public: /* Section name. Again can be private, if allowed. */ section_hash_entry *x_section; - /* TODO: Consider moving this to a summary. - The node this HSA node corresponds to. */ - symtab_node *hsa_imp_of; - protected: /* Dump base fields of symtab nodes to F. Not to be used directly. */ void dump_base (FILE *); diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index a375cf2..7daae33 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -37,7 +37,6 @@ along with GCC; see the file COPYING3. If not see #include "stor-layout.h" #include "tree-cfg.h" #include "tree-ssa-alias.h" -#include "machmode.h" #include "output.h" #include "gimple-expr.h" #include "dominance.h" @@ -51,10 +50,26 @@ along with GCC; see the file COPYING3. 
If not see #include "gimple-pretty-print.h" #include "diagnostic-core.h" #include "hash-map.h" -#include "ipa-ref.h" #include "lto-streamer.h" #include "cgraph.h" #include "real.h" +#include "gimple-iterator.h" +#include "bitmap.h" +#include "dumpfile.h" +#include "alloc-pool.h" +#include "tree-ssa-operands.h" +#include "gimple-ssa.h" +#include "tree-phinodes.h" +#include "tree-ssanames.h" +#include "rtl.h" +#include "expr.h" +#include "tree-dfa.h" +#include "ssa-iterators.h" +#include "ipa-ref.h" +#include "gimplify-me.h" +#include "print-tree.h" +#include "cfghooks.h" +#include "symbol-summary.h" #include "hsa.h" #define BRIG_ELF_SECTION_NAME ".brig" @@ -116,6 +131,9 @@ static bool brig_initialized = false; /* Mapping between emitted HSA functions and their offset in code segment. */ static hash_map<tree, BrigCodeOffset32_t> *function_offsets; +/* Set of emitted function declarations. */ +static hash_set <tree> *emitted_declarations; + struct function_linkage_pair { function_linkage_pair (tree decl, unsigned int off): @@ -128,6 +146,9 @@ struct function_linkage_pair unsigned int offset; }; +/* Vector of function calls where we need to resolve function offsets. */ +static auto_vec <function_linkage_pair> function_call_linkage; + /* Add a new chunk, allocate data for it and initialize it. */ void @@ -404,6 +425,21 @@ brig_init (void) char* extension = strchr (modname, '.'); if (extension) *extension = '\0'; + + /* As in LTO mode, we have to emit a different module names. */ + if (flag_ltrans) + { + part = strrchr (asm_file_name, '/'); + if (!part) + part = asm_file_name; + else + part++; + char *modname2; + asprintf (&modname2, "%s_%s", modname, part); + free (modname); + modname = modname2; + } + hsa_sanitize_name (modname); moddir.name = brig_emit_string (modname); free (modname); @@ -570,7 +606,7 @@ emit_directive_variable (struct hsa_symbol *symbol) definition F. 
*/ static BrigDirectiveExecutable * -emit_function_directives (hsa_function_representation *f) +emit_function_directives (hsa_function_representation *f, bool is_declaration) { struct BrigDirectiveExecutable fndir; unsigned name_offset, inarg_off, scoped_off, next_toplev_off; @@ -621,7 +657,10 @@ emit_function_directives (hsa_function_representation *f) fndir.modifier.allBits |= BRIG_EXECUTABLE_DEFINITION; memset (&fndir.reserved, 0, sizeof (fndir.reserved)); - function_offsets->put (f->decl, brig_code.total_size); + /* Once we put a definition of function_offsets, we should not overwrite + it with a declaration of the function. */ + if (!function_offsets->get (f->decl) || !is_declaration) + function_offsets->put (f->decl, brig_code.total_size); brig_code.add (&fndir, sizeof (fndir)); /* XXX terrible hack: we need to set instCount after we emit all @@ -1048,7 +1087,7 @@ emit_function_declaration (tree decl) { hsa_function_representation *f = hsa_generate_function_declaration (decl); - emit_function_directives (f); + emit_function_directives (f, true); emit_queued_operands (); delete f; @@ -1423,11 +1462,9 @@ emit_call_insn (hsa_insn_basic *insn) operand_offsets[0] = htole32 (enqueue_op (call->result_code_list)); /* Operand 1: func */ - BrigCodeOffset32_t *func_offset = function_offsets->get - (call->called_function); - gcc_assert (func_offset != NULL); - call->func.directive_offset = *func_offset; unsigned int offset = enqueue_op (&call->func); + function_call_linkage.safe_push + (function_linkage_pair (call->called_function, offset)); operand_offsets[1] = htole32 (offset); /* Operand 2: in-args. 
*/ @@ -1746,18 +1783,22 @@ hsa_brig_emit_function (void) if (!function_offsets) function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); + if (!emitted_declarations) + emitted_declarations = new hash_set<tree> (); + for (unsigned i = 0; i < hsa_cfun->called_functions.length (); i++) { tree called = hsa_cfun->called_functions[i]; - if (function_offsets->get (called) == NULL) + /* If the function has no definition, emit a declaration. */ + if (!emitted_declarations->contains (called)) { emit_function_declaration (called); - gcc_assert (function_offsets->get (called) != NULL); + emitted_declarations->add (called); } } - ptr_to_fndir = emit_function_directives (hsa_cfun); + ptr_to_fndir = emit_function_directives (hsa_cfun, false); for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->first_insn; insn; insn = insn->next) @@ -1790,10 +1831,6 @@ hsa_output_kernel_mapping (tree brig_decl) { unsigned map_count = hsa_get_number_decl_kernel_mappings (); - /* If the current TU does not contain a kernel, no mapping is produced. 
*/ - if (map_count == 0) - return; - tree int_num_of_kernels; int_num_of_kernels = build_int_cst (uint32_type_node, map_count); tree kernel_num_index_type = build_index_type (int_num_of_kernels); @@ -1804,8 +1841,9 @@ hsa_output_kernel_mapping (tree brig_decl) for (unsigned i = 0; i < map_count; ++i) { tree decl = hsa_get_decl_kernel_mapping_decl (i); - CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, - build_fold_addr_expr (decl)); + CONSTRUCTOR_APPEND_ELT + (host_functions_vec, NULL_TREE, + build_fold_addr_expr (hsa_get_host_function (decl))); } tree host_functions_ctor = build_constructor (host_functions_array_type, host_functions_vec); @@ -2106,6 +2144,18 @@ hsa_output_brig (void) if (!brig_initialized) return; + for (unsigned i = 0; i < function_call_linkage.length (); i++) + { + function_linkage_pair p = function_call_linkage[i]; + + BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); + gcc_assert (*func_offset); + BrigOperandCodeRef *code_ref = (BrigOperandCodeRef *) + (brig_operand.get_ptr_by_offset (p.offset)); + gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); + code_ref->ref = htole32 (*func_offset); + } + saved_section = in_section; switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); @@ -2178,4 +2228,7 @@ hsa_output_brig (void) hsa_free_decl_kernel_mapping (); brig_release_data (); hsa_deinit_compilation_unit_data (); + + delete emitted_declarations; + delete function_offsets; } diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c index 4d78519..134005b 100644 --- a/gcc/hsa-dump.c +++ b/gcc/hsa-dump.c @@ -22,27 +22,55 @@ along with GCC; see the file COPYING3. 
If not see #include "coretypes.h" #include "tm.h" #include "is-a.h" -#include "vec.h" -#include "hash-set.h" #include "defaults.h" #include "hard-reg-set.h" -#include "dominance.h" -#include "cfg.h" -#include "input.h" -#include "function.h" +#include "hash-set.h" +#include "vec.h" #include "symtab.h" +#include "vec.h" +#include "input.h" #include "alias.h" #include "double-int.h" #include "inchash.h" #include "tree.h" +#include "tree-pass.h" #include "tree-ssa-alias.h" #include "internal-fn.h" #include "gimple-expr.h" +#include "dominance.h" +#include "cfg.h" +#include "cfganal.h" +#include "function.h" #include "predict.h" #include "basic-block.h" #include "fold-const.h" #include "gimple.h" +#include "gimple-iterator.h" +#include "machmode.h" +#include "output.h" +#include "function.h" +#include "bitmap.h" +#include "dumpfile.h" #include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "alloc-pool.h" +#include "tree-ssa-operands.h" +#include "gimple-ssa.h" +#include "tree-phinodes.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "rtl.h" +#include "expr.h" +#include "tree-dfa.h" +#include "ssa-iterators.h" +#include "ipa-ref.h" +#include "lto-streamer.h" +#include "cgraph.h" +#include "stor-layout.h" +#include "gimplify-me.h" +#include "print-tree.h" +#include "cfghooks.h" +#include "symbol-summary.h" #include "hsa.h" /* Return textual name of TYPE. */ diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 5c53876..5065394 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -70,7 +70,7 @@ along with GCC; see the file COPYING3. If not see #include "stor-layout.h" #include "gimplify-me.h" #include "print-tree.h" -#include "cfghooks.h" +#include "symbol-summary.h" #include "hsa.h" #include "cfghooks.h" @@ -662,6 +662,32 @@ get_symbol_for_decl (tree decl) return sym; } +/* For a given function declaration, return a GPU function + of the function. 
*/ + +static tree +hsa_get_gpu_function (tree decl) +{ + hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); + gcc_assert (s->kind != HSA_NONE); + gcc_assert (!s->gpu_implementation_p); + + return s->binded_function->decl; +} + +/* For a given HSA function declaration, return a host + function declaration. */ + +tree +hsa_get_host_function (tree decl) +{ + hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); + gcc_assert (s->kind != HSA_NONE); + gcc_assert (s->gpu_implementation_p); + + return s->binded_function->decl; +} + /* Create a spill symbol of type TYPE. */ hsa_symbol * @@ -2664,7 +2690,8 @@ static void gen_hsa_insns_for_direct_call (gimple stmt, hsa_bb *hbb, vec <hsa_op_reg_p> *ssa_map) { - hsa_insn_call *call_insn = new hsa_insn_call (gimple_call_fndecl (stmt)); + tree decl = gimple_call_fndecl (stmt); + hsa_insn_call *call_insn = new hsa_insn_call (decl); hsa_cfun->called_functions.safe_push (call_insn->called_function); /* Argument block start. 
*/ @@ -2702,7 +2729,7 @@ gen_hsa_insns_for_direct_call (gimple stmt, hsa_bb *hbb, call_insn->args_code_list = new hsa_op_code_list (args); hbb->append_insn (call_insn); - tree result_type = TREE_TYPE (TREE_TYPE (gimple_call_fndecl (stmt))); + tree result_type = TREE_TYPE (TREE_TYPE (decl)); tree result = gimple_call_lhs (stmt); hsa_insn_mem *result_insn = NULL; @@ -2796,8 +2823,7 @@ static bool gen_hsa_insns_for_known_library_call (gimple stmt, hsa_bb *hbb, vec <hsa_op_reg_p> *ssa_map) { - tree decl = gimple_call_fndecl (stmt); - const char *name = hsa_get_declaration_name (decl); + const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt)); if (strcmp (name, "omp_is_initial_device") == 0) { @@ -3474,7 +3500,8 @@ specialop: called = TREE_OPERAND (called, 0); gcc_checking_assert (TREE_CODE (called) == FUNCTION_DECL); - const char *name = hsa_get_declaration_name (called); + const char *name = hsa_get_declaration_name + (hsa_get_gpu_function (called)); hsa_add_kernel_dependency (hsa_cfun->decl, hsa_brig_function_name (name)); gen_hsa_insns_for_kernel_call (hbb, as_a <gcall *> (stmt)); @@ -3833,6 +3860,7 @@ hsa_generate_function_declaration (tree decl) fun->declaration_p = true; fun->decl = decl; fun->name = xstrdup (hsa_get_declaration_name (decl)); + hsa_sanitize_name (fun->name); gen_function_decl_parameters (fun, decl); @@ -3844,19 +3872,19 @@ hsa_generate_function_declaration (tree decl) considered an HSA kernel callable from the host, otherwise it will be compiled as an HSA function callable from other HSA code. 
*/ -static unsigned int +static void generate_hsa (bool kernel) { if (DECL_STATIC_CHAIN (cfun->decl)) { sorry ("HSA does not support nested functions"); - return 0; + return; } else if (!TYPE_ARG_TYPES (TREE_TYPE (cfun->decl))) { sorry ("HSA does not support functions with variadic arguments " "(or unknown return type)"); - return 0; + return; } vec <hsa_op_reg_p> ssa_map = vNULL; @@ -3879,13 +3907,7 @@ generate_hsa (bool kernel) if (hsa_cfun->kern_p) { - cgraph_node *node = cgraph_node::get_create (current_function_decl); - tree host_decl; - if (node->hsa_imp_of) - host_decl = node->hsa_imp_of->decl; - else - host_decl = current_function_decl; - hsa_add_kern_decl_mapping (host_decl, hsa_cfun->name, + hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->name, hsa_cfun->maximum_omp_data_size); } @@ -3903,197 +3925,6 @@ generate_hsa (bool kernel) fail: hsa_deinit_data_for_cfun (); - return 0; -} - -static GTY(()) tree hsa_launch_fn; -static GTY(()) tree hsa_dim_array_type; -static GTY(()) tree hsa_lattrs_dimnum_decl; -static GTY(()) tree hsa_lattrs_grid_decl; -static GTY(()) tree hsa_lattrs_group_decl; -static GTY(()) tree hsa_lattrs_nargs_decl; -static GTY(()) tree hsa_launch_attributes_type; - -static void -init_hsa_functions (void) -{ - if (hsa_launch_fn) - return; - - tree dim_arr_index_type; - dim_arr_index_type = build_index_type (build_int_cst (integer_type_node, 2)); - hsa_dim_array_type = build_array_type (uint32_type_node, dim_arr_index_type); - - hsa_launch_attributes_type = make_node (RECORD_TYPE); - hsa_lattrs_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("ndim"), - uint32_type_node); - DECL_CHAIN (hsa_lattrs_dimnum_decl) = NULL_TREE; - - hsa_lattrs_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("global_size"), - hsa_dim_array_type); - DECL_CHAIN (hsa_lattrs_grid_decl) = hsa_lattrs_dimnum_decl; - hsa_lattrs_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier 
("group_size"), - hsa_dim_array_type); - DECL_CHAIN (hsa_lattrs_group_decl) = hsa_lattrs_grid_decl; - hsa_lattrs_nargs_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("nargs"), - uint32_type_node); - DECL_CHAIN (hsa_lattrs_nargs_decl) = hsa_lattrs_group_decl; - finish_builtin_struct (hsa_launch_attributes_type, "__hsa_launch_attributes", - hsa_lattrs_nargs_decl, NULL_TREE); - tree launch_fn_type; - launch_fn_type - = build_function_type_list (void_type_node, ptr_type_node, - build_pointer_type (hsa_launch_attributes_type), - build_pointer_type (uint64_type_node), - NULL_TREE); - - hsa_launch_fn = build_fn_decl ("__hsa_launch_kernel", launch_fn_type); -} - -/* Insert before the current statement in GSI a store of VALUE to INDEX of - array (of type hsa_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be of - type uint32_type_node. */ - -static void -insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, - tree fld_decl, int index, tree value) -{ - tree ref = build4 (ARRAY_REF, uint32_type_node, - build3 (COMPONENT_REF, hsa_dim_array_type, - range_var, fld_decl, NULL_TREE), - build_int_cst (integer_type_node, index), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); -} - -/* Generate call to invoke kernel implementing function FNDECL. 
*/ - -static void -wrap_hsa_kernel_call (gimple_stmt_iterator *gsi, tree fndecl) -{ - init_hsa_functions (); - - bool real_kern_p = lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl)); - tree grid_size_1, group_size_1; - tree u32_one = build_int_cst (uint32_type_node, 1); - gimple call_stmt = gsi_stmt (*gsi); - unsigned discard_arguents, num_args = gimple_call_num_args (call_stmt); - if (real_kern_p) - { - discard_arguents = 2; - if (num_args < 2) - { - error ("Calls to functions with hsakernel attribute must " - "have at least two arguments."); - grid_size_1 = group_size_1 = u32_one; - } - else - { - grid_size_1 = fold_convert (uint32_type_node, - gimple_call_arg (call_stmt, num_args - 2)); - grid_size_1 = force_gimple_operand_gsi (gsi, grid_size_1, true, - NULL_TREE, true, - GSI_SAME_STMT); - group_size_1 = fold_convert (uint32_type_node, - gimple_call_arg (call_stmt, - num_args - 1)); - group_size_1 = force_gimple_operand_gsi (gsi, group_size_1, true, - NULL_TREE, true, - GSI_SAME_STMT); - } - } - else - { - discard_arguents = 0; - grid_size_1 = build_int_cst (uint32_type_node, 64); - group_size_1 = build_int_cst (uint32_type_node, 64); - } - - tree lattrs = create_tmp_var (hsa_launch_attributes_type, - "__hsa_launch_attrs"); - tree dimref = build3 (COMPONENT_REF, uint32_type_node, - lattrs, hsa_lattrs_dimnum_decl, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 0, - grid_size_1); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 1, - u32_one); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 2, - u32_one); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 0, - group_size_1); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 1, - u32_one); - insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 2, - u32_one); - tree nargsref = build3 (COMPONENT_REF, uint32_type_node, - lattrs, hsa_lattrs_nargs_decl, 
NULL_TREE); - tree nargsval = build_int_cst (uint32_type_node, num_args - discard_arguents); - gsi_insert_before (gsi, gimple_build_assign (nargsref, nargsval), - GSI_SAME_STMT); - lattrs = build_fold_addr_expr (lattrs); - - tree args; - args = create_tmp_var (build_array_type_nelts (uint64_type_node, - num_args - discard_arguents), - NULL); - - gcc_assert (num_args >= discard_arguents); - for (unsigned i = 0; i < (num_args - discard_arguents); i++) - { - tree arg = gimple_call_arg (call_stmt, i); - gimple g; - - tree r = build4 (ARRAY_REF, uint64_type_node, args, - size_int (i), NULL_TREE, NULL_TREE); - - arg = force_gimple_operand_gsi (gsi, fold_convert (uint64_type_node, arg), - true, NULL_TREE, true, GSI_SAME_STMT); - g = gimple_build_assign (r, arg); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - } - - args = build_fold_addr_expr (args); - - /* XXX doesn't handle calls with lhs, doesn't remove EH - edges. */ - gimple launch = gimple_build_call (hsa_launch_fn, 3, - build_fold_addr_expr (fndecl), - lattrs, args); - gsi_insert_before (gsi, launch, GSI_SAME_STMT); - unlink_stmt_vdef (call_stmt); - gsi_remove (gsi, true); -} - -/* Replace calls of functions which have been turned into HSA kernels into - their invocation via HSA run-time. */ - -static unsigned int -wrap_all_hsa_calls (void) -{ - bool changed = false; - basic_block bb; - FOR_ALL_BB_FN (bb, cfun) - { - gimple_stmt_iterator gsi; - tree fndecl; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) - if (is_gimple_call (gsi_stmt (gsi)) - && (fndecl = gimple_call_fndecl (gsi_stmt (gsi))) - && (lookup_attribute ("hsa", DECL_ATTRIBUTES (fndecl)) - || lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl)))) - { - wrap_hsa_kernel_call (&gsi, fndecl); - changed = true; - } - else - gsi_next (&gsi); - } - return changed ? 
TODO_cleanup_cfg | TODO_update_ssa : 0; } namespace { @@ -4135,15 +3966,17 @@ pass_gen_hsail::gate (function *) unsigned int pass_gen_hsail::execute (function *) { - if (cgraph_node::get_create (current_function_decl)->hsa_imp_of - || lookup_attribute ("hsa", DECL_ATTRIBUTES (current_function_decl)) - || lookup_attribute ("hsakernel", - DECL_ATTRIBUTES (current_function_decl))) - return generate_hsa (true); - else if (hsa_callable_function_p (current_function_decl)) - return generate_hsa (false); - else - return wrap_all_hsa_calls (); + hsa_function_summary *s = hsa_summaries->get + (cgraph_node::get_create (current_function_decl)); + + if (s->gpu_implementation_p) + { + generate_hsa (s->kind == HSA_KERNEL); + TREE_ASM_WRITTEN (current_function_decl) = 1; + return TODO_stop_pass_execution; + } + + return 0; } } // anon namespace @@ -4155,5 +3988,3 @@ make_pass_gen_hsail (gcc::context *ctxt) { return new pass_gen_hsail (ctxt); } - -#include "gt-hsa-gen.h" diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c index 75bcc8a..bb93c35 100644 --- a/gcc/hsa-regalloc.c +++ b/gcc/hsa-regalloc.c @@ -27,27 +27,50 @@ along with GCC; see the file COPYING3. 
If not see #include "hash-set.h" #include "vec.h" #include "symtab.h" +#include "vec.h" #include "input.h" #include "alias.h" #include "double-int.h" #include "inchash.h" #include "tree.h" +#include "tree-pass.h" #include "tree-ssa-alias.h" #include "internal-fn.h" #include "gimple-expr.h" #include "dominance.h" #include "cfg.h" -#include "cfghooks.h" +#include "cfganal.h" #include "function.h" #include "predict.h" #include "basic-block.h" #include "fold-const.h" #include "gimple.h" +#include "gimple-iterator.h" +#include "machmode.h" +#include "output.h" +#include "function.h" #include "bitmap.h" #include "dumpfile.h" #include "gimple-pretty-print.h" #include "diagnostic-core.h" -#include "cfganal.h" +#include "alloc-pool.h" +#include "tree-ssa-operands.h" +#include "gimple-ssa.h" +#include "tree-phinodes.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "rtl.h" +#include "expr.h" +#include "tree-dfa.h" +#include "ssa-iterators.h" +#include "ipa-ref.h" +#include "lto-streamer.h" +#include "cgraph.h" +#include "stor-layout.h" +#include "gimplify-me.h" +#include "print-tree.h" +#include "cfghooks.h" +#include "symbol-summary.h" #include "hsa.h" diff --git a/gcc/hsa.c b/gcc/hsa.c index 4ad44fe..017b4ca 100644 --- a/gcc/hsa.c +++ b/gcc/hsa.c @@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see #include "stor-layout.h" #include "gimplify-me.h" #include "print-tree.h" +#include "symbol-summary.h" #include "hsa.h" /* Structure containing intermediate HSA representation of the generated @@ -100,6 +101,9 @@ hash_map <tree, vec <char *> *> *hsa_decl_kernel_dependencies; /* Hash function to lookup a symbol for a decl. */ hash_table <hsa_free_symbol_hasher> *hsa_global_variable_symbols; +/* HSA summaries. */ +hsa_summary_t *hsa_summaries = NULL; + /* True if compilation unit-wide data are already allocated and initialized. 
*/ static bool compilation_unit_data_initialized; @@ -464,10 +468,34 @@ hsa_get_declaration_name (tree decl) free (b); return ggc_str; } + else if (TREE_CODE (decl) == FUNCTION_DECL) + return cgraph_node::get_create (decl)->asm_name (); else return IDENTIFIER_POINTER (DECL_NAME (decl)); return NULL; } +/* Add a HOST function to HSA summaries. */ + +void +hsa_register_kernel (cgraph_node *host) +{ + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + hsa_function_summary *s = hsa_summaries->get (host); + s->kind = HSA_KERNEL; +} + +/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of + a HOST function. */ + +void +hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) +{ + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + hsa_summaries->link_functions (gpu, host, HSA_KERNEL); +} + #include "gt-hsa.h" diff --git a/gcc/hsa.h b/gcc/hsa.h index 8ebfcaa..c6cd124 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -889,10 +889,69 @@ public: unsigned maximum_omp_data_size; }; +enum hsa_function_kind +{ + HSA_NONE, + HSA_KERNEL, + HSA_FUNCTION +}; + +struct hsa_function_summary +{ + /* Default constructor. */ + hsa_function_summary (); + + /* Kind of GPU/hostfunction. */ + hsa_function_kind kind; + + /* Pointer to a cgraph node which is a HSA implementation of the function. + In case of the function is a HSA function, the binded function points + to the host function. */ + cgraph_node *binded_function; + + /* Identifies if the function is an HSA function or a host function. */ + bool gpu_implementation_p; +}; + +inline +hsa_function_summary::hsa_function_summary (): kind (HSA_NONE), + binded_function (NULL), gpu_implementation_p (false) +{ +} + +/* Function summary for HSA functions. 
*/ +class hsa_summary_t: public function_summary <hsa_function_summary *> +{ +public: + hsa_summary_t (symbol_table *table): + function_summary<hsa_function_summary *> (table) { } + + void link_functions (cgraph_node *gpu, cgraph_node *host, + hsa_function_kind kind); +}; + +inline void +hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, + hsa_function_kind kind) +{ + hsa_function_summary *gpu_summary = get (gpu); + hsa_function_summary *host_summary = get (host); + + gpu_summary->kind = kind; + host_summary->kind = kind; + + gpu_summary->gpu_implementation_p = true; + host_summary->gpu_implementation_p = false; + + gpu_summary->binded_function = host; + host_summary->binded_function = gpu; +} + /* in hsa.c */ extern struct hsa_function_representation *hsa_cfun; extern hash_table <hsa_free_symbol_hasher> *hsa_global_variable_symbols; extern hash_map <tree, vec <char *> *> *hsa_decl_kernel_dependencies; +extern hsa_summary_t *hsa_summaries; extern unsigned hsa_kernel_calls_counter; bool hsa_callable_function_p (tree fndecl); void hsa_init_compilation_unit_data (void); @@ -915,6 +974,8 @@ void hsa_add_kernel_dependency (tree caller, char *called_function); void hsa_sanitize_name (char *p); char *hsa_brig_function_name (const char *p); const char *hsa_get_declaration_name (tree decl); +void hsa_register_kernel (cgraph_node *host); +void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host); /* In hsa-gen.c. */ void hsa_build_append_simple_mov (hsa_op_reg *, hsa_op_base *, hsa_bb *); @@ -924,6 +985,7 @@ hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); hsa_bb *hsa_init_new_bb (basic_block); hsa_function_representation *hsa_generate_function_declaration (tree decl); +tree hsa_get_host_function (tree decl); /* In hsa-regalloc.c. 
*/ void hsa_regalloc (void); diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c new file mode 100644 index 0000000..24d3fe4 --- /dev/null +++ b/gcc/ipa-hsa.c @@ -0,0 +1,330 @@ +/* Callgraph based pass that creates HSA clones of functions. + Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Martin Liska <mli...@suse.cz> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Interprocedural HSA pass is responsible for creation of HSA clones. + For all these HSA clones, we emit HSAIL instructions and pass processing + is terminated. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "is-a.h" +#include "defaults.h" +#include "hard-reg-set.h" +#include "hash-set.h" +#include "vec.h" +#include "symtab.h" +#include "vec.h" +#include "input.h" +#include "alias.h" +#include "double-int.h" +#include "inchash.h" +#include "tree.h" +#include "tree-pass.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-expr.h" +#include "dominance.h" +#include "cfg.h" +#include "cfganal.h" +#include "function.h" +#include "predict.h" +#include "basic-block.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "machmode.h" +#include "output.h" +#include "function.h" +#include "bitmap.h" +#include "dumpfile.h" +#include "gimple-pretty-print.h" +#include "tree-streamer.h" +#include "diagnostic-core.h" +#include "alloc-pool.h" +#include "tree-ssa-operands.h" +#include "gimple-ssa.h" +#include "tree-phinodes.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "rtl.h" +#include "expr.h" +#include "tree-dfa.h" +#include "ssa-iterators.h" +#include "ipa-ref.h" +#include "lto-streamer.h" +#include "cgraph.h" +#include "stor-layout.h" +#include "gimplify-me.h" +#include "print-tree.h" +#include "cfghooks.h" +#include "symbol-summary.h" +#include "hsa.h" + +namespace { + +static unsigned int +process_hsa_functions (void) +{ + struct cgraph_node *node; + + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + + FOR_EACH_DEFINED_FUNCTION (node) + { + hsa_function_summary *s = hsa_summaries->get (node); + + /* A linked function is skipped. 
*/ + if (s->binded_function != NULL) + continue; + + if (s->kind != HSA_NONE) + { + cgraph_node *clone = node->create_virtual_clone + (vec <cgraph_edge *> (), NULL, NULL, "hsa"); + + clone->force_output = true; + hsa_summaries->link_functions (clone, node, s->kind); + + if (dump_file) + fprintf (dump_file, "HSA creates a new clone: %s, type: %s\n", + clone->name (), + s->kind == HSA_KERNEL ? "kernel" : "function"); + } + else if (hsa_callable_function_p (node->decl)) + { + cgraph_node *clone = node->create_virtual_clone + (vec <cgraph_edge *> (), NULL, NULL, "hsa"); + + hsa_summaries->link_functions (clone, node, HSA_FUNCTION); + + if (dump_file) + fprintf (dump_file, "HSA creates a new function clone: %s\n", + clone->name ()); + } + } + + /* Redirect all edges that are between HSA clones. */ + FOR_EACH_DEFINED_FUNCTION (node) + { + cgraph_edge *e = node->callees; + + while (e) + { + hsa_function_summary *src = hsa_summaries->get (node); + if (src->kind != HSA_NONE && src->gpu_implementation_p) + { + hsa_function_summary *dst = hsa_summaries->get (e->callee); + if (dst->kind != HSA_NONE && !dst->gpu_implementation_p) + { + e->redirect_callee (dst->binded_function); + if (dump_file) + fprintf (dump_file, + "Redirecting edge to HSA function: %s->%s\n", + xstrdup_for_dump (e->caller->name ()), + xstrdup_for_dump (e->callee->name ())); + } + } + + e = e->next_callee; + } + } + + return 0; +} + +static void +ipa_hsa_write_summary (void) +{ + struct bitpack_d bp; + struct cgraph_node *node; + struct output_block *ob; + unsigned int count = 0; + lto_symtab_encoder_iterator lsei; + lto_symtab_encoder_t encoder; + + if (!hsa_summaries) + return; + + ob = create_output_block (LTO_section_ipa_hsa); + encoder = ob->decl_state->symtab_node_encoder; + ob->symbol = NULL; + for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); + lsei_next_function_in_partition (&lsei)) + { + node = lsei_cgraph_node (lsei); + hsa_function_summary *s = hsa_summaries->get 
(node); + + if (s->kind != HSA_NONE) + count++; + } + + streamer_write_uhwi (ob, count); + + /* Process all of the functions. */ + for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); + lsei_next_function_in_partition (&lsei)) + { + node = lsei_cgraph_node (lsei); + hsa_function_summary *s = hsa_summaries->get (node); + + if (s->kind != HSA_NONE) + { + encoder = ob->decl_state->symtab_node_encoder; + int node_ref = lto_symtab_encoder_encode (encoder, node); + streamer_write_uhwi (ob, node_ref); + + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, s->kind, 2); + bp_pack_value (&bp, s->gpu_implementation_p, 1); + bp_pack_value (&bp, s->binded_function != NULL, 1); + streamer_write_bitpack (&bp); + if (s->binded_function) + stream_write_tree (ob, s->binded_function->decl, true); + } + } + + streamer_write_char_stream (ob->main_stream, 0); + produce_asm (ob, NULL); + destroy_output_block (ob); +} + +static void +ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data, + size_t len) +{ + const struct lto_function_header *header = + (const struct lto_function_header *) data; + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; + struct data_in *data_in; + unsigned int i; + unsigned int count; + + lto_input_block ib_main ((const char *) data + main_offset, + header->main_size, file_data->mode_table); + + data_in = + lto_data_in_create (file_data, (const char *) data + string_offset, + header->string_size, vNULL); + count = streamer_read_uhwi (&ib_main); + + for (i = 0; i < count; i++) + { + unsigned int index; + struct cgraph_node *node; + lto_symtab_encoder_t encoder; + + index = streamer_read_uhwi (&ib_main); + encoder = file_data->symtab_node_encoder; + node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, + index)); + gcc_assert (node->definition); + hsa_function_summary *s = 
hsa_summaries->get (node); + + struct bitpack_d bp = streamer_read_bitpack (&ib_main); + s->kind = (hsa_function_kind) bp_unpack_value (&bp, 2); + s->gpu_implementation_p = bp_unpack_value (&bp, 1); + bool has_tree = bp_unpack_value (&bp, 1); + + if (has_tree) + { + tree decl = stream_read_tree (&ib_main, data_in); + s->binded_function = cgraph_node::get_create (decl); + } + } + lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data, + len); + lto_data_in_delete (data_in); +} + +static void +ipa_hsa_read_summary (void) +{ + struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); + struct lto_file_decl_data *file_data; + unsigned int j = 0; + + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + + while ((file_data = file_data_vec[j++])) + { + size_t len; + const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa, + NULL, &len); + + if (data) + ipa_hsa_read_section (file_data, data, len); + } +} + +const pass_data pass_data_ipa_hsa = +{ + IPA_PASS, /* type */ + "hsa", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_HSA, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_symtab, /* todo_flags_finish */ +}; + +class pass_ipa_hsa : public ipa_opt_pass_d +{ +public: + pass_ipa_hsa (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt, + NULL, /* generate_summary */ + ipa_hsa_write_summary, /* write_summary */ + ipa_hsa_read_summary, /* read_summary */ + ipa_hsa_write_summary, /* write_optimization_summary */ + ipa_hsa_read_summary, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *); + + virtual unsigned int execute (function *) { return process_hsa_functions (); } + +}; // class pass_ipa_hsa + +bool 
+pass_ipa_hsa::gate (function *) +{ + return hsa_gen_requested_p () || in_lto_p; +} + +} // anon namespace + +ipa_opt_pass_d * +make_pass_ipa_hsa (gcc::context *ctxt) +{ + return new pass_ipa_hsa (ctxt); +} diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index 58560a8..468777a 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -68,7 +68,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = "ipcp_trans", "icf", "offload_table", - "mode_table" + "mode_table", + "hsa" }; diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 66a824e..b4455a1 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -244,6 +244,7 @@ enum lto_section_type LTO_section_ipa_icf, LTO_section_offload_table, LTO_section_mode_table, + LTO_section_ipa_hsa, LTO_N_SECTION_TYPES /* Must be last. */ }; diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 8e5b555..01a60b2 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-utils.h" #include "lto-partition.h" #include "stringpool.h" +#include "hsa.h" vec<ltrans_partition> ltrans_partitions; @@ -180,6 +181,53 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node) Therefore put it into the same partition. */ if (cnode->instrumented_version) add_symbol_to_partition_1 (part, cnode->instrumented_version); + + /* Add the HSA function associated with the symbol. */ + if (hsa_summaries != NULL) + { + hsa_function_summary *s = hsa_summaries->get (cnode); + if (s->kind != HSA_NONE) + { + /* Add the bound (host/gpu) function. */ + bool added = add_symbol_to_partition_1 (part, s->binded_function); + gcc_assert (added); + if (symtab->dump_file) + fprintf (symtab->dump_file, + "adding an HSA function (host/gpu) to the " + "partition: %s\n", + s->binded_function->name ()); + + ipa_ref *ref; + + /* Add all referring nodes that have an HSA kind. 
*/ + for (unsigned i = 0; node->iterate_referring (i, ref); i++) + { + cgraph_node *r = dyn_cast <cgraph_node *> (ref->referring); + if (r && hsa_summaries->get (r)->kind != HSA_NONE) + { + add_symbol_to_partition_1 (part, r); + if (symtab->dump_file) + fprintf (symtab->dump_file, + "adding an HSA referring node: %s\n", + r->name ()); + } + } + + /* Add all referred nodes that have an HSA kind. */ + for (unsigned i = 0; node->iterate_reference (i, ref); i++) + { + cgraph_node *r = dyn_cast <cgraph_node *> (ref->referred); + if (r && hsa_summaries->get (r)->kind != HSA_NONE) + { + add_symbol_to_partition_1 (part, r); + if (symtab->dump_file) + fprintf (symtab->dump_file, + "adding an HSA referred symbol: %s\n", + r->name ()); + } + } + } + } } add_references_to_partition (part, node); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index d6c521f..2cbd4e8 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "lto-section-names.h" #include "gomp-constants.h" +#include "symbol-summary.h" #include "hsa.h" @@ -5236,7 +5237,7 @@ gimple_build_cond_empty (tree cond) target region that has not been turned into a simple GPGPU kernel. */ static bool -region_part_of_unkernelized_tartget_p (struct omp_region *region) +region_part_of_unkernelized_target_p (struct omp_region *region) { if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (current_function_decl))) @@ -5429,10 +5430,11 @@ expand_parallel_call (struct omp_region *region, basic_block bb, false, GSI_CONTINUE_LINKING); if (hsa_gen_requested_p () - && region_part_of_unkernelized_tartget_p (region)) + && region_part_of_unkernelized_target_p (region)) { cgraph_node *child_cnode = cgraph_node::get (child_fndecl); - child_cnode->hsa_imp_of = child_cnode; + hsa_register_kernel (child_cnode); + /* FIXME: Flatten should be set on HSA-only clones created by an IPA pass. 
*/ DECL_ATTRIBUTES (child_fndecl) @@ -10010,7 +10012,8 @@ expand_target_kernel_body (struct omp_region *target) { gcc_assert (!tgt_stmt->kernel_iter); cgraph_node *n = cgraph_node::get (orig_child_fndecl); - n->hsa_imp_of = n; + + hsa_register_kernel (n); /* FIXME: Flatten should be set on HSA-only clones created by an IPA pass. */ DECL_ATTRIBUTES (orig_child_fndecl) @@ -10075,7 +10078,10 @@ expand_target_kernel_body (struct omp_region *target) cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); kcn->mark_force_output (); - kcn->hsa_imp_of = cgraph_node::get (orig_child_fndecl); + cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (kcn, orig_child); + /* FIXME: Flatten should be set on HSA-only clones created by an IPA pass. */ DECL_ATTRIBUTES (kern_fndecl) diff --git a/gcc/passes.c b/gcc/passes.c index 1b677ac..86768e0 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -2257,7 +2257,7 @@ override_gate_status (opt_pass *pass, tree func, bool gate_status) /* Execute PASS. */ bool -execute_one_pass (opt_pass *pass) +execute_one_pass (opt_pass *pass, bool *exit) { unsigned int todo_after = 0; @@ -2362,18 +2362,28 @@ execute_one_pass (opt_pass *pass) if (!((todo_after | pass->todo_flags_finish) & TODO_do_not_ggc_collect)) ggc_collect (); + /* If finish TODO flags contain TODO_stop_pass_execution, set exit = true. 
*/ + if (todo_after & TODO_stop_pass_execution) + *exit = true; + return true; } static void execute_pass_list_1 (opt_pass *pass) { + bool stop_pass_execution = false; + do { gcc_assert (pass->type == GIMPLE_PASS || pass->type == RTL_PASS); - if (execute_one_pass (pass) && pass->sub) + if (execute_one_pass (pass, &stop_pass_execution) && pass->sub) execute_pass_list_1 (pass->sub); + + if (stop_pass_execution) + return; + pass = pass->next; } while (pass); @@ -2714,12 +2724,15 @@ ipa_read_optimization_summaries (void) void execute_ipa_pass_list (opt_pass *pass) { + /* execute_one_pass only ever sets this flag, so it must start out false. */ + bool stop_pass_execution = false; + do { gcc_assert (!current_function_decl); gcc_assert (!cfun); gcc_assert (pass->type == SIMPLE_IPA_PASS || pass->type == IPA_PASS); - if (execute_one_pass (pass) && pass->sub) + if (execute_one_pass (pass, &stop_pass_execution) && pass->sub) { if (pass->sub->type == GIMPLE_PASS) { diff --git a/gcc/passes.def b/gcc/passes.def index 60bb6eb..3999fbb 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -127,6 +127,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_inline); NEXT_PASS (pass_ipa_pure_const); NEXT_PASS (pass_ipa_reference); + NEXT_PASS (pass_ipa_hsa); /* This pass needs to be scheduled after any IP code duplication. 
*/ NEXT_PASS (pass_ipa_single_use); /* Comdat privatization come last, as direct references to comdat local diff --git a/gcc/timevar.def b/gcc/timevar.def index ac41075..705f6a8 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -94,6 +94,7 @@ DEFTIMEVAR (TV_WHOPR_WPA_IO , "whopr wpa I/O") DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning") DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans") DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference") +DEFTIMEVAR (TV_IPA_HSA , "ipa HSA") DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile") DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile") DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 91c44a7..0f084f7 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -295,6 +295,7 @@ protected: /* Rebuild the callgraph edges. */ #define TODO_rebuild_cgraph_edges (1 << 22) +#define TODO_stop_pass_execution (1 << 23) /* Internally used in execute_function_todo(). */ #define TODO_update_ssa_any \ @@ -480,6 +481,7 @@ extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index d318d52..f9be015 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -473,8 +473,6 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version __attribute__ ((unused)), if (agent->prog_finalized) destroy_hsa_program (agent); - if (kernel_count == 0) - GOMP_PLUGIN_fatal ("No kernels encountered in a brig module description"); if (debug) fprintf (stderr, "Encountered %d 
kernels in an image\n", kernel_count); pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair)); -- 2.4.6