On Tue, 10 Nov 2015, Martin Jambor wrote: > On Fri, Nov 06, 2015 at 09:38:21AM +0100, Richard Biener wrote: > > On Thu, 5 Nov 2015, Martin Jambor wrote: > > > > > Hi, > > > > > > in the previous email I wrote that we need to "change behavior" of a few > > > optimization passes. One was the flattening of GPU functions and the > > > other two are in the patch below. It all comes down to this: at the > > > moment, we need to switch off the vectorizer (only for the GPU > > > functions, of course). > > > > > > We are actually quite close to being able to handle gimple vector > > > input in the HSA back-end but not all the way yet, and before allowing the > > > vectorizer again, we will have to make sure it never produces vectors > > > bigger than 128 bits (in GPU functions). > > > > Hmm. I'd rather have this modify > > DECL_FUNCTION_SPECIFIC_OPTIMIZATION of the hsa function to get this > > effect. I think I mentioned this to the OACC guys as well for a > > similar need of theirs. > > I see, that is a good idea. I have reverted changes to > tree-ssa-loop.c and tree-vectorizer.c and on top of that committed the > following patch to the branch which makes modifications to HSA fndecls > at a more convenient spot and disables vectorization in the following > way: > > tree gdecl = gpu->decl; > tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); > if (fn_opts == NULL_TREE) > fn_opts = optimization_default_node; > fn_opts = copy_node (fn_opts); > TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; > TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; > DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; > > I hope that is what you meant. I have also verified that it works.
Yes, that's what I meant. Thanks, Richard. > Thanks, > > Martin > > > 2015-11-10 Martin Jambor <mjam...@suse.cz> > > * hsa.h (hsa_summary_t): Add a comment to method link_functions. > (hsa_summary_t::link_functions): Moved... > * hsa.c (hsa_summary_t::link_functions): ...here. Added common fndecl > modifications. > Include stringpool.h. > * ipa-hsa.c (process_hsa_functions): Do not add flatten attribute > here. Fixed comments. > > diff --git a/gcc/hsa.c b/gcc/hsa.c > index ab05a1d..e63be95 100644 > --- a/gcc/hsa.c > +++ b/gcc/hsa.c > @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see > #include "alloc-pool.h" > #include "cgraph.h" > #include "print-tree.h" > +#include "stringpool.h" > #include "symbol-summary.h" > #include "hsa.h" > > @@ -693,6 +694,40 @@ hsa_get_declaration_name (tree decl) > return NULL; > } > > +/* Couple GPU and HOST as gpu-specific and host-specific implementation of > the > + same function. KIND determines whether GPU is a host-invokable kernel or > + gpu-callable function. 
*/ > + > +inline void > +hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, > + hsa_function_kind kind) > +{ > + hsa_function_summary *gpu_summary = get (gpu); > + hsa_function_summary *host_summary = get (host); > + > + gpu_summary->m_kind = kind; > + host_summary->m_kind = kind; > + > + gpu_summary->m_gpu_implementation_p = true; > + host_summary->m_gpu_implementation_p = false; > + > + gpu_summary->m_binded_function = host; > + host_summary->m_binded_function = gpu; > + > + tree gdecl = gpu->decl; > + DECL_ATTRIBUTES (gdecl) > + = tree_cons (get_identifier ("flatten"), NULL_TREE, > + DECL_ATTRIBUTES (gdecl)); > + > + tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); > + if (fn_opts == NULL_TREE) > + fn_opts = optimization_default_node; > + fn_opts = copy_node (fn_opts); > + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; > + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; > + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; > +} > + > /* Add a HOST function to HSA summaries. */ > > void > diff --git a/gcc/hsa.h b/gcc/hsa.h > index 025de67..b6855ea 100644 > --- a/gcc/hsa.h > +++ b/gcc/hsa.h > @@ -1161,27 +1161,14 @@ public: > hsa_summary_t (symbol_table *table): > function_summary<hsa_function_summary *> (table) { } > > + /* Couple GPU and HOST as gpu-specific and host-specific implementation of > + the same function. KIND determines whether GPU is a host-invokable > kernel > + or gpu-callable function. 
*/ > + > void link_functions (cgraph_node *gpu, cgraph_node *host, > hsa_function_kind kind); > }; > > -inline void > -hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, > - hsa_function_kind kind) > -{ > - hsa_function_summary *gpu_summary = get (gpu); > - hsa_function_summary *host_summary = get (host); > - > - gpu_summary->m_kind = kind; > - host_summary->m_kind = kind; > - > - gpu_summary->m_gpu_implementation_p = true; > - host_summary->m_gpu_implementation_p = false; > - > - gpu_summary->m_binded_function = host; > - host_summary->m_binded_function = gpu; > -} > - > /* in hsa.c */ > extern struct hsa_function_representation *hsa_cfun; > extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; > diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c > index b4cb58e..d77fa6b 100644 > --- a/gcc/ipa-hsa.c > +++ b/gcc/ipa-hsa.c > @@ -90,16 +90,12 @@ process_hsa_functions (void) > cgraph_node *clone = node->create_virtual_clone > (vec <cgraph_edge *> (), NULL, NULL, "hsa"); > TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); > - if (s->m_kind == HSA_KERNEL) > - DECL_ATTRIBUTES (clone->decl) > - = tree_cons (get_identifier ("flatten"), NULL_TREE, > - DECL_ATTRIBUTES (clone->decl)); > > clone->force_output = true; > hsa_summaries->link_functions (clone, node, s->m_kind); > > if (dump_file) > - fprintf (dump_file, "HSA creates a new clone: %s, type: %s\n", > + fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n", > clone->name (), > s->m_kind == HSA_KERNEL ? "kernel" : "function"); > } > @@ -116,7 +112,7 @@ process_hsa_functions (void) > hsa_summaries->link_functions (clone, node, HSA_FUNCTION); > > if (dump_file) > - fprintf (dump_file, "HSA creates a new function clone: %s\n", > + fprintf (dump_file, "Created a new HSA function clone: %s\n", > clone->name ()); > } > } > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)