Hi! I've committed the patch, so that the rest can be handled incrementally.
On Wed, May 13, 2020 at 01:16:42PM +0200, Jakub Jelinek wrote:
> Honza/Martin, are the cgraph related changes acceptable to you?
>
> For LTO, the patch only saves/restores the two cgraph_node bits added in the
> patch, but doesn't yet stream out and back in the on the side info for the
> declare_variant_alt. For the LTO partitioning, I believe those artificial
> FUNCTION_DECLs with declare_variant_alt need to go into partition together
> with anything that calls them (possibly duplicated), any way how to achieve
> that? Say if declare variant artificial fn foobar is directly
> called from all of foo, bar and baz and not from qux and we want 4
> partitions, one for each of foo, bar, baz, qux, then foobar is needed in the
> first 3 partitions, and the IPA_REF_ADDRs recorded for foobar that right
> after IPA the foobar call will be replaced with calls to foobar1, foobar2,
> foobar3 or foobar (non-artificial) can of course stay in different
> partitions if needed.

I've tried to add the saving/restoring next to the ipa refs saving/restoring, as the declare variant alt stuff is kind of an extension of those. Unfortunately, the following doesn't compile, because I also need to write or read a tree there (ctx is a portion of DECL_ATTRIBUTES of the base function), but the ipa refs write/read back functions don't have arguments that can be used for that. Any idea where to do it instead? For every cgraph_node with declare_variant_alt, I need to call the function that writes its entry, which needs to contain a few other cgraph_nodes (duplicated also in the ipa_refs), some widest_ints, one tree and some booleans. Also, do I need to do anything special to avoid LTO merging those artificial decls? It is just fine if their ipa refs are merged, but the artificial vars would be fine only if they are the same (could use the other hash table for that).
--- gcc/symtab.c.jj 2020-04-20 15:51:19.005560662 +0200 +++ gcc/symtab.c 2020-05-14 12:25:41.530745061 +0200 @@ -1984,7 +1984,7 @@ symtab_node::get_partitioning_class (voi if (DECL_ABSTRACT_P (decl)) return SYMBOL_EXTERNAL; - if (cnode && cnode->inlined_to) + if (cnode && (cnode->inlined_to || cnode->declare_variant_alt)) return SYMBOL_DUPLICATE; /* Transparent aliases are always duplicated. */ --- gcc/lto-cgraph.c.jj 2020-05-14 09:58:21.353412170 +0200 +++ gcc/lto-cgraph.c 2020-05-14 12:39:01.592642219 +0200 @@ -766,6 +766,9 @@ output_refs (lto_symtab_encoder_t encode for (int i = 0; node->iterate_reference (i, ref); i++) lto_output_ref (ob, ref, encoder); } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + if (cnode->declare_variant_alt) + omp_lto_output_declare_variant_alt (ob, cnode, encoder); } streamer_write_uhwi_stream (ob->main_stream, 0); @@ -1610,6 +1613,9 @@ input_refs (class lto_input_block *ib, input_ref (ib, node, nodes); count--; } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + if (cnode->declare_variant_alt) + omp_lto_input_declare_variant_alt (ib, cnode, nodes); } } --- gcc/omp-general.c.jj 2020-05-14 09:58:21.394411547 +0200 +++ gcc/omp-general.c 2020-05-14 13:14:09.338841298 +0200 @@ -42,6 +42,8 @@ along with GCC; see the file COPYING3. #include "hsa-common.h" #include "tree-pass.h" #include "omp-device-properties.h" +#include "data-streamer.h" +#include "streamer-hooks.h" enum omp_requires omp_requires_mask; @@ -1898,6 +1900,91 @@ omp_resolve_declare_variant (tree base) ? 
TREE_PURPOSE (TREE_VALUE (variant1)) : base); } +void +omp_lto_output_declare_variant_alt (lto_simple_output_block *ob, + cgraph_node *node, + lto_symtab_encoder_t encoder) +{ + gcc_assert (node->declare_variant_alt); + + omp_declare_variant_base_entry entry; + entry.base = NULL; + entry.node = node; + entry.variants = NULL; + omp_declare_variant_base_entry *entryp + = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (node->decl)); + gcc_assert (entryp); + + int nbase = lto_symtab_encoder_lookup (encoder, entryp->base); + gcc_assert (nbase != LCC_NOT_FOUND); + streamer_write_hwi_stream (ob->main_stream, nbase); + + streamer_write_hwi_stream (ob->main_stream, entryp->variants->length ()); + + unsigned int i; + omp_declare_variant_entry *varentry; + FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry) + { + int nvar = lto_symtab_encoder_lookup (encoder, varentry->variant); + gcc_assert (nvar != LCC_NOT_FOUND); + streamer_write_hwi_stream (ob->main_stream, nvar); + + for (widest_int *w = &varentry->score; ; + w = &varentry->score_in_declare_simd_clone) + { + unsigned len = w->get_len (); + streamer_write_hwi_stream (ob->main_stream, len); + const HOST_WIDE_INT *val = w->get_val (); + for (unsigned j = 0; j < len; j++) + streamer_write_hwi_stream (ob->main_stream, val[j]); + if (w == &varentry->score_in_declare_simd_clone) + break; + } + + stream_write_tree (ob, varentry->ctx, false); + streamer_write_hwi_stream (ob->main_stream, varentry->matches); + } +} + +void +omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node, + vec<symtab_node *> nodes) +{ + gcc_assert (node->declare_variant_alt); + omp_declare_variant_base_entry *entryp + = ggc_cleared_alloc<omp_declare_variant_base_entry> (); + entryp->base = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]); + entryp->node = node; + unsigned int len = streamer_read_hwi (ib); + vec_alloc (entryp->variants, len); + + for (unsigned int i = 0; i < len; i++) + { + omp_declare_variant_entry 
varentry; + varentry.variant + = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]); + for (widest_int *w = &varentry.score; ; + w = &varentry.score_in_declare_simd_clone) + { + unsigned len2 = streamer_read_hwi (ib); + HOST_WIDE_INT arr[WIDE_INT_MAX_ELTS]; + gcc_assert (len2 <= WIDE_INT_MAX_ELTS); + for (unsigned int j = 0; j < len2; j++) + arr[j] = streamer_read_hwi (ib); + *w = widest_int::from_array (arr, len2, true); + if (w == &varentry.score_in_declare_simd_clone) + break; + } + varentry.ctx = stream_read_tree (ib, /*data_in*/NULL); + varentry.matches = streamer_read_hwi (ib) != 0; + entryp->variants->quick_push (varentry); + } + if (omp_declare_variant_alt == NULL) + omp_declare_variant_alt + = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64); + *omp_declare_variant_alt->find_slot_with_hash (entryp, DECL_UID (node->decl), + INSERT) = entryp; +} /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK macro on gomp-constants.h. We do not check for overflow. */ Jakub