Hi! On 2021-03-02T04:20:11-0800, Julian Brown <jul...@codesourcery.com> wrote: > This patch implements worker-partitioning support in the middle end, > by rewriting gimple. [...]
Yay! > This version of the patch [...] > avoids moving SESE-region finding code out > of the NVPTX backend So that's 'struct bb_sese' and following functions. > since that code isn't used by the middle-end worker > partitioning neutering/broadcasting implementation yet. I understand correctly that "isn't used [...] yet" means that (a) "this isn't implemented yet" (on og11 etc.), and doesn't mean (b) "is missing from this patch submission"? ... thus from (a) it follows that we may later also drop from the og11 branch these changes? Relatedly, a nontrivial amount of data structures/logic/code did get duplicated from the nvptx back end, and modified slightly or not-so-slightly (RTL vs. GIMPLE plus certain implementation "details"). We should at least cross reference the two instances, to make sure that any changes to one are also propagated to the other. (I'll take care.) And then, do you (or anyone else, of course) happen to have any clever idea about how to avoid the duplication, and somehow combine the RTL vs. GIMPLE implementations? Given that we nowadays may use C++ -- do you foresee it feasible to have an abstract base class capturing basically the data structures, logic, common code, and then RTL-specialized plus GIMPLE-specialized classes inheriting from that? For example: $ sed -e s%parallel_g%parallel%g < gcc/oacc-neuter-bcast.c > gcc/oacc-neuter-bcast.c_ $ git diff --no-index --word-diff -b --patience gcc/config/nvptx/nvptx.c gcc/oacc-neuter-bcast.c_ [...] /* Loop structure of the function. The entire function is described as a NULL loop. */ @@ -3229,17 +80,21 @@ struct parallel basic_block forked_block; basic_block join_block; [-rtx_insn *forked_insn;-] [- rtx_insn *join_insn;-]{+gimple *forked_stmt;+} {+ gimple *join_stmt;+} [-rtx_insn *fork_insn;-] [- rtx_insn *joining_insn;-]{+gimple *fork_stmt;+} {+ gimple *joining_stmt;+} /* Basic blocks in this parallel, but not in child parallels. The FORKED and JOINING blocks are in the partition. The FORK and JOIN blocks are not. */ auto_vec<basic_block> blocks; {+tree record_type;+} {+ tree sender_decl;+} {+ tree receiver_decl;+} public: parallel (parallel *parent, unsigned mode); ~parallel (); @@ -3252,8 +107,12 @@ parallel::parallel (parallel *parent_, unsigned mask_) :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0) { forked_block = join_block = 0; [-forked_insn-]{+forked_stmt+} = [-join_insn-]{+join_stmt+} = [-0;-] [- fork_insn-]{+NULL;+} {+ fork_stmt+} = [-joining_insn-]{+joining_stmt+} = [-0;-]{+NULL;+} {+ record_type = NULL_TREE;+} {+ sender_decl = NULL_TREE;+} {+ receiver_decl = NULL_TREE;+} if (parent) { @@ -3268,12 +127,54 @@ parallel::~parallel () delete next; } [...] /* Split basic blocks such that each forked and join unspecs are at the start of their basic blocks. Thus afterwards each block will @@ -3284,111 +185,168 @@ typedef auto_vec<insn_bb_t> insn_bb_vec_t; used when finding partitions. */ static void [-nvptx_split_blocks (bb_insn_map_t-]{+omp_sese_split_blocks (bb_stmt_map_t+} *map) { [-insn_bb_vec_t-]{+auto_vec<gimple *>+} worklist; basic_block block; [- rtx_insn *insn;-] /* Locate all the reorg instructions of interest. */ FOR_ALL_BB_FN (block, cfun) { [- bool seen_insn = false;-] /* Clear visited flag, for use by parallel locator */ block->flags &= ~BB_VISITED; [-FOR_BB_INSNS (block, insn)-]{+for (gimple_stmt_iterator gsi = gsi_start_bb (block);+} {+ !gsi_end_p (gsi);+} {+ gsi_next (&gsi))+} { [...] /* Dump this parallel and all its inner parallels. */ static void [-nvptx_dump_pars-]{+omp_sese_dump_pars+} (parallel *par, unsigned depth) { fprintf (dump_file, "%u: mask %d {+(%s)+} head=%d, tail=%d\n", depth, par->mask, {+mask_name (par->mask),+} par->forked_block ? par->forked_block->index : -1, par->join_block ? par->join_block->index : -1); @@ -3399,10 +357,10 @@ nvptx_dump_pars (parallel *par, unsigned depth) fprintf (dump_file, " %d", block->index); fprintf (dump_file, "\n"); if (par->inner) [-nvptx_dump_pars-]{+omp_sese_dump_pars+} (par->inner, depth + 1); if (par->next) [-nvptx_dump_pars-]{+omp_sese_dump_pars+} (par->next, depth); } /* If BLOCK contains a fork/join marker, process it to create or @@ -3410,60 +368,84 @@ nvptx_dump_pars (parallel *par, unsigned depth) and then walk successor blocks. */ static parallel * [-nvptx_find_par (bb_insn_map_t-]{+omp_sese_find_par (bb_stmt_map_t+} *map, parallel *par, basic_block block) { if (block->flags & BB_VISITED) return par; block->flags |= BB_VISITED; if [-(rtx_insn **endp-]{+(gimple **stmtp+} = map->get (block)) { [...] static parallel * [-nvptx_discover_pars (bb_insn_map_t-]{+omp_sese_discover_pars (bb_stmt_map_t+} *map) { basic_block block; @@ -3502,3468 +485,1033 @@ nvptx_discover_pars (bb_insn_map_t *map) block = ENTRY_BLOCK_PTR_FOR_FN (cfun); block->flags &= ~BB_VISITED; parallel *par = [-nvptx_find_par-]{+omp_sese_find_par+} (map, 0, block); if (dump_file) { fprintf (dump_file, "\nLoops\n"); [-nvptx_dump_pars-]{+omp_sese_dump_pars+} (par, 0); fprintf (dump_file, "\n"); } return par; } (For brevity, I stripped out the parts where implementation "details" differ considerably.) Grüße Thomas ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955