On Thu, Nov 10, 2016 at 06:45:10PM +0800, Chung-Lin Tang wrote: > 2016-XX-XX Nathan Sidwell <nat...@codesourcery.com> > > * internal-fn.def (GOACC_DIM_POS): Comment may be overly conservative. > (GOACC_TILE): New. > * internal-fn.c (expand_GOACC_TILE): New. > > * omp-low.c (struct omp_for_data): Add tiling field. > (struct oacc_loop): Change 'ifns' to vector of call stmts, > add e_mask field.
Please avoid using 8 spaces instead of a tab in ChangeLog. > dump partitioning. > (oacc_loop_auto_partitions): Add outer_assign parm. Assign all but > vector partitioning to outer loops. Assign 2 partitions to loops > when available. Add TILE handling. > (oacc_loop_partition): Adjust oacc_loop_auto_partitions call. > (execite_oacc_device_lower): Process GOACC_TILE fns, ignore unknown > specs. Here too. execute instead of execite? And the last line is too long. > @@ -626,7 +638,8 @@ extract_omp_for_data (gomp_for *for_stmt, struct o > int cnt = fd->ordered ? fd->ordered : fd->collapse; > for (i = 0; i < cnt; i++) > { > - if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL)) > + if (i == 0 && fd->collapse == 1 && !fd->tiling > + && (fd->ordered == 0 || loops == NULL)) > loop = &fd->loop; > else if (loops != NULL) > loop = loops + i; If the condition fits on one line, it can stay as is, if it can't, then you should use a if (i == 0 && fd->collapse == 1 && !fd->tiling && (fd->ordered == 0 || loops == NULL)) IMHO. > + tree tile = TREE_VALUE (tiling); > + gcall *call = gimple_build_call_internal > + (IFN_GOACC_TILE, 5, num, loop_no, tile, > + /* gwv-outer=*/integer_zero_node, > + /* gwv-inner=*/integer_zero_node); I don't really like the ( on separate line unless absolutely necessary. So better: gcall *call = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile, integer_zero_node, integer_zero_node); > + call = gimple_build_call_internal > + (IFN_GOACC_LOOP, 7, > + build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET), > + dir, e_range, element_s, chunk, e_gwv, chunk); Similarly. For the build_int_cst argument just add a temporary with a short name (e.g. t) and initialize it to build_int_cst before the gimple_build_call_internal. > + gimple_call_set_lhs (call, e_offset); > + gimple_set_location (call, loc); > + gsi_insert_before (&gsi, call, GSI_SAME_STMT); > + > + call = gimple_build_call_internal > + (IFN_GOACC_LOOP, 7, > + build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND), > + dir, e_range, element_s, chunk, e_gwv, e_offset); > + gimple_call_set_lhs (call, e_bound); > + gimple_set_location (call, loc); > + gsi_insert_before (&gsi, call, GSI_SAME_STMT); > + > + call = gimple_build_call_internal > + (IFN_GOACC_LOOP, 6, > + build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP), > + dir, e_range, element_s, chunk, e_gwv); And again 2x. > if (cont_bb) > { > - /* We now have one or two nested loops. Update the loop > + /* We now have one, two or three nested loops. Update the loop Only one space after , - we use 2 spaces only after full stop. > @@ -11537,6 +11712,15 @@ expand_oacc_for (struct omp_region *region, struct > body_loop->header = body_bb; > body_loop->latch = cont_bb; > add_loop (body_loop, parent); > + > + if (fd->tiling) > + { > + // Insert tiling's element loop Please use /* */ style comment instead, plus full stop: /* Insert tiling's element loop. */ > + struct loop *inner_loop = alloc_loop (); > + inner_loop->header = elem_body_bb; > + inner_loop->latch = elem_cont_bb; > + add_loop (inner_loop, body_loop); > +static void > +oacc_xform_tile (gcall *call) > +{ > + gimple_stmt_iterator gsi = gsi_for_stmt (call); > + unsigned collapse = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, > 0)); > + /* Inner loops have higher loop_nos. */ > + unsigned loop_no = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 1)); > + tree tile_size = gimple_call_arg (call, 2); > + unsigned e_mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 4)); Please use unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0)); etc. instead. > + tree lhs = gimple_call_lhs (call); > + tree type = TREE_TYPE (lhs); > + gimple_seq seq = NULL; > + tree span = build_int_cst (type, 1); > + > + gcc_assert (!(e_mask > + & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR) > + | GOMP_DIM_MASK (GOMP_DIM_WORKER)))); > + push_gimplify_context (!seen_error ()); > + if ( > +#ifndef ACCEL_COMPILER > + 1 || > +#endif > + !e_mask) We don't want || at the end of line. Perhaps better #ifndef ACCEL_COMPILER e_mask = 0; #endif if (!e_mask) ? > + switch (gimple_call_internal_fn (call)) > + { > + case IFN_GOACC_LOOP: > + { > + bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node; > + *gimple_call_arg_ptr (call, 5) = is_e ? e_mask_arg : mask_arg; > + if (!is_e) > + *gimple_call_arg_ptr (call, 4) = chunk_arg; > + } > + break; > > + case IFN_GOACC_TILE: > + *gimple_call_arg_ptr (call, 3) = mask_arg; > + *gimple_call_arg_ptr (call, 4) = e_mask_arg; > + break; Please use gimple_call_set_arg (call, idx, val) instead of *gimple_call_arg_ptr (call, idx) = val > + /* Apply auto partitioning if this is a non-partitioned regular > + loop, or (no more than) single axis tiled loop. */ > + bool maybe_auto = !seq_par > + && this_mask == (tiling ? this_mask & -this_mask : 0); Wrong formatting, the && must go below !seq or better there should be () around the whole rhs. Or maybe even better bool maybe_auto = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0); > if (loop->child) > { > - loop->inner = oacc_loop_fixed_partitions (loop->child, > - outer_mask | this_mask); > + loop->inner = oacc_loop_fixed_partitions > + (loop->child, outer_mask | this_mask | loop->e_mask); See above for ( on next line. Perhaps introduce a temporary holding outer_mask | this_mask | loop->e_mask or similar. > - if (assign && !loop->mask) > + if (loop->child) > + loop->inner = oacc_loop_auto_partitions > + (loop->child, outer_mask | loop->mask | loop->e_mask, > + outer_assign | assign); Again. > /* Determine the outermost partitioning used within this loop. */ > this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX); > this_mask = least_bit_hwi (this_mask); > > /* Pick the partitioning just inside that one. */ > this_mask >>= 1; > - > /* And avoid picking one use by an outer loop. */ > this_mask &= ~outer_mask; Why remove the empty line? Jakub