When SRA completely scalarizes an array, this patch changes the generated accesses from e.g.
   MEM[(int[8] *)&a + 4B] = 1;

to

   a[1] = 1;

This overcomes a limitation in dom2, namely that differently-written accesses
to equivalent chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p
with one another.  This is necessary for constant propagation in the
ssa-dom-cse-2.c testcase (after the next patch, which makes SRA handle
constant-pool loads).

I tried to work around this by making dom2's hashable_expr_equal_p less
conservative, but found that on platforms without AArch64's vectorized
reductions (specifically Alpha, hppa, PowerPC and SPARC, the targets mentioned
in ssa-dom-cse-2.c), I also needed to make MEM[(int[8] *)&a] equivalent to
a[0], etc.; a complete overhaul of hashable_expr_equal_p seems like a larger
task than this patch series.

I can't see how to write a testcase for this in C, though, as direct
assignment to an array is not possible; such assignments occur only with
constant-pool data, which is dealt with in the next patch.

Bootstrapped + check-gcc on x86-none-linux-gnu, arm-none-linux-gnueabihf and
aarch64-none-linux-gnu.

gcc/ChangeLog:

        * tree-sra.c (completely_scalarize): Move some code into:
        (get_elem_size): New.
        (build_ref_for_offset): Build ARRAY_REF if base is aligned array.
---
 gcc/tree-sra.c | 110 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 08fa8dc..af35fcc 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -957,6 +957,20 @@ scalarizable_type_p (tree type)
     }
 }
 
+static bool
+get_elem_size (const_tree type, unsigned HOST_WIDE_INT *sz_out)
+{
+  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+  tree t_size = TYPE_SIZE (TREE_TYPE (type));
+  if (!t_size || !tree_fits_uhwi_p (t_size))
+    return false;
+  unsigned HOST_WIDE_INT sz = tree_to_uhwi (t_size);
+  if (!sz)
+    return false;
+  *sz_out = sz;
+  return true;
+}
+
 static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
 
 /* Create total_scalarization accesses for all scalar fields of a member
@@ -985,10 +999,9 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
     case ARRAY_TYPE:
       {
         tree elemtype = TREE_TYPE (decl_type);
-        tree elem_size = TYPE_SIZE (elemtype);
-        gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
-        int el_size = tree_to_uhwi (elem_size);
-        gcc_assert (el_size);
+        unsigned HOST_WIDE_INT el_size;
+        if (!get_elem_size (decl_type, &el_size))
+          gcc_assert (false);
 
         tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
         tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
@@ -1563,7 +1576,7 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
   tree off;
   tree mem_ref;
   HOST_WIDE_INT base_offset;
-  unsigned HOST_WIDE_INT misalign;
+  unsigned HOST_WIDE_INT misalign, el_sz;
   unsigned int align;
 
   gcc_checking_assert (offset % BITS_PER_UNIT == 0);
@@ -1572,47 +1585,62 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
 
   /* get_addr_base_and_unit_offset returns NULL for references with a variable
      offset such as array[var_index].  */
-  if (!base)
-    {
-      gassign *stmt;
-      tree tmp, addr;
-
-      gcc_checking_assert (gsi);
-      tmp = make_ssa_name (build_pointer_type (TREE_TYPE (prev_base)));
-      addr = build_fold_addr_expr (unshare_expr (prev_base));
-      STRIP_USELESS_TYPE_CONVERSION (addr);
-      stmt = gimple_build_assign (tmp, addr);
-      gimple_set_location (stmt, loc);
-      if (insert_after)
-        gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
-      else
-        gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-
-      off = build_int_cst (reference_alias_ptr_type (prev_base),
-                           offset / BITS_PER_UNIT);
-      base = tmp;
-    }
-  else if (TREE_CODE (base) == MEM_REF)
-    {
-      off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
-                           base_offset + offset / BITS_PER_UNIT);
-      off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
-      base = unshare_expr (TREE_OPERAND (base, 0));
+  if (base
+      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
+      && misalign == 0
+      && get_elem_size (TREE_TYPE (base), &el_sz)
+      && ((offset % el_sz) == 0)
+      && useless_type_conversion_p (exp_type, TREE_TYPE (TREE_TYPE (base)))
+      && (align >= TYPE_ALIGN (exp_type)))
+    {
+      tree idx = build_int_cst (TYPE_DOMAIN (TREE_TYPE (base)), offset / el_sz);
+      base = unshare_expr (base);
+      mem_ref = build4 (ARRAY_REF, exp_type, base, idx, NULL_TREE, NULL_TREE);
     }
   else
     {
-      off = build_int_cst (reference_alias_ptr_type (base),
-                           base_offset + offset / BITS_PER_UNIT);
-      base = build_fold_addr_expr (unshare_expr (base));
-    }
+      if (!base)
+        {
+          gassign *stmt;
+          tree tmp, addr;
+
+          gcc_checking_assert (gsi);
+          tmp = make_ssa_name (build_pointer_type (TREE_TYPE (prev_base)));
+          addr = build_fold_addr_expr (unshare_expr (prev_base));
+          STRIP_USELESS_TYPE_CONVERSION (addr);
+          stmt = gimple_build_assign (tmp, addr);
+          gimple_set_location (stmt, loc);
+          if (insert_after)
+            gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
+          else
+            gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 
-  misalign = (misalign + offset) & (align - 1);
-  if (misalign != 0)
-    align = (misalign & -misalign);
-  if (align != TYPE_ALIGN (exp_type))
-    exp_type = build_aligned_type (exp_type, align);
+          off = build_int_cst (reference_alias_ptr_type (prev_base),
+                               offset / BITS_PER_UNIT);
+          base = tmp;
+        }
+      else if (TREE_CODE (base) == MEM_REF)
+        {
+          off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
+                               base_offset + offset / BITS_PER_UNIT);
+          off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
+          base = unshare_expr (TREE_OPERAND (base, 0));
+        }
+      else
+        {
+          off = build_int_cst (reference_alias_ptr_type (base),
+                               base_offset + offset / BITS_PER_UNIT);
+          base = build_fold_addr_expr (unshare_expr (base));
+        }
 
-  mem_ref = fold_build2_loc (loc, MEM_REF, exp_type, base, off);
+      misalign = (misalign + offset) & (align - 1);
+      if (misalign != 0)
+        align = (misalign & -misalign);
+      if (align != TYPE_ALIGN (exp_type))
+        exp_type = build_aligned_type (exp_type, align);
+
+      mem_ref = fold_build2_loc (loc, MEM_REF, exp_type, base, off);
+    }
   if (TREE_THIS_VOLATILE (prev_base))
     TREE_THIS_VOLATILE (mem_ref) = 1;
   if (TREE_SIDE_EFFECTS (prev_base))
-- 
1.8.3
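
For illustration, the kind of code this series targets is roughly the shape of
ssa-dom-cse-2.c (the snippet below is a sketch under that assumption, not the
testcase's exact source): a local array initialized from what typically becomes
constant-pool data, then reduced in a loop.  Once the next patch lets SRA
scalarize the constant-pool initializer, the aim is that dom2 can fold the
reduction to a constant, but only if the accesses SRA rebuilds (with this
patch, ARRAY_REFs such as a[1]) hash equal to the a[i] reads.

  /* Illustrative sketch only; roughly the shape exercised by ssa-dom-cse-2.c,
     not its exact source.  On many targets the initializer below is emitted
     as constant-pool data and copied into 'a'.  After SRA scalarizes that
     copy (next patch) and rebuilds the accesses as ARRAY_REFs (this patch),
     the intent is that dom2 can propagate the constants so the function folds
     to a single constant return (36 for these values).  */
  int
  foo (void)
  {
    int a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    int i, sum = 0;
    for (i = 0; i < 8; i++)
      sum += a[i];
    return sum;
  }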