The following patch implements CSEing of "subreg" reads from memory like (from the testcase in the PR)
union U { int i[16]; char c; }; char foo(int i) { union U u; u.i[0] = i; return u.c; } CSEing u.c as (char)i and thus removing u during GIMPLE optimizations. The patch always goes via generating BIT_FIELD_REFs and letting them be simplified via the match-and-simplify machinery. This means it replaces handling of complex component and vector extracts we've been able to do before. I didn't restrict the kind of BIT_FIELD_REFs much apart from requiring byte-size accesses. I did inspect code generated on powerpc (big-endian) for the testcase though (also to verify any endianess issues) and didn't spot anything wrong (even for non-lowpart "subregs"). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2016-05-09 Richard Biener <rguent...@suse.de> PR tree-optimization/70497 * tree-ssa-sccvn.c (vn_nary_build_or_lookup): New function split out from ... (visit_reference_op_load): ... here. (vn_reference_lookup_3): Use it to handle subreg-like accesses with simplified BIT_FIELD_REFs. * tree-ssa-pre.c (eliminate_insert): Handle inserting BIT_FIELD_REFs. * tree-complex.c (extract_component): Handle BIT_FIELD_REFs correctly. * gcc.dg/torture/20160404-1.c: New testcase. * gcc.dg/tree-ssa/ssa-fre-54.c: Likewise. Index: gcc/tree-ssa-sccvn.c =================================================================== *** gcc/tree-ssa-sccvn.c.orig 2016-05-03 15:15:58.002994741 +0200 --- gcc/tree-ssa-sccvn.c 2016-05-03 15:35:41.976538344 +0200 *************** vn_reference_lookup_or_insert_for_pieces *** 1610,1615 **** --- 1610,1714 ---- operands.copy (), value, value_id); } + static vn_nary_op_t vn_nary_op_insert_stmt (gimple *stmt, tree result); + + /* Hook for maybe_push_res_to_seq, lookup the expression in the VN tables. */ + + static tree + vn_lookup_simplify_result (code_helper rcode, tree type, tree *ops) + { + if (!rcode.is_tree_code ()) + return NULL_TREE; + vn_nary_op_t vnresult = NULL; + return vn_nary_op_lookup_pieces (TREE_CODE_LENGTH ((tree_code) rcode), + (tree_code) rcode, type, ops, &vnresult); + } + + /* Return a value-number for RCODE OPS... either by looking up an existing + value-number for the simplified result or by inserting the operation. */ + + static tree + vn_nary_build_or_lookup (code_helper rcode, tree type, tree *ops) + { + tree result = NULL_TREE; + /* We will be creating a value number for + ROCDE (OPS...). + So first simplify and lookup this expression to see if it + is already available. */ + mprts_hook = vn_lookup_simplify_result; + bool res = false; + switch (TREE_CODE_LENGTH ((tree_code) rcode)) + { + case 1: + res = gimple_resimplify1 (NULL, &rcode, type, ops, vn_valueize); + break; + case 2: + res = gimple_resimplify2 (NULL, &rcode, type, ops, vn_valueize); + break; + case 3: + res = gimple_resimplify3 (NULL, &rcode, type, ops, vn_valueize); + break; + } + mprts_hook = NULL; + gimple *new_stmt = NULL; + if (res + && gimple_simplified_result_is_gimple_val (rcode, ops)) + /* The expression is already available. */ + result = ops[0]; + else + { + tree val = vn_lookup_simplify_result (rcode, type, ops); + if (!val) + { + gimple_seq stmts = NULL; + result = maybe_push_res_to_seq (rcode, type, ops, &stmts); + if (result) + { + gcc_assert (gimple_seq_singleton_p (stmts)); + new_stmt = gimple_seq_first_stmt (stmts); + } + } + else + /* The expression is already available. */ + result = val; + } + if (new_stmt) + { + /* The expression is not yet available, value-number lhs to + the new SSA_NAME we created. */ + /* Initialize value-number information properly. */ + VN_INFO_GET (result)->valnum = result; + VN_INFO (result)->value_id = get_next_value_id (); + gimple_seq_add_stmt_without_update (&VN_INFO (result)->expr, + new_stmt); + VN_INFO (result)->needs_insertion = true; + /* As all "inserted" statements are singleton SCCs, insert + to the valid table. This is strictly needed to + avoid re-generating new value SSA_NAMEs for the same + expression during SCC iteration over and over (the + optimistic table gets cleared after each iteration). + We do not need to insert into the optimistic table, as + lookups there will fall back to the valid table. */ + if (current_info == optimistic_info) + { + current_info = valid_info; + vn_nary_op_insert_stmt (new_stmt, result); + current_info = optimistic_info; + } + else + vn_nary_op_insert_stmt (new_stmt, result); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Inserting name "); + print_generic_expr (dump_file, result, 0); + fprintf (dump_file, " for expression "); + print_gimple_expr (dump_file, new_stmt, 0, TDF_SLIM); + fprintf (dump_file, "\n"); + } + } + return result; + } + /* Callback for walk_non_aliased_vuses. Tries to perform a lookup from the statement defining VUSE and if not successful tries to translate *REFP and VR_ through an aggregate copy at the definition *************** vn_reference_lookup_3 (ao_ref *ref, tree *** 1815,1829 **** /* 3) Assignment from a constant. We can use folds native encode/interpret routines to extract the assigned bits. */ ! else if (vn_walk_kind == VN_WALKREWRITE ! && CHAR_BIT == 8 && BITS_PER_UNIT == 8 ! && ref->size == maxsize ! && maxsize % BITS_PER_UNIT == 0 ! && offset % BITS_PER_UNIT == 0 && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) ! && is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))) { tree base2; HOST_WIDE_INT offset2, size2, maxsize2; --- 1914,1929 ---- /* 3) Assignment from a constant. We can use folds native encode/interpret routines to extract the assigned bits. */ ! else if (ref->size == maxsize && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) ! && CHAR_BIT == 8 && BITS_PER_UNIT == 8 ! && maxsize % BITS_PER_UNIT == 0 ! && offset % BITS_PER_UNIT == 0 ! && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)) ! || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME ! && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) { tree base2; HOST_WIDE_INT offset2, size2, maxsize2; *************** vn_reference_lookup_3 (ao_ref *ref, tree *** 1843,1848 **** --- 1943,1951 ---- unsigned char buffer[64]; int len; + tree rhs = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = SSA_VAL (rhs); len = native_encode_expr (gimple_assign_rhs1 (def_stmt), buffer, sizeof (buffer)); if (len > 0) *************** vn_reference_lookup_3 (ao_ref *ref, tree *** 1867,1922 **** && gimple_assign_single_p (def_stmt) && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) { ! tree rhs1 = gimple_assign_rhs1 (def_stmt); ! gimple *def_stmt2 = SSA_NAME_DEF_STMT (rhs1); ! if (is_gimple_assign (def_stmt2) ! && (gimple_assign_rhs_code (def_stmt2) == COMPLEX_EXPR ! || gimple_assign_rhs_code (def_stmt2) == CONSTRUCTOR) ! && types_compatible_p (vr->type, TREE_TYPE (TREE_TYPE (rhs1)))) ! { ! tree base2; ! HOST_WIDE_INT offset2, size2, maxsize2, off; ! bool reverse; ! base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), ! &offset2, &size2, &maxsize2, ! &reverse); ! off = offset - offset2; ! if (!reverse ! && maxsize2 != -1 ! && maxsize2 == size2 ! && operand_equal_p (base, base2, 0) ! && offset2 <= offset ! && offset2 + size2 >= offset + maxsize) { ! tree val = NULL_TREE; ! HOST_WIDE_INT elsz ! = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (rhs1)))); ! if (gimple_assign_rhs_code (def_stmt2) == COMPLEX_EXPR) ! { ! if (off == 0) ! val = gimple_assign_rhs1 (def_stmt2); ! else if (off == elsz) ! val = gimple_assign_rhs2 (def_stmt2); ! } ! else if (gimple_assign_rhs_code (def_stmt2) == CONSTRUCTOR ! && off % elsz == 0) ! { ! tree ctor = gimple_assign_rhs1 (def_stmt2); ! unsigned i = off / elsz; ! if (i < CONSTRUCTOR_NELTS (ctor)) ! { ! constructor_elt *elt = CONSTRUCTOR_ELT (ctor, i); ! if (TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) ! { ! if (TREE_CODE (TREE_TYPE (elt->value)) ! != VECTOR_TYPE) ! val = elt->value; ! } ! } ! } ! if (val) ! return vn_reference_lookup_or_insert_for_pieces ! (vuse, vr->set, vr->type, vr->operands, val); } } } --- 1970,2006 ---- && gimple_assign_single_p (def_stmt) && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) { ! tree base2; ! HOST_WIDE_INT offset2, size2, maxsize2; ! bool reverse; ! base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), ! &offset2, &size2, &maxsize2, ! &reverse); ! if (!reverse ! && maxsize2 != -1 ! && maxsize2 == size2 ! && operand_equal_p (base, base2, 0) ! && offset2 <= offset ! && offset2 + size2 >= offset + maxsize ! /* ??? We can't handle bitfield precision extracts without ! either using an alternate type for the BIT_FIELD_REF and ! then doing a conversion or possibly adjusting the offset ! according to endianess. */ ! && (! INTEGRAL_TYPE_P (vr->type) ! || ref->size == TYPE_PRECISION (vr->type)) ! && ref->size % BITS_PER_UNIT == 0) ! { ! code_helper rcode = BIT_FIELD_REF; ! tree ops[3]; ! ops[0] = SSA_VAL (gimple_assign_rhs1 (def_stmt)); ! ops[1] = bitsize_int (ref->size); ! ops[2] = bitsize_int (offset - offset2); ! tree val = vn_nary_build_or_lookup (rcode, vr->type, ops); ! if (val) { ! vn_reference_t res = vn_reference_lookup_or_insert_for_pieces ! (vuse, vr->set, vr->type, vr->operands, val); ! return res; } } } *************** vn_nary_op_lookup_stmt (gimple *stmt, vn *** 2657,2674 **** return vn_nary_op_lookup_1 (vno1, vnresult); } - /* Hook for maybe_push_res_to_seq, lookup the expression in the VN tables. */ - - static tree - vn_lookup_simplify_result (code_helper rcode, tree type, tree *ops) - { - if (!rcode.is_tree_code ()) - return NULL_TREE; - vn_nary_op_t vnresult = NULL; - return vn_nary_op_lookup_pieces (TREE_CODE_LENGTH ((tree_code) rcode), - (tree_code) rcode, type, ops, &vnresult); - } - /* Allocate a vn_nary_op_t with LENGTH operands on STACK. */ static vn_nary_op_t --- 2741,2746 ---- *************** visit_reference_op_load (tree lhs, tree *** 3417,3485 **** of VIEW_CONVERT_EXPR <TREE_TYPE (result)> (result). So first simplify and lookup this expression to see if it is already available. */ - mprts_hook = vn_lookup_simplify_result; code_helper rcode = VIEW_CONVERT_EXPR; tree ops[3] = { result }; ! bool res = gimple_resimplify1 (NULL, &rcode, TREE_TYPE (op), ops, ! vn_valueize); ! mprts_hook = NULL; ! gimple *new_stmt = NULL; ! if (res ! && gimple_simplified_result_is_gimple_val (rcode, ops)) ! /* The expression is already available. */ ! result = ops[0]; ! else ! { ! tree val = vn_lookup_simplify_result (rcode, TREE_TYPE (op), ops); ! if (!val) ! { ! gimple_seq stmts = NULL; ! result = maybe_push_res_to_seq (rcode, TREE_TYPE (op), ops, ! &stmts); ! if (result) ! { ! gcc_assert (gimple_seq_singleton_p (stmts)); ! new_stmt = gimple_seq_first_stmt (stmts); ! } ! } ! else ! /* The expression is already available. */ ! result = val; ! } ! if (new_stmt) ! { ! /* The expression is not yet available, value-number lhs to ! the new SSA_NAME we created. */ ! /* Initialize value-number information properly. */ ! VN_INFO_GET (result)->valnum = result; ! VN_INFO (result)->value_id = get_next_value_id (); ! gimple_seq_add_stmt_without_update (&VN_INFO (result)->expr, ! new_stmt); ! VN_INFO (result)->needs_insertion = true; ! /* As all "inserted" statements are singleton SCCs, insert ! to the valid table. This is strictly needed to ! avoid re-generating new value SSA_NAMEs for the same ! expression during SCC iteration over and over (the ! optimistic table gets cleared after each iteration). ! We do not need to insert into the optimistic table, as ! lookups there will fall back to the valid table. */ ! if (current_info == optimistic_info) ! { ! current_info = valid_info; ! vn_nary_op_insert_stmt (new_stmt, result); ! current_info = optimistic_info; ! } ! else ! vn_nary_op_insert_stmt (new_stmt, result); ! if (dump_file && (dump_flags & TDF_DETAILS)) ! { ! fprintf (dump_file, "Inserting name "); ! print_generic_expr (dump_file, result, 0); ! fprintf (dump_file, " for expression "); ! print_gimple_expr (dump_file, new_stmt, 0, TDF_SLIM); ! fprintf (dump_file, "\n"); ! } ! } } if (result) --- 3489,3497 ---- of VIEW_CONVERT_EXPR <TREE_TYPE (result)> (result). So first simplify and lookup this expression to see if it is already available. */ code_helper rcode = VIEW_CONVERT_EXPR; tree ops[3] = { result }; ! result = vn_nary_build_or_lookup (rcode, TREE_TYPE (op), ops); } if (result) Index: gcc/tree-ssa-pre.c =================================================================== *** gcc/tree-ssa-pre.c.orig 2016-05-03 15:15:58.002994741 +0200 --- gcc/tree-ssa-pre.c 2016-05-03 15:35:41.976538344 +0200 *************** eliminate_insert (gimple_stmt_iterator * *** 3863,3881 **** gimple *stmt = gimple_seq_first_stmt (VN_INFO (val)->expr); if (!is_gimple_assign (stmt) || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) ! && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR)) return NULL_TREE; tree op = gimple_assign_rhs1 (stmt); ! if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR) op = TREE_OPERAND (op, 0); tree leader = TREE_CODE (op) == SSA_NAME ? eliminate_avail (op) : op; if (!leader) return NULL_TREE; gimple_seq stmts = NULL; ! tree res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), ! TREE_TYPE (val), leader); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); VN_INFO_GET (res)->valnum = val; --- 3863,3890 ---- gimple *stmt = gimple_seq_first_stmt (VN_INFO (val)->expr); if (!is_gimple_assign (stmt) || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) ! && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR ! && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF)) return NULL_TREE; tree op = gimple_assign_rhs1 (stmt); ! if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR ! || gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) op = TREE_OPERAND (op, 0); tree leader = TREE_CODE (op) == SSA_NAME ? eliminate_avail (op) : op; if (!leader) return NULL_TREE; gimple_seq stmts = NULL; ! tree res; ! if (gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) ! res = gimple_build (&stmts, BIT_FIELD_REF, ! TREE_TYPE (val), leader, ! TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), ! TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); ! else ! res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), ! TREE_TYPE (val), leader); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); VN_INFO_GET (res)->valnum = val; Index: gcc/testsuite/gcc.dg/torture/20160404-1.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/torture/20160404-1.c 2016-05-03 15:35:41.996538573 +0200 *************** *** 0 **** --- 1,21 ---- + /* { dg-do compile } */ + + typedef __UINT64_TYPE__ UINT64; + typedef union { + struct { + unsigned short lo4; + unsigned short lo3; + unsigned short lo2; + unsigned short lo1; + } i; + long double f; + } BID_BINARY80LDOUBLE; + UINT64 __binary80_to_bid32 (long double x) + { + BID_BINARY80LDOUBLE x_in; + x_in.f = x; + return (x_in.i.lo4 + + ((UINT64)x_in.i.lo3 << 16) + + ((UINT64)x_in.i.lo2 << 32) + + ((UINT64)x_in.i.lo1 << 48)); + } Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c 2016-05-03 15:35:42.000538619 +0200 *************** *** 0 **** --- 1,56 ---- + /* { dg-do run } */ + /* { dg-require-effective-target int32plus } */ + /* { dg-options "-O -fdump-tree-fre1 -fdump-tree-dse1" } */ + + extern void abort (void); + + union U { int i; char c[4]; short s[2]; }; + + char __attribute__((noinline,noclone)) foo(int i) + { + union U u; + u.i = i; + /* This should be equivalent to (char) i. */ + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return u.c[0]; + #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return u.c[3]; + #else + return 0x04; + #endif + } + + short __attribute__((noinline,noclone)) baz(int i) + { + union U u; + u.i = i; + /* This should be equivalent to (char) i. */ + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return u.s[0]; + #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return u.s[1]; + #else + return 0x0304; + #endif + } + + char __attribute__((noinline,noclone)) bar(int j) + { + union U u; + u.i = j; + /* This gets simplified to a BIT_FIELD_REF. */ + return u.c[2]; + } + + int main() + { + if (foo (0x01020304) != 0x04) + abort (); + if (baz (0x01020304) != 0x0304) + abort (); + return 0; + } + + /* { dg-final { scan-tree-dump "\\(char\\) i_" "fre1" } } */ + /* { dg-final { scan-tree-dump "\\(short int\\) i_" "fre1" } } */ + /* { dg-final { scan-tree-dump-not "u.i =" "dse1" } } */ Index: gcc/tree-complex.c =================================================================== *** gcc/tree-complex.c.orig 2016-05-03 15:15:58.002994741 +0200 --- gcc/tree-complex.c 2016-05-03 15:35:42.000538619 +0200 *************** extract_component (gimple_stmt_iterator *** 604,609 **** --- 604,624 ---- case COMPLEX_EXPR: gcc_unreachable (); + case BIT_FIELD_REF: + { + tree inner_type = TREE_TYPE (TREE_TYPE (t)); + t = unshare_expr (t); + TREE_TYPE (t) = inner_type; + TREE_OPERAND (t, 1) = TYPE_SIZE (inner_type); + if (imagpart_p) + TREE_OPERAND (t, 2) = size_binop (PLUS_EXPR, TREE_OPERAND (t, 2), + TYPE_SIZE (inner_type)); + if (gimple_p) + t = force_gimple_operand_gsi (gsi, t, true, NULL, true, + GSI_SAME_STMT); + return t; + } + case VAR_DECL: case RESULT_DECL: case PARM_DECL: