This is sort of the "final" state I ended up trying to teach the C frontend not to emit array-to-pointer decay as ADDR_EXPR (element-type*, array) but as ADDR_EXPR (element-type*, ARRAY_REF (element-type, array, 0)) for both type correctness and for possible simplifications of fold and the tree optimizers that will no longer have to handle &a as &a[0] specially if trying to optimize ARRAY_REF operations.
While the patch to teach the C frontend to emit &a[0] is not complicated (see the c-typeck.c (default_function_array_conversion) patch chunk), there is a lot of fall-out in the frontend and in the optimizers. In the patch you will find intermixed (fold-const.c parts) code to fold more of address calculation with &a[i] especially in the case of char arrays which gets excercised quite a lot in the C testsuite. I also came along problems in fold_indirect_ref_1 (see also separate post) and fold_stmt. The gimplify.c chunk was applied to check if C no longer creates those "invalid" trees. Until other frontends are fixed, this is not safe. The patch was bootstrapped and tested on i686-pc-linux-gnu for the C language with the only remaining regression being c99-init-4.c (I didn't manage to find the place to fix). I'll stop right here until I get encouragement and help from other frontend maintainers to maybe fix all of the frontends (I know of at least gfortran that needs to be fixed) to really have a benefit of the patch. Other suggestions? Like f.e. how to fix c99-init-4.c? Thanks, Richard.
2005-04-28 Richard Guenther <[EMAIL PROTECTED]> * builtins.c (fold_builtin_constant_p): Handle constant strings of the form &str[0]. * c-format.c (check_format_arg): Handle &a[offset] as valid format string. * c-typeck.c (default_function_array_conversion): Emit array-to-pointer decay as &a[0]. (build_unary_op): Emit &a[i] in its original form rather than splitting it to a + i. * expr.c (string_constant): Handle &"Foo"[0] as string constant. * tree-ssa-ccp.c (fold_stmt): Do not use results from fold that are not TREE_CONSTANT. * fold-const.c (extract_array_ref): Remove. (try_move_mult_to_index): Handle folding of &a[i] OP x with x being constant or array element size one. Fix type correctness. (fold_binary): Dispatch to try_move_mult_to_index in more cases. Simplify folding of comparisons of ARRAY_REFs. (fold_indirect_ref_1): Avoid removing NOP_EXPRs with type qualifiers like const. * gimplify.c (check_pointer_types_r): ADDR_EXPR no longer can be of type array element for array operands. Index: gcc/builtins.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/builtins.c,v retrieving revision 1.460 diff -c -3 -p -r1.460 builtins.c *** gcc/builtins.c 23 Apr 2005 21:27:24 -0000 1.460 --- gcc/builtins.c 28 Apr 2005 13:24:21 -0000 *************** fold_builtin_constant_p (tree arglist) *** 6320,6326 **** || (TREE_CODE (arglist) == CONSTRUCTOR && TREE_CONSTANT (arglist)) || (TREE_CODE (arglist) == ADDR_EXPR ! && TREE_CODE (TREE_OPERAND (arglist, 0)) == STRING_CST)) return integer_one_node; /* If this expression has side effects, show we don't know it to be a --- 6320,6329 ---- || (TREE_CODE (arglist) == CONSTRUCTOR && TREE_CONSTANT (arglist)) || (TREE_CODE (arglist) == ADDR_EXPR ! && (TREE_CODE (TREE_OPERAND (arglist, 0)) == STRING_CST ! || (TREE_CODE (TREE_OPERAND (arglist, 0)) == ARRAY_REF ! && integer_zerop (TREE_OPERAND (TREE_OPERAND (arglist, 0), 1)) ! && TREE_CODE (TREE_OPERAND (TREE_OPERAND (arglist, 0), 0)) == STRING_CST)))) return integer_one_node; /* If this expression has side effects, show we don't know it to be a Index: gcc/c-format.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/c-format.c,v retrieving revision 1.74 diff -c -3 -p -r1.74 c-format.c *** gcc/c-format.c 26 Apr 2005 23:57:55 -0000 1.74 --- gcc/c-format.c 28 Apr 2005 13:24:22 -0000 *************** check_format_arg (void *ctx, tree format *** 1260,1265 **** --- 1260,1269 ---- return; } format_tree = TREE_OPERAND (format_tree, 0); + if (TREE_CODE (format_tree) == ARRAY_REF + && host_integerp (TREE_OPERAND (format_tree, 1), 0) + && (offset = tree_low_cst (TREE_OPERAND (format_tree, 1), 0)) >= 0) + format_tree = TREE_OPERAND (format_tree, 0); if (TREE_CODE (format_tree) == VAR_DECL && TREE_CODE (TREE_TYPE (format_tree)) == ARRAY_TYPE && (array_init = decl_constant_value (format_tree)) != format_tree Index: gcc/c-typeck.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/c-typeck.c,v retrieving revision 1.438 diff -c -3 -p -r1.438 c-typeck.c *** gcc/c-typeck.c 28 Apr 2005 00:45:42 -0000 1.438 --- gcc/c-typeck.c 28 Apr 2005 13:24:27 -0000 *************** default_function_array_conversion (tree *** 1335,1356 **** ptrtype = build_pointer_type (restype); ! if (TREE_CODE (exp) == VAR_DECL) ! { ! /* We are making an ADDR_EXPR of ptrtype. This is a valid ! ADDR_EXPR because it's the best way of representing what ! happens in C when we take the address of an array and place ! it in a pointer to the element type. */ ! adr = build1 (ADDR_EXPR, ptrtype, exp); ! if (!c_mark_addressable (exp)) ! return error_mark_node; ! TREE_SIDE_EFFECTS (adr) = 0; /* Default would be, same as EXP. */ ! return adr; ! } ! /* This way is better for a COMPONENT_REF since it can ! simplify the offset for a component. */ ! adr = build_unary_op (ADDR_EXPR, exp, 1); ! return convert (ptrtype, adr); } return exp; } --- 1335,1353 ---- ptrtype = build_pointer_type (restype); ! /* We are making an ADDR_EXPR of ptrtype. So we need to ! construct an ARRAY_REF to the first element of the ! array for tree type correctness. This is the best way of ! representing what happens in C when we take the address of ! an array and place it in a pointer to the element type. */ ! adr = build1 (ADDR_EXPR, ptrtype, ! build4 (ARRAY_REF, restype, ! exp, integer_zero_node, ! NULL_TREE, NULL_TREE)); ! if (!c_mark_addressable (exp)) ! return error_mark_node; ! TREE_SIDE_EFFECTS (adr) = 0; /* Default would be, same as EXP. */ ! return adr; } return exp; } *************** build_unary_op (enum tree_code code, tre *** 2713,2726 **** return TREE_OPERAND (arg, 0); } ! /* For &x[y], return x+y */ ! if (TREE_CODE (arg) == ARRAY_REF) ! { ! if (!c_mark_addressable (TREE_OPERAND (arg, 0))) ! return error_mark_node; ! return build_binary_op (PLUS_EXPR, TREE_OPERAND (arg, 0), ! TREE_OPERAND (arg, 1), 1); ! } /* Anything not already handled and not a true memory reference or a non-lvalue array is an error. */ --- 2710,2718 ---- return TREE_OPERAND (arg, 0); } ! if (TREE_CODE (arg) == ARRAY_REF ! && !c_mark_addressable (TREE_OPERAND (arg, 0))) ! return error_mark_node; /* Anything not already handled and not a true memory reference or a non-lvalue array is an error. */ Index: gcc/expr.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/expr.c,v retrieving revision 1.788 diff -c -3 -p -r1.788 expr.c *** gcc/expr.c 28 Apr 2005 05:03:03 -0000 1.788 --- gcc/expr.c 28 Apr 2005 13:24:28 -0000 *************** string_constant (tree arg, tree *ptr_off *** 8484,8499 **** if (TREE_CODE (arg0) == ADDR_EXPR && (TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST ! || TREE_CODE (TREE_OPERAND (arg0, 0)) == VAR_DECL)) { array = TREE_OPERAND (arg0, 0); offset = arg1; } else if (TREE_CODE (arg1) == ADDR_EXPR && (TREE_CODE (TREE_OPERAND (arg1, 0)) == STRING_CST ! || TREE_CODE (TREE_OPERAND (arg1, 0)) == VAR_DECL)) { array = TREE_OPERAND (arg1, 0); offset = arg0; } else --- 8484,8507 ---- if (TREE_CODE (arg0) == ADDR_EXPR && (TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST ! || TREE_CODE (TREE_OPERAND (arg0, 0)) == VAR_DECL ! || (TREE_CODE (TREE_OPERAND (arg0, 0)) == ARRAY_REF ! && integer_zerop (TREE_OPERAND (TREE_OPERAND (arg0, 0), 1))))) { array = TREE_OPERAND (arg0, 0); + if (TREE_CODE (TREE_OPERAND (arg0, 0)) == ARRAY_REF) + array = TREE_OPERAND (array, 0); offset = arg1; } else if (TREE_CODE (arg1) == ADDR_EXPR && (TREE_CODE (TREE_OPERAND (arg1, 0)) == STRING_CST ! || TREE_CODE (TREE_OPERAND (arg1, 0)) == VAR_DECL ! || (TREE_CODE (TREE_OPERAND (arg1, 0)) == ARRAY_REF ! && integer_zerop (TREE_OPERAND (TREE_OPERAND (arg1, 0), 1))))) { array = TREE_OPERAND (arg1, 0); + if (TREE_CODE (TREE_OPERAND (arg1, 0)) == ARRAY_REF) + array = TREE_OPERAND (array, 0); offset = arg0; } else Index: gcc/gimplify.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/gimplify.c,v retrieving revision 2.125 diff -c -3 -p -r2.125 gimplify.c *** gcc/gimplify.c 22 Apr 2005 16:14:54 -0000 2.125 --- gcc/gimplify.c 28 Apr 2005 13:24:28 -0000 *************** check_pointer_types_r (tree *tp, int *wa *** 4500,4516 **** ptype = TREE_TYPE (t); otype = TREE_TYPE (TREE_OPERAND (t, 0)); dtype = TREE_TYPE (ptype); ! if (!cpt_same_type (otype, dtype)) ! { ! /* &array is allowed to produce a pointer to the element, rather than ! a pointer to the array type. We must allow this in order to ! properly represent assigning the address of an array in C into ! pointer to the element type. */ ! gcc_assert (TREE_CODE (otype) == ARRAY_TYPE ! && POINTER_TYPE_P (ptype) ! && cpt_same_type (TREE_TYPE (otype), dtype)); ! break; ! } break; default: --- 4500,4506 ---- ptype = TREE_TYPE (t); otype = TREE_TYPE (TREE_OPERAND (t, 0)); dtype = TREE_TYPE (ptype); ! gcc_assert (cpt_same_type (otype, dtype)); break; default: Index: gcc/tree-ssa-ccp.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-ccp.c,v retrieving revision 2.67 diff -c -3 -p -r2.67 tree-ssa-ccp.c *** gcc/tree-ssa-ccp.c 21 Apr 2005 18:05:24 -0000 2.67 --- gcc/tree-ssa-ccp.c 28 Apr 2005 13:24:28 -0000 *************** fold_stmt (tree *stmt_p) *** 2177,2183 **** /* If we couldn't fold the RHS, hand over to the generic fold routines. */ if (result == NULL_TREE) ! result = fold (rhs); /* Strip away useless type conversions. Both the NON_LVALUE_EXPR that may have been added by fold, and "useless" type conversions that might --- 2177,2186 ---- /* If we couldn't fold the RHS, hand over to the generic fold routines. */ if (result == NULL_TREE) ! { ! tree tmp = fold (rhs); ! result = TREE_CONSTANT (tmp) ? tmp : rhs; ! } /* Strip away useless type conversions. Both the NON_LVALUE_EXPR that may have been added by fold, and "useless" type conversions that might Index: gcc/fold-const.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v retrieving revision 1.573 diff -c -3 -p -r1.573 fold-const.c *** gcc/fold-const.c 27 Apr 2005 19:38:57 -0000 1.573 --- gcc/fold-const.c 28 Apr 2005 13:31:37 -0000 *************** constant_boolean_node (int value, tree t *** 5416,5471 **** } - /* Return true if expr looks like an ARRAY_REF and set base and - offset to the appropriate trees. If there is no offset, - offset is set to NULL_TREE. */ - - static bool - extract_array_ref (tree expr, tree *base, tree *offset) - { - /* We have to be careful with stripping nops as with the - base type the meaning of the offset can change. */ - tree inner_expr = expr; - STRIP_NOPS (inner_expr); - /* One canonical form is a PLUS_EXPR with the first - argument being an ADDR_EXPR with a possible NOP_EXPR - attached. */ - if (TREE_CODE (expr) == PLUS_EXPR) - { - tree op0 = TREE_OPERAND (expr, 0); - STRIP_NOPS (op0); - if (TREE_CODE (op0) == ADDR_EXPR) - { - *base = TREE_OPERAND (expr, 0); - *offset = TREE_OPERAND (expr, 1); - return true; - } - } - /* Other canonical form is an ADDR_EXPR of an ARRAY_REF, - which we transform into an ADDR_EXPR with appropriate - offset. For other arguments to the ADDR_EXPR we assume - zero offset and as such do not care about the ADDR_EXPR - type and strip possible nops from it. */ - else if (TREE_CODE (inner_expr) == ADDR_EXPR) - { - tree op0 = TREE_OPERAND (inner_expr, 0); - if (TREE_CODE (op0) == ARRAY_REF) - { - *base = build_fold_addr_expr (TREE_OPERAND (op0, 0)); - *offset = TREE_OPERAND (op0, 1); - } - else - { - *base = inner_expr; - *offset = NULL_TREE; - } - return true; - } - - return false; - } - - /* Transform `a + (b ? x : y)' into `b ? (a + x) : (a + y)'. Transform, `a + (x < y)' into `(x < y) ? (a + 1) : (a + 0)'. Here CODE corresponds to the `+', COND to the `(b ? x : y)' or `(x < y)' --- 5416,5421 ---- *************** fold_sign_changed_comparison (enum tree_ *** 6258,6312 **** } /* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is ! step of the array. ADDR is the address. MULT is the multiplicative expression. If the function succeeds, the new address expression is returned. Otherwise NULL_TREE is returned. */ static tree ! try_move_mult_to_index (enum tree_code code, tree addr, tree mult) { tree s, delta, step; - tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1); tree ref = TREE_OPERAND (addr, 0), pref; tree ret, pos; tree itype; ! STRIP_NOPS (arg0); ! STRIP_NOPS (arg1); ! ! if (TREE_CODE (arg0) == INTEGER_CST) { ! s = arg0; ! delta = arg1; } ! else if (TREE_CODE (arg1) == INTEGER_CST) { ! s = arg1; ! delta = arg0; } else ! return NULL_TREE; for (;; ref = TREE_OPERAND (ref, 0)) { if (TREE_CODE (ref) == ARRAY_REF) { step = array_ref_element_size (ref); if (TREE_CODE (step) != INTEGER_CST) continue; ! itype = TREE_TYPE (step); ! /* If the type sizes do not match, we might run into problems ! when one of them would overflow. */ ! if (TYPE_PRECISION (itype) != TYPE_PRECISION (TREE_TYPE (s))) ! continue; ! if (!operand_equal_p (step, fold_convert (itype, s), 0)) ! continue; ! delta = fold_convert (itype, delta); break; } --- 6208,6292 ---- } /* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is ! step of the array. Reconstructs s and delta in the case of s * delta ! being an integer constant (and thus already folded). ! ADDR is the address. MULT is the multiplicative expression. If the function succeeds, the new address expression is returned. Otherwise NULL_TREE is returned. */ static tree ! try_move_mult_to_index (enum tree_code code, tree addr, tree op1) { tree s, delta, step; tree ref = TREE_OPERAND (addr, 0), pref; tree ret, pos; tree itype; ! /* Canonicalize op1 into a possibly non-constant delta ! and an INTEGER_CST s. */ ! if (TREE_CODE (op1) == MULT_EXPR) { ! tree arg0 = TREE_OPERAND (op1, 0), arg1 = TREE_OPERAND (op1, 1); ! ! STRIP_NOPS (arg0); ! STRIP_NOPS (arg1); ! ! if (TREE_CODE (arg0) == INTEGER_CST) ! { ! s = arg0; ! delta = arg1; ! } ! else if (TREE_CODE (arg1) == INTEGER_CST) ! { ! s = arg1; ! delta = arg0; ! } ! else ! return NULL_TREE; } ! else if (TREE_CODE (op1) == INTEGER_CST) { ! delta = op1; ! s = NULL_TREE; } else ! { ! delta = op1; ! s = integer_one_node; ! } for (;; ref = TREE_OPERAND (ref, 0)) { if (TREE_CODE (ref) == ARRAY_REF) { step = array_ref_element_size (ref); + itype = TREE_TYPE (step); if (TREE_CODE (step) != INTEGER_CST) continue; ! if (s) ! { ! /* If the type sizes do not match, we might run into problems ! when one of them would overflow. */ ! if (TYPE_PRECISION (itype) != TYPE_PRECISION (TREE_TYPE (s))) ! continue; ! if (!operand_equal_p (step, fold_convert (itype, s), 0)) ! continue; ! } ! else ! { ! /* Try if delta is a multiple of step. */ ! tree mod = int_const_binop (TRUNC_MOD_EXPR, delta, step, 0); ! if (!integer_zerop (mod)) ! continue; ! ! delta = int_const_binop (EXACT_DIV_EXPR, delta, step, 0); ! } ! /*delta = fold_convert (itype, delta);*/ break; } *************** try_move_mult_to_index (enum tree_code c *** 6328,6336 **** pos = TREE_OPERAND (pos, 0); } ! TREE_OPERAND (pos, 1) = fold_build2 (code, itype, ! TREE_OPERAND (pos, 1), ! delta); return build1 (ADDR_EXPR, TREE_TYPE (addr), ret); } --- 6308,6338 ---- pos = TREE_OPERAND (pos, 0); } ! /* fold_convert operands of CODE appropriately. */ ! pref = TREE_OPERAND (pos, 1); ! if (! TREE_TYPE (pref) && TREE_TYPE (delta)) ! { ! itype = TREE_TYPE (delta); ! pref = fold_convert (itype, pref); ! } ! else if (! TREE_TYPE (delta) && TREE_TYPE (pref)) ! { ! itype = TREE_TYPE (pref); ! delta = fold_convert (itype, delta); ! } ! else if (TREE_TYPE (delta) && TREE_TYPE (pref) ! && TREE_TYPE (delta) == TREE_TYPE (pref)) ! ; ! else ! { ! itype = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (pos, 0))); ! if (! itype) ! return NULL_TREE; ! pref = fold_convert (itype, pref); ! delta = fold_convert (itype, delta); ! } ! ! TREE_OPERAND (pos, 1) = fold_build2 (code, itype, pref, delta); return build1 (ADDR_EXPR, TREE_TYPE (addr), ret); } *************** fold_binary (enum tree_code code, tree t *** 7440,7454 **** /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is step of the array. Loop optimizer sometimes produce this type of expressions. */ ! if (TREE_CODE (arg0) == ADDR_EXPR ! && TREE_CODE (arg1) == MULT_EXPR) { tem = try_move_mult_to_index (PLUS_EXPR, arg0, arg1); if (tem) return fold_convert (type, fold (tem)); } ! else if (TREE_CODE (arg1) == ADDR_EXPR ! && TREE_CODE (arg0) == MULT_EXPR) { tem = try_move_mult_to_index (PLUS_EXPR, arg1, arg0); if (tem) --- 7442,7454 ---- /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is step of the array. Loop optimizer sometimes produce this type of expressions. */ ! if (TREE_CODE (arg0) == ADDR_EXPR) { tem = try_move_mult_to_index (PLUS_EXPR, arg0, arg1); if (tem) return fold_convert (type, fold (tem)); } ! else if (TREE_CODE (arg1) == ADDR_EXPR) { tem = try_move_mult_to_index (PLUS_EXPR, arg1, arg0); if (tem) *************** fold_binary (enum tree_code code, tree t *** 7866,7873 **** /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is step of the array. Loop optimizer sometimes produce this type of expressions. */ ! if (TREE_CODE (arg0) == ADDR_EXPR ! && TREE_CODE (arg1) == MULT_EXPR) { tem = try_move_mult_to_index (MINUS_EXPR, arg0, arg1); if (tem) --- 7866,7872 ---- /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is step of the array. Loop optimizer sometimes produce this type of expressions. */ ! if (TREE_CODE (arg0) == ADDR_EXPR) { tem = try_move_mult_to_index (MINUS_EXPR, arg0, arg1); if (tem) *************** fold_binary (enum tree_code code, tree t *** 8893,8916 **** /* If this is a comparison of two exprs that look like an ARRAY_REF of the same object, then we can fold this to a comparison of the two offsets. */ ! if (TREE_CODE_CLASS (code) == tcc_comparison) { ! tree base0, offset0, base1, offset1; ! if (extract_array_ref (arg0, &base0, &offset0) ! && extract_array_ref (arg1, &base1, &offset1) ! && operand_equal_p (base0, base1, 0)) { ! if (offset0 == NULL_TREE ! && offset1 == NULL_TREE) ! { ! offset0 = integer_zero_node; ! offset1 = integer_zero_node; } - else if (offset0 == NULL_TREE) - offset0 = build_int_cst (TREE_TYPE (offset1), 0); - else if (offset1 == NULL_TREE) - offset1 = build_int_cst (TREE_TYPE (offset0), 0); if (TREE_TYPE (offset0) == TREE_TYPE (offset1)) return fold_build2 (code, type, offset0, offset1); --- 8892,8921 ---- /* If this is a comparison of two exprs that look like an ARRAY_REF of the same object, then we can fold this to a comparison of the two offsets. */ ! if (TREE_CODE (arg0) == ADDR_EXPR ! && TREE_CODE (arg1) == ADDR_EXPR ! && TREE_CODE (TREE_OPERAND (arg0, 0)) == ARRAY_REF ! && TREE_CODE (TREE_OPERAND (arg1, 0)) == ARRAY_REF) { ! tree aref0 = TREE_OPERAND (arg0, 0); ! tree aref1 = TREE_OPERAND (arg1, 0); ! if (operand_equal_p (TREE_OPERAND (aref0, 0), ! TREE_OPERAND (aref1, 0), 0)) { ! tree offset0 = TREE_OPERAND (aref0, 1); ! tree offset1 = TREE_OPERAND (aref1, 1); ! tree dtype = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (aref0, 0))); ! if (integer_zerop (array_ref_element_size (aref0))) ! offset0 = integer_zero_node; ! if (integer_zerop (array_ref_element_size (aref1))) ! offset1 = integer_zero_node; ! if (TREE_TYPE (offset0) != TREE_TYPE (offset1) ! && dtype) ! { ! offset0 = fold_convert (dtype, offset0); ! offset1 = fold_convert (dtype, offset1); } if (TREE_TYPE (offset0) == TREE_TYPE (offset1)) return fold_build2 (code, type, offset0, offset1); *************** fold_indirect_ref_1 (tree t) *** 11310,11316 **** tree sub = t; tree subtype; ! STRIP_NOPS (sub); subtype = TREE_TYPE (sub); if (!POINTER_TYPE_P (subtype)) return NULL_TREE; --- 11315,11321 ---- tree sub = t; tree subtype; ! STRIP_TYPE_NOPS (sub); subtype = TREE_TYPE (sub); if (!POINTER_TYPE_P (subtype)) return NULL_TREE;