> The problem here seems to be that the FE (or the GIMPLE generation)
> transforms
>
>         addr + ((on_off-1)*2)
>
> into
>
>         addr + ((1-on_off)*-2)
>
> and nothing in -Os has the wit to recover.  This badly affects targets
> with no hardware multiplier, which end up calling libgcc to do the
> multiply.
>
> Is this a well-known problem?

We very recently ran into it, although it's quite old.  It comes from:

2005-11-19  Richard Guenther  <rguent...@suse.de>

        PR middle-end/23294
        * fold-const.c (fold_plusminus_mult_expr): New function.
        (fold_binary): Use to canonicalize PLUS_EXPR and MINUS_EXPR
        cases, remove now unnecessary code.

which has changed the canonicalization of addresses as well, without anyone 
apparently noticing.  In particular, 

  [...]
   We also canonicalize (X + 7) * 4 into X * 4 + 28 in the hope that either
   the machine has a multiply-accumulate insn or that this is part of an
   addressing calculation.
  [...]

static tree
extract_muldiv (tree t, tree c, enum tree_code code, tree wide_type,
                bool *strict_overflow_p)

is now wrong since it's undone by fold_plusminus_mult_expr.


I've attached the patch (against the 4.3 branch) we use locally.


        * fold-const.c (extract_muldiv): Remove obsolete comment.
        (fold_plusminus_mult_expr): Use only positive power-of-two factors.
        * expr.c (get_inner_reference): Canonicalize offset.


-- 
Eric Botcazou
*** gcc/fold-const.c.0	2008-12-11 10:47:40.000000000 +0100
--- gcc/fold-const.c	2008-12-11 20:08:18.000000000 +0100
*************** optimize_minmax_comparison (enum tree_co
*** 6010,6019 ****
     expression would not overflow or that overflow is undefined for the type
     in the language in question.
  
-    We also canonicalize (X + 7) * 4 into X * 4 + 28 in the hope that either
-    the machine has a multiply-accumulate insn or that this is part of an
-    addressing calculation.
- 
     If we return a non-null expression, it is an equivalent form of the
     original computation, but need not be in the original type.
  
--- 6010,6015 ----
*************** fold_plusminus_mult_expr (enum tree_code
*** 7459,7497 ****
    else if (operand_equal_p (arg01, arg10, 0))
      same = arg01, alt0 = arg00, alt1 = arg11;
  
!   /* No identical multiplicands; see if we can find a common
!      power-of-two factor in non-power-of-two multiplies.  This
!      can help in multi-dimensional array access.  */
!   else if (host_integerp (arg01, 0)
! 	   && host_integerp (arg11, 0))
!     {
!       HOST_WIDE_INT int01, int11, tmp;
!       bool swap = false;
!       tree maybe_same;
!       int01 = TREE_INT_CST_LOW (arg01);
!       int11 = TREE_INT_CST_LOW (arg11);
  
        /* Move min of absolute values to int11.  */
!       if ((int01 >= 0 ? int01 : -int01)
! 	  < (int11 >= 0 ? int11 : -int11))
!         {
  	  tmp = int01, int01 = int11, int11 = tmp;
! 	  alt0 = arg00, arg00 = arg10, arg10 = alt0;
! 	  maybe_same = arg01;
  	  swap = true;
  	}
        else
! 	maybe_same = arg11;
  
!       if (exact_log2 (abs (int11)) > 0 && int01 % int11 == 0)
!         {
  	  alt0 = fold_build2 (MULT_EXPR, TREE_TYPE (arg00), arg00,
  			      build_int_cst (TREE_TYPE (arg00),
! 					     int01 / int11));
! 	  alt1 = arg10;
! 	  same = maybe_same;
  	  if (swap)
! 	    maybe_same = alt0, alt0 = alt1, alt1 = maybe_same;
  	}
      }
  
--- 7455,7497 ----
    else if (operand_equal_p (arg01, arg10, 0))
      same = arg01, alt0 = arg00, alt1 = arg11;
  
!   /* No identical multiplicands; see if we can find a common positive
!      power-of-two factor.  This can help in multi-dim array accesses.  */
!   else if (host_integerp (arg01, 0) && host_integerp (arg11, 0))
!     {
!       HOST_WIDE_INT int01 = TREE_INT_CST_LOW (arg01);
!       HOST_WIDE_INT int11 = TREE_INT_CST_LOW (arg11);
!       HOST_WIDE_INT factor, tmp;
!       tree tmp_tree;
!       bool swap;
  
        /* Move min of absolute values to int11.  */
!       if ((int01 >= 0 ? int01 : -int01) < (int11 >= 0 ? int11 : -int11))
! 	{
  	  tmp = int01, int01 = int11, int11 = tmp;
! 	  tmp_tree = arg00, arg00 = arg10, arg10 = tmp_tree;
  	  swap = true;
  	}
        else
! 	swap = false;
  
!       if (int11 < 0)
! 	factor = -int11;
!       else
! 	factor = int11;
! 
!       if (exact_log2 (factor) > 0 && (int01 % factor) == 0)
! 	{
  	  alt0 = fold_build2 (MULT_EXPR, TREE_TYPE (arg00), arg00,
  			      build_int_cst (TREE_TYPE (arg00),
! 					     int01 / factor));
! 	  if (int11 < 0)
! 	    alt1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (arg10), arg10);
! 	  else
! 	    alt1 = arg10;
  	  if (swap)
! 	    tmp_tree = alt0, alt0 = alt1, alt1 = tmp_tree;
! 	  same = build_int_cst (TREE_TYPE (arg10), factor);
  	}
      }
  
*** gcc/expr.c.0	2008-12-11 10:54:49.000000000 +0100
--- gcc/expr.c	2008-12-11 20:29:28.000000000 +0100
*************** get_inner_reference (tree exp, HOST_WIDE
*** 6042,6047 ****
--- 6042,6066 ----
      {
        *pbitpos = tree_low_cst (bit_offset, 0);
        *poffset = offset;
+ 
+       /* The middle-end used to canonicalize (X + 7) * 4 into X * 4 + 28 at
+ 	 the tree level, but that is no longer the case.  However the latter
+ 	 form is still the preferred one in addressing calculations.  */
+       if (TREE_CODE (offset) == MULT_EXPR
+ 	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST)
+ 	{
+ 	  tree op0 = TREE_OPERAND (offset, 0), disp;
+ 	  enum tree_code code = TREE_CODE (op0);
+ 	  if ((code == PLUS_EXPR || code == MINUS_EXPR)
+ 	      && TREE_CODE (TREE_OPERAND (op0, 1)) == INTEGER_CST
+ 	      && (disp = int_const_binop (MULT_EXPR, TREE_OPERAND (op0, 1),
+ 					  TREE_OPERAND (offset, 1), 0))
+ 	      && !TREE_OVERFLOW (disp))
+ 	    *poffset = build2 (code, TREE_TYPE (offset),
+ 			       size_binop (MULT_EXPR, TREE_OPERAND (op0, 0),
+ 					   TREE_OPERAND (offset, 1)),
+ 			       disp);
+ 	}
      }
  
    /* We can use BLKmode for a byte-aligned BLKmode bitfield.  */

Reply via email to