Index: loop-iv.c
===================================================================
--- loop-iv.c	(revision 207338)
+++ loop-iv.c	(working copy)
@@ -272,6 +272,41 @@ clear_iv_info (void)
   bivs.empty ();
 }
 
+struct regdef_entry
+{
+  int regno;
+  int bb_i;
+  int insn_from;
+  rtx def;
+};
+
+struct regdef_entry_hasher : typed_free_remove <regdef_entry>
+{
+  typedef regdef_entry value_type;
+  typedef regdef_entry compare_type;
+  static inline hashval_t hash (const value_type *);
+  static inline bool equal (const value_type *, const compare_type *);
+};
+
+/* Returns hash value for regdef RD.  */
+
+inline hashval_t
+regdef_entry_hasher::hash (const value_type *rd)
+{
+  return rd->regno;
+}
+
+/* Compares regdef RD1 and regdef RD2.  */
+
+inline bool
+regdef_entry_hasher::equal (const value_type *rd1, const compare_type *rd2)
+{
+  return rd1->regno == rd2->regno
+    && rd1->bb_i == rd2->bb_i
+    && rd1->insn_from == rd2->insn_from;
+}
+
+static hash_table <regdef_entry_hasher> regdefs;
 
 /* Prepare the data for an induction variable analysis of a LOOP.  */
 
@@ -285,6 +320,7 @@ iv_analysis_loop_init (struct loop *loop
     {
       df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
       bivs.create (10);
+      regdefs.create (31);
       clean_slate = false;
     }
   else
@@ -1300,6 +1336,7 @@ iv_analysis_done (void)
       clean_slate = true;
       df_finish_pass (true);
       bivs.dispose ();
+      regdefs.dispose ();
       free (iv_ref_table);
       iv_ref_table = NULL;
       iv_ref_table_size = 0;
@@ -1849,11 +1886,334 @@ eliminate_implied_conditions (enum rtx_c
     eliminate_implied_condition (op, XEXP (elt, 0), head);
 }
 
+static rtx
+simplify_using_context_helper (basic_block bb, rtx expr, rtx from);
+
+static bool
+reg_defined_in_bb (basic_block bb, rtx reg, rtx from, rtx *insnp = NULL)
+{
+  rtx insn = from == NULL_RTX ? BB_END (bb) : from;
+  gcc_assert (from == NULL_RTX || BLOCK_FOR_INSN (from) == bb);
+
+  for (; insn && insn != PREV_INSN (BB_HEAD (bb));
+       insn = PREV_INSN (insn))
+    {
+      if (!INSN_P (insn))
+	continue;
+      if (df_reg_defined (insn, reg))
+	{
+	  if (insnp)
+	    *insnp = insn;
+	  return true;
+	}
+    }
+  return false;
+}
+
+static bool
+reg_defined_in_pred_helper (basic_block bb, rtx reg, bitmap visited)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      basic_block pred = e->src;
+
+      if (bitmap_bit_p (visited, pred->index))
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, ";; looking for definition of register %d in bb %d\n",
+		 REGNO (reg), pred->index);
+
+      bitmap_set_bit (visited, pred->index);
+      if (reg_defined_in_bb (pred, reg, NULL_RTX)
+	  || reg_defined_in_pred_helper (pred, reg, visited))
+	return true;
+    }
+  return false;
+}
+
+/* Returns true if register reg is defined in any block that is a successor
+   of dombb and reaches bb.  */
+static bool
+reg_defined_in_pred (basic_block bb, rtx reg, basic_block dombb)
+{
+  bitmap visited = BITMAP_ALLOC (NULL);
+  gcc_assert (REG_P (reg));
+  gcc_assert (dominated_by_p (CDI_DOMINATORS, bb, dombb));
+
+  bitmap_clear (visited);
+  bitmap_set_bit (visited, bb->index);
+  bitmap_set_bit (visited, dombb->index);
+  return reg_defined_in_pred_helper (bb, reg, visited);
+}
+
+static rtx
+simplify_reg_using_context (basic_block bb, rtx reg, rtx from)
+{
+  rtx insn_def = NULL_RTX, set;
+  bool def_found_in_loop = false;
+  basic_block dom_bb;
+
+  struct regdef_entry *rd;
+  struct regdef_entry *req = XNEW (struct regdef_entry);
+  req->regno = REGNO (reg);
+  req->bb_i = bb->index;
+  req->insn_from = from == NULL_RTX ? -1 : INSN_UID (from);
+  req->def = NULL_RTX;
+
+  gcc_assert (REG_P (reg));
+  gcc_assert (from == NULL_RTX || BLOCK_FOR_INSN (from) == bb);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from bb %d register %d",
+	       bb->index, REGNO (reg));
+      if (from == NULL_RTX)
+	fprintf (dump_file, "\n");
+      else
+	fprintf (dump_file, " from insn %d\n", INSN_UID (from));
+    }
+
+  /* Check if we already have the definition requested in our hash.  */
+  if ((rd = regdefs.find (req)))
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, ";; found definition in hash:");
+	  print_inline_rtx (dump_file, rd->def, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      free (req);
+      gcc_assert (rd->def != NULL_RTX);
+      return rd->def;
+    }
+
+  /* Find definition of reg in a dominator of bb.  */
+  for (dom_bb = bb;
+       dom_bb != ENTRY_BLOCK_PTR_FOR_FN (cfun);
+       dom_bb = get_immediate_dominator (CDI_DOMINATORS, dom_bb))
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, ";; trying to find definition of reg %d in bb %d",
+		   REGNO (reg), dom_bb->index);
+	  if (from == NULL_RTX || bb != dom_bb)
+	    fprintf (dump_file, "\n");
+	  else
+	    fprintf (dump_file, " from insn %d\n", INSN_UID (from));
+	}
+
+      if (reg_defined_in_bb (dom_bb,
+			     reg,
+			     bb == dom_bb ? from : NULL_RTX,
+			     &insn_def))
+	{
+	  /* If definition is inside a basic block which is a loop header,
+	     we can use the definition but we cannot simplify it further
+	     because whatever values were defined before are changed by the loop.
+	     For example:
+	          i <- 10
+	          r1 <- 4
+	     .L2:
+	          r2 <- r1 + 5
+		  i <- i - 1
+		  goto .L2 until i == 0
+
+	      We can simplify r2 + 10 to r1 + 15, but we can't simplify it further.
+	  */
+	  if (bb_loop_header_p (dom_bb))
+	    {
+	      if (dump_file)
+		fprintf (dump_file, ";; found definition for register %d in loop header, basic block %d\n",
+			 REGNO (reg), dom_bb->index);
+	      def_found_in_loop = true;
+	    }
+
+	  break;
+	}
+    }
+
+  /* Can't find definition.  */
+  if (!insn_def
+      || !(set = single_set (insn_def)))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; failed to find definition\n");
+
+      return reg;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; found definition of register %d in bb %d\n",
+	       REGNO (reg), dom_bb->index);
+      print_inline_rtx (dump_file, insn_def, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  /* Now we have to ensure that from all the blocks that reach bb, coming from dom_bb,
+     none redefine reg.  */
+  if (bb != dom_bb
+      && reg_defined_in_pred (bb, reg, dom_bb))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; cannot simplify register %d because a def. has been found in a pred. of bb %d\n",
+		 REGNO (reg), bb->index);
+      return reg;
+    }
+
+  if (def_found_in_loop)
+    req->def = SET_SRC (set);
+  else
+    req->def = simplify_using_context_helper (dom_bb, SET_SRC (set), PREV_INSN (insn_def));
+
+  /* Save definition found in hash.  */
+  {
+    struct regdef_entry **slot = regdefs.find_slot (req, INSERT);
+    *slot = req;
+  }
+  return req->def;
+}
+
+/* Helper for simplify_using_context.
+   Simplify expr in the context of bb starting lookup from insn from (in bb),
+   return the simplified expression.
+   Returns expr if no simplifications are possible.  */
+static rtx
+simplify_using_context_helper (basic_block bb, rtx expr, rtx from = NULL_RTX)
+{
+  const char *fmt;
+  int i;
+  rtx copy = NULL_RTX;
+  bool optimized_p = false;
+  const enum rtx_code code = GET_CODE (expr);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from bb %d the expression:\n",
+	       bb->index);
+      print_inline_rtx (dump_file, expr, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  switch (GET_RTX_CLASS (code))
+    {
+    case RTX_EXTRA:
+    case RTX_CONST_OBJ:
+      /* Can't optimize these any further.  */
+      return expr;
+
+    case RTX_OBJ:
+
+      /* Currently we only simplify registers.  */
+      if (! REG_P (expr))
+	return expr;
+
+      return simplify_reg_using_context	(bb, expr, from);
+      break;
+
+    case RTX_MATCH:
+    case RTX_INSN:
+      /* We should never get these here.  */
+      gcc_unreachable ();
+
+    default:
+
+      copy = copy_rtx (expr);
+      fmt = GET_RTX_FORMAT (code);
+      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	if (fmt[i] == 'e')
+	  {
+	    rtx new_expr = simplify_using_context_helper (bb, XEXP (expr, i), from);
+	    rtx simp_expr = simplify_rtx (new_expr);
+	    if (simp_expr)
+	      new_expr = simp_expr;
+
+	    if (new_expr != XEXP (copy, i))
+	      {
+		/* Optimization success.  */
+		XEXP (copy, i) = new_expr;
+		optimized_p = true;
+	      }
+	  }
+
+      if (optimized_p)
+	{
+	  rtx simp_expr;
+
+	  /* Simplify abnormal expressions (not handled by simplify_rtx), like
+	     zero_extend of integers, etc.
+
+	     (zero_extend:M (const_int n)) == (const_int n)
+	     (sign_extend:M (const_int n)) == (const_int n)
+	  */
+	  if ((GET_CODE (copy) == ZERO_EXTEND
+	       || GET_CODE (copy) == SIGN_EXTEND)
+	      && CONST_INT_P (XEXP (copy, 0)))
+	    copy = XEXP (copy, 0);
+
+	  simp_expr = simplify_rtx (copy);
+	  if (simp_expr)
+	    copy = simp_expr;
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; simplification success from bb %d:\n",
+		       bb->index);
+	      print_inline_rtx (dump_file, expr, 0);
+	      fprintf (dump_file, "\n;; simplified to\n");
+	      print_inline_rtx (dump_file, copy, 0);
+	      fprintf (dump_file, "\n");
+	    }
+
+	  return copy;
+	}
+      else
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; simplification failed from bb %d:\n",
+		       bb->index);
+	      print_inline_rtx (dump_file, expr, 0);
+	      fprintf (dump_file, "\n");
+	    }
+	  return expr;
+	}
+    }
+
+  /* Never reach end of function.  */
+  gcc_unreachable ();
+}
+
+/* Simplifies *EXPR using context of LOOP.  *EXPR cannot be a list.  */
+
+static void
+simplify_using_context (struct loop *loop, rtx *expr)
+{
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from loop header (loop: %d, bb: %d) the expression:\n",
+	       loop->num,
+	       loop->header->index);
+      print_inline_rtx (dump_file, *expr, 0);
+      fprintf (dump_file, "\n");
+    }
+  *expr = simplify_using_context_helper
+    (get_immediate_dominator (CDI_DOMINATORS, loop->header), *expr);
+}
+
 /* Simplifies *EXPR using initial values at the start of the LOOP.  If *EXPR
    is a list, its elements are assumed to be combined using OP.  */
 
 static void
-simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr)
+simplify_using_initial_values (struct loop *loop,
+			       enum rtx_code op,
+			       rtx *expr,
+			       bool use_context = false)
 {
   bool expression_valid;
   rtx head, tail, insn, cond_list, last_valid_expr;
@@ -1889,8 +2249,7 @@ simplify_using_initial_values (struct lo
 	default:
 	  gcc_unreachable ();
 	}
-
-      simplify_using_initial_values (loop, UNKNOWN, &head);
+      simplify_using_initial_values (loop, UNKNOWN, &head, use_context);
       if (head == aggr)
 	{
 	  XEXP (*expr, 0) = aggr;
@@ -1900,10 +2259,10 @@ simplify_using_initial_values (struct lo
       else if (head == neutral)
 	{
 	  *expr = tail;
-	  simplify_using_initial_values (loop, op, expr);
+	  simplify_using_initial_values (loop, op, expr, use_context);
 	  return;
 	}
-      simplify_using_initial_values (loop, op, &tail);
+      simplify_using_initial_values (loop, op, &tail, use_context);
 
       if (tail && XEXP (tail, 0) == aggr)
 	{
@@ -2061,7 +2420,11 @@ simplify_using_initial_values (struct lo
       e = single_pred_edge (e->src);
     }
 
+
  out:
+  if (use_context)
+    simplify_using_context (loop, expr);
+
   free_EXPR_LIST_list (&cond_list);
   if (!CONSTANT_P (*expr))
     *expr = last_valid_expr;
@@ -2787,8 +3150,8 @@ iv_number_of_iterations (struct loop *lo
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
-  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
+  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr, true);
 
   /* Rerun the simplification.  Consider code (created by copying loop headers)
 
@@ -2810,8 +3173,8 @@ iv_number_of_iterations (struct loop *lo
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
-  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
+  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr, true);
 
   if (desc->noloop_assumptions
       && XEXP (desc->noloop_assumptions, 0) == const_true_rtx)
@@ -2844,7 +3207,12 @@ iv_number_of_iterations (struct loop *lo
 	 brings no gain (and if it happens to do, the cse pass will take care
 	 of it anyway).  So prevent this behavior, unless it enabled us to
 	 derive that the number of iterations is a constant.  */
-      desc->niter_expr = old_niter;
+      /* We do the context simplification as well which can be quite
+	 beneficial so we will revert only the simplification if rtx cost is
+	 less than it used to be.  */
+      if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop)) >=
+	  set_src_cost (old_niter, optimize_loop_for_speed_p (loop)))
+	desc->niter_expr = old_niter;
     }
 
   return;
@@ -2855,7 +3223,7 @@ zero_iter_simplify:
   if (desc->assumptions
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
 
   /* Fallthru.  */
 zero_iter:
