Hello,

The attached patch extends the current implementation to analyze
instructions with REG_INC_NOTE.

Tested on ppc64-redhat-linux (bootstrap and regtest) SPU (only regtest)
and arm-linux-gnueabi (bootstrap c and regtest) configured with
--with-arch=armv7-a --with-mode=thumb.

OK for mainline?

Thanks,
Revital

Changelog:

        * modulo-sched.c (record_inc_dec_insn_info,
        free_node_sched_params): New functions.
        (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Remove.
        (struct regmove_info): New.
        (insn_regmove_info): New field in node_sched_params.
        (print_node_sched_params): Print information for all the
        definitions in the instructions.
        (generate_reg_moves, duplicate_insns_of_cycles,
        set_node_sched_params): Adjust the code to handle instructions
        that have multiple definitions.
        (sms_schedule): Handle loops that contain instructions with
        FIND_REG_INC_NOTE and call free_node_sched_params.
=== modified file 'gcc/modulo-sched.c'
--- gcc/modulo-sched.c  2011-03-27 07:11:08 +0000
+++ gcc/modulo-sched.c  2011-04-17 10:29:24 +0000
@@ -201,32 +201,50 @@ static void duplicate_insns_of_cycles (p
                                       int, int, int, rtx);
 static int calculate_stage_count (partial_schedule_ptr ps);
 
+static int record_inc_dec_insn_info (rtx, rtx, rtx, rtx, rtx, void *);
+
+
 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
-#define SCHED_FIRST_REG_MOVE(x) \
-       (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
-#define SCHED_NREG_MOVES(x) \
-       (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
 
-/* The scheduling parameters held for each node.  */
-typedef struct node_sched_params
+/* Information about register-move generated for a definition.  */
+struct regmove_info
 {
-  int asap;    /* A lower-bound on the absolute scheduling cycle.  */
-  int time;    /* The absolute scheduling cycle (time >= asap).  */
-
+  /* The definition for which the register-move is generated for.  */
+  rtx def;
+  
   /* The following field (first_reg_move) is a pointer to the first
-     register-move instruction added to handle the modulo-variable-expansion
-     of the register defined by this node.  This register-move copies the
-     original register defined by the node.  */
+     register-move instruction added to handle the
+     modulo-variable-expansion of the register defined by this node.
+     This register-move copies the original register defined by the node.
+  */
   rtx first_reg_move;
-
+  
   /* The number of register-move instructions added, immediately preceding
      first_reg_move.  */
   int nreg_moves;
+  
+  /* Auxiliary info used in the calculation of the register-moves.  */
+  void *aux;
+};
+
+typedef struct regmove_info *regmove_info_ptr;
+DEF_VEC_P (regmove_info_ptr);
+DEF_VEC_ALLOC_P (regmove_info_ptr, heap);
 
+/* The scheduling parameters held for each node.  */
+typedef struct node_sched_params
+{
+  int asap;    /* A lower-bound on the absolute scheduling cycle.  */
+  int time;    /* The absolute scheduling cycle (time >= asap).  */
+  
+  /* Information about register-moves needed for
+     definitions in the instruction.  */
+  VEC (regmove_info_ptr, heap) *insn_regmove_info;
+  
   int row;    /* Holds time % ii.  */
   int stage;  /* Holds time / ii.  */
 
@@ -423,12 +441,58 @@ set_node_sched_params (ddg_ptr g)
      appropriate sched_params structure.  */
   for (i = 0; i < g->num_nodes; i++)
     {
+      rtx insn = g->nodes[i].insn;
+      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
+      rtx set = single_set (insn);
+      
       /* Watch out for aliasing problems?  */
       node_sched_params[i].asap = g->nodes[i].aux.count;
+      node_sched_params[i].insn_regmove_info = NULL;
+      
+      /* Record the definition(s) in the instruction.  These will be
+        later used to calculate the register-moves needed for each
+        definition. */
+      if (set && REG_P (SET_DEST (set)))
+       { 
+         regmove_info_ptr elt = 
+           (regmove_info_ptr) xcalloc (1, sizeof (struct regmove_info));
+         
+         elt->def = SET_DEST (set);
+         VEC_safe_push (regmove_info_ptr, heap, 
+                        node_sched_params[i].insn_regmove_info,
+                        elt);
+       }
+      
+      if (note)
+       for_each_inc_dec (&insn, record_inc_dec_insn_info, 
+                         &node_sched_params[i]);
+      
       g->nodes[i].aux.info = &node_sched_params[i];
     }
 }
 
+/* Free the sched_params information allocated for each node.  */
+static void
+free_node_sched_params (ddg_ptr g)
+{
+  int i;
+  regmove_info_ptr def;
+
+  for (i = 0; i < g->num_nodes; i++)
+    {
+      int j;
+      VEC (regmove_info_ptr, heap) *rinfo =
+       node_sched_params[i].insn_regmove_info;
+      
+      for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+       free (def);
+      
+      VEC_free (regmove_info_ptr, heap, rinfo);
+    }
+  
+  free (node_sched_params);
+}
+
 static void
 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
 {
@@ -438,20 +502,32 @@ print_node_sched_params (FILE *file, int
     return;
   for (i = 0; i < num_nodes; i++)
     {
+      int k;
       node_sched_params_ptr nsp = &node_sched_params[i];
-      rtx reg_move = nsp->first_reg_move;
-      int j;
-
+      regmove_info_ptr def;
+      VEC (regmove_info_ptr, heap) *rinfo =
+       nsp->insn_regmove_info;
+      
       fprintf (file, "Node = %d; INSN = %d\n", i,
               (INSN_UID (g->nodes[i].insn)));
       fprintf (file, " asap = %d:\n", nsp->asap);
       fprintf (file, " time = %d:\n", nsp->time);
-      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
-      for (j = 0; j < nsp->nreg_moves; j++)
-       {
-         fprintf (file, " reg_move = ");
-         print_rtl_single (file, reg_move);
-         reg_move = PREV_INSN (reg_move);
+
+      /* Iterate over the definitions in the instruction printing the
+         reg-moves needed definition for each definition. */
+      for (k = 0; VEC_iterate (regmove_info_ptr, rinfo, k, def); k++)
+       {
+         rtx reg_move = def->first_reg_move;
+         int j;
+         fprintf (file, "def:\n"); 
+         print_rtl_single (file, def->def);
+         fprintf (file, " nreg_moves = %d\n", def->nreg_moves);
+         for (j = 0; j < def->nreg_moves; j++)
+           {
+             fprintf (file, " reg_move = ");
+             print_rtl_single (file, reg_move);
+             reg_move = PREV_INSN (reg_move);
+           }
        }
     }
 }
@@ -472,25 +548,28 @@ generate_reg_moves (partial_schedule_ptr
 {
   ddg_ptr g = ps->g;
   int ii = ps->ii;
-  int i;
+  int i, j;
   struct undo_replace_buff_elem *reg_move_replaces = NULL;
 
   for (i = 0; i < g->num_nodes; i++)
     {
       ddg_node_ptr u = &g->nodes[i];
       ddg_edge_ptr e;
-      int nreg_moves = 0, i_reg_move;
-      sbitmap *uses_of_defs;
+      int i_reg_move;
       rtx last_reg_move;
       rtx prev_reg, old_reg;
-
+      bool need_reg_moves_p = false;
+      VEC (regmove_info_ptr, heap) *rinfo = 
+       node_sched_params[i].insn_regmove_info;
+      regmove_info_ptr def;
+      
       /* Compute the number of reg_moves needed for u, by looking at life
         ranges started at u (excluding self-loops).  */
       for (e = u->out; e; e = e->next_out)
        if (e->type == TRUE_DEP && e->dest != e->src)
          {
            int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / 
ii;
-
+           
             if (e->distance == 1)
               nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) 
/ ii;
 
@@ -499,19 +578,42 @@ generate_reg_moves (partial_schedule_ptr
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
              nreg_moves4e--;
-
-           nreg_moves = MAX (nreg_moves, nreg_moves4e);
+           
+           /* Iterate over the definitions in the instruction and record
+              the information about reg-moves needed for each one.  */
+           for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+             {
+               if (rtx_referenced_p (def->def, e->dest->insn)) 
+                 {
+                   rtx set = single_set (e->dest->insn);
+                   
+                   /* Check that the TRUE_DEP edge belongs to the current
+                      definition.  */
+                   if (set && REG_P (SET_DEST (set)) 
+                       && (SET_DEST (set) == def->def))
+                     continue;
+                   
+                   def->nreg_moves = MAX (def->nreg_moves, nreg_moves4e);
+                   if (def->nreg_moves != 0)
+                     need_reg_moves_p = true;
+                 }
+             }
          }
-
-      if (nreg_moves == 0)
+      
+      if (!need_reg_moves_p)
        continue;
-
-      /* Every use of the register defined by node may require a different
-        copy of this register, depending on the time the use is scheduled.
-        Set a bitmap vector, telling which nodes use each copy of this
-        register.  */
-      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
-      sbitmap_vector_zero (uses_of_defs, nreg_moves);
+      
+      for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+       {
+         def->aux = 
+           (sbitmap *) sbitmap_vector_alloc (def->nreg_moves, g->num_nodes);
+         
+         /* Every use of the register defined by node may require a different
+            copy of this register, depending on the time the use is scheduled.
+            Set a bitmap vector, telling which nodes use each copy of this
+            register.  */
+         sbitmap_vector_zero ((sbitmap *)def->aux, def->nreg_moves);
+       }
       for (e = u->out; e; e = e->next_out)
        if (e->type == TRUE_DEP && e->dest != e->src)
          {
@@ -525,55 +627,79 @@ generate_reg_moves (partial_schedule_ptr
              dest_copy--;
 
            if (dest_copy)
-             SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
+             {
+               /* Iterate over the definitions in the instruction and record
+                  the information about reg-moves needed for each one.  */
+               for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+                 {
+                   sbitmap *uses_of_def = (sbitmap *)def->aux;
+                   
+                   if (rtx_referenced_p (def->def, e->dest->insn))
+                     {
+                       rtx set = single_set (e->dest->insn);
+                       
+                       /* Check that the TRUE_DEP edge belongs to the current
+                          definition.  */
+                       if (set && REG_P (SET_DEST (set)) 
+                           && (SET_DEST (set) == def->def))
+                         continue;
+                       
+                       SET_BIT (uses_of_def[dest_copy - 1], e->dest->cuid);
+                     }
+                 }
+             }
          }
 
       /* Now generate the reg_moves, attaching relevant uses to them.  */
-      SCHED_NREG_MOVES (u) = nreg_moves;
-      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
-      /* Insert the reg-moves right before the notes which precede
-         the insn they relates to.  */
-      last_reg_move = u->first_note;
-
-      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
-       {
-         unsigned int i_use = 0;
-         rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
-         rtx reg_move = gen_move_insn (new_reg, prev_reg);
-         sbitmap_iterator sbi;
-
-         add_insn_before (reg_move, last_reg_move, NULL);
-         last_reg_move = reg_move;
-
-         if (!SCHED_FIRST_REG_MOVE (u))
-           SCHED_FIRST_REG_MOVE (u) = reg_move;
-
-         EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
+      for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+       {
+         sbitmap *uses_of_def = (sbitmap *)def->aux;
+         old_reg = prev_reg = copy_rtx (def->def);
+         
+         /* Insert the reg-moves right before the notes which precede
+            the insn they relates to.  */
+         last_reg_move = u->first_note;
+         
+         for (i_reg_move = 0; i_reg_move < def->nreg_moves; i_reg_move++)
            {
-             struct undo_replace_buff_elem *rep;
+             unsigned int i_use = 0;
+             rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
+             rtx reg_move = gen_move_insn (new_reg, prev_reg);
+             sbitmap_iterator sbi;
+             
+             add_insn_before (reg_move, last_reg_move, NULL);
+             last_reg_move = reg_move;
+             
+             if (!def->first_reg_move)
+               def->first_reg_move = reg_move;
 
-             rep = (struct undo_replace_buff_elem *)
-                   xcalloc (1, sizeof (struct undo_replace_buff_elem));
-             rep->insn = g->nodes[i_use].insn;
-             rep->orig_reg = old_reg;
-             rep->new_reg = new_reg;
-
-             if (! reg_move_replaces)
-               reg_move_replaces = rep;
-             else
+             EXECUTE_IF_SET_IN_SBITMAP (uses_of_def[i_reg_move], 0, i_use, sbi)
                {
-                 rep->next = reg_move_replaces;
-                 reg_move_replaces = rep;
+                 struct undo_replace_buff_elem *rep;
+                 
+                 rep = (struct undo_replace_buff_elem *)
+                   xcalloc (1, sizeof (struct undo_replace_buff_elem));
+                 rep->insn = g->nodes[i_use].insn;
+                 rep->orig_reg = old_reg;
+                 rep->new_reg = new_reg;
+                 
+                 if (! reg_move_replaces)
+                   reg_move_replaces = rep;
+                 else
+                   {
+                     rep->next = reg_move_replaces;
+                     reg_move_replaces = rep;
+                   }
+                 
+                 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
+                 if (rescan)
+                   df_insn_rescan (g->nodes[i_use].insn);
                }
-
-             replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
-             if (rescan)
-               df_insn_rescan (g->nodes[i_use].insn);
+             
+             prev_reg = new_reg;
            }
-
-         prev_reg = new_reg;
+         sbitmap_vector_free (uses_of_def);
        }
-      sbitmap_vector_free (uses_of_defs);
     }
   return reg_move_replaces;
 }
@@ -689,7 +815,7 @@ duplicate_insns_of_cycles (partial_sched
     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
       {
        ddg_node_ptr u_node = ps_ij->node;
-       int j, i_reg_moves;
+       int i_reg_moves;
        rtx reg_move = NULL_RTX;
 
         /* Do not duplicate any insn which refers to count_reg as it
@@ -704,43 +830,68 @@ duplicate_insns_of_cycles (partial_sched
 
        if (for_prolog)
          {
-           /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
-              number of reg_moves starting with the second occurrence of
-              u_node, which is generated if its SCHED_STAGE <= to_stage.  */
-           i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
-           i_reg_moves = MAX (i_reg_moves, 0);
-           i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
-           /* The reg_moves start from the *first* reg_move backwards.  */
-           if (i_reg_moves)
+           int i;
+           VEC (regmove_info_ptr, heap) *rinfo =
+             node_sched_params[u_node->cuid].insn_regmove_info;
+           regmove_info_ptr def;
+           
+           for (i = 0; VEC_iterate (regmove_info_ptr, rinfo, i, def); i++)
              {
-               reg_move = SCHED_FIRST_REG_MOVE (u_node);
-               for (j = 1; j < i_reg_moves; j++)
-                 reg_move = PREV_INSN (reg_move);
+               int j;
+               
+               /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
+                  number of reg_moves starting with the second occurrence of
+                  u_node, which is generated if its SCHED_STAGE <= to_stage.  
*/
+               i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
+               i_reg_moves = MAX (i_reg_moves, 0);
+               i_reg_moves = MIN (i_reg_moves, def->nreg_moves);
+               
+               /* The reg_moves start from the *first* reg_move backwards.  */
+               if (i_reg_moves)
+                 {
+                   reg_move = def->first_reg_move;
+                   for (j = 1; j < i_reg_moves; j++)
+                     reg_move = PREV_INSN (reg_move);
+                 }
+               
+               for (j = 0; j < i_reg_moves; 
+                    j++, reg_move = NEXT_INSN (reg_move))
+                 emit_insn (copy_rtx (PATTERN (reg_move)));
              }
          }
        else /* It's for the epilog.  */
          {
-           /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
-              starting to decrease one stage after u_node no longer occurs;
-              that is, generate all reg_moves until
-              SCHED_STAGE (u_node) == from_stage - 1.  */
-           i_reg_moves = SCHED_NREG_MOVES (u_node)
-                      - (from_stage - SCHED_STAGE (u_node) - 1);
-           i_reg_moves = MAX (i_reg_moves, 0);
-           i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
-           /* The reg_moves start from the *last* reg_move forwards.  */
-           if (i_reg_moves)
+           int i;
+           VEC (regmove_info_ptr, heap) *rinfo =
+             node_sched_params[u_node->cuid].insn_regmove_info;
+           regmove_info_ptr def;
+           
+           for (i = 0; VEC_iterate (regmove_info_ptr, rinfo, i, def); i++)
              {
-               reg_move = SCHED_FIRST_REG_MOVE (u_node);
-               for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
-                 reg_move = PREV_INSN (reg_move);
+               int j;
+               
+               /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
+                  starting to decrease one stage after u_node no longer occurs;
+                  that is, generate all reg_moves until
+                  SCHED_STAGE (u_node) == from_stage - 1.  */
+               i_reg_moves = def->nreg_moves
+                 - (from_stage - SCHED_STAGE (u_node) - 1);
+               i_reg_moves = MAX (i_reg_moves, 0);
+               i_reg_moves = MIN (i_reg_moves, def->nreg_moves);
+               
+               /* The reg_moves start from the *last* reg_move forwards.  */
+               if (i_reg_moves)
+                 {
+                   reg_move = def->first_reg_move;
+                   for (j = 1; j < def->nreg_moves; j++)
+                     reg_move = PREV_INSN (reg_move);
+                 }
+               
+               for (j = 0; j < i_reg_moves; 
+                    j++, reg_move = NEXT_INSN (reg_move))
+                 emit_insn (copy_rtx (PATTERN (reg_move)));
              }
          }
-
-       for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
-         emit_insn (copy_rtx (PATTERN (reg_move)));
        if (SCHED_STAGE (u_node) >= from_stage
            && SCHED_STAGE (u_node) <= to_stage)
          duplicate_insn_chain (u_node->first_note, u_node->insn);
@@ -929,6 +1080,25 @@ setup_sched_infos (void)
 /* Used to calculate the upper bound of ii.  */
 #define MAXII_FACTOR 2
 
+/* Callback for for_each_inc_dec.  Records in ARG the register DEST
+   which is defined by the auto operation.  */
+static int
+record_inc_dec_insn_info (rtx mem ATTRIBUTE_UNUSED,
+                         rtx op ATTRIBUTE_UNUSED,
+                         rtx dest, rtx src ATTRIBUTE_UNUSED,
+                         rtx srcoff ATTRIBUTE_UNUSED, void *arg)
+{
+  node_sched_params_ptr params = (node_sched_params_ptr) arg;
+  regmove_info_ptr insn_regmove_info =
+    (regmove_info_ptr) xcalloc (1, sizeof (struct regmove_info));
+
+  insn_regmove_info->def = copy_rtx (dest);
+  VEC_safe_push (regmove_info_ptr, heap, params->insn_regmove_info,
+                insn_regmove_info);
+
+  return -1;
+}
+
 /* Main entry point, perform SMS scheduling on the loops of the function
    that consist of single basic blocks.  */
 static void
@@ -1062,12 +1232,10 @@ sms_schedule (void)
        continue;
       }
 
-      /* Don't handle BBs with calls or barriers or auto-increment insns 
-        (to avoid creating invalid reg-moves for the auto-increment insns),
+      /* Don't handle BBs with calls or barriers
         or !single_set with the exception of instructions that include
         count_reg---these instructions are part of the control part
         that do-loop recognizes.
-         ??? Should handle auto-increment insns.
          ??? Should handle insns defining subregs.  */
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
       {
@@ -1078,7 +1246,6 @@ sms_schedule (void)
             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
                 && !reg_mentioned_p (count_reg, insn))
-            || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
             || (INSN_P (insn) && (set = single_set (insn))
                 && GET_CODE (SET_DEST (set)) == SUBREG))
         break;
@@ -1092,8 +1259,6 @@ sms_schedule (void)
                fprintf (dump_file, "SMS loop-with-call\n");
              else if (BARRIER_P (insn))
                fprintf (dump_file, "SMS loop-with-barrier\n");
-              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
-                fprintf (dump_file, "SMS reg inc\n");
               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
@@ -1312,7 +1477,7 @@ sms_schedule (void)
        }
 
       free_partial_schedule (ps);
-      free (node_sched_params);
+      free_node_sched_params (g);
       free (node_order);
       free_ddg (g);
     }

Reply via email to