PING?

> 
> Hi Sterling,
> 
>     I made some improvements to the patch. Two changes:
>     1. TARGET_LOOPS is now used as a condition of the doloop-related
> patterns, which is more elegant.
>     2. As the trip count register of the zero-cost loop may potentially be
> spilled, we need to change the patterns in order to handle this issue. The
> solution is similar to the one adopted by the c6x backend: when that happens,
> just turn the zero-cost loop into a regular loop once reload is completed.
>     Attached please find version 4 of the patch. Regression tested with
> "make check" using xtensa-elf-gcc and the simulator.
>     OK for trunk?
> 
> Index: gcc/ChangeLog
> ================================================================
> ===
> --- gcc/ChangeLog    (revision 216079)
> +++ gcc/ChangeLog    (working copy)
> @@ -1,3 +1,20 @@
> +2014-10-10  Felix Yang  <felix.y...@huawei.com>
> +
> +    * config/xtensa/xtensa.h (TARGET_LOOPS): New Macro.
> +    * config/xtensa/xtensa.c (xtensa_reorg): New.
> +    (xtensa_reorg_loops): New.
> +    (xtensa_can_use_doloop_p): New.
> +    (xtensa_invalid_within_doloop): New.
> +    (hwloop_optimize): New.
> +    (hwloop_fail): New.
> +    (hwloop_pattern_reg): New.
> +    (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end
> label.
> +    (xtensa_doloop_hooks): Define.
> +    * config/xtensa/xtensa.md (doloop_end): New.
> +    (loop_end): New
> +    (zero_cost_loop_start): Rewritten.
> +    (zero_cost_loop_end): Rewritten.
> +
>  2014-10-10  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>
> 
>      * configure.ac: Add --enable-fix-cortex-a53-835769 option.
> Index: gcc/config/xtensa/xtensa.md
> ================================================================
> ===
> --- gcc/config/xtensa/xtensa.md    (revision 216079)
> +++ gcc/config/xtensa/xtensa.md    (working copy)
> @@ -35,6 +35,8 @@
>    (UNSPEC_TLS_CALL    9)
>    (UNSPEC_TP        10)
>    (UNSPEC_MEMW        11)
> +  (UNSPEC_LSETUP_START  12)
> +  (UNSPEC_LSETUP_END    13)
> 
>    (UNSPECV_SET_FP    1)
>    (UNSPECV_ENTRY    2)
> @@ -1289,41 +1291,120 @@
>     (set_attr "length"    "3")])
> 
> 
> +;; Zero-overhead looping support.
> +
>  ;; Define the loop insns used by bct optimization to represent the -;; start 
> and
> end of a zero-overhead loop (in loop.c).  This start -;; template generates 
> the
> loop insn; the end template doesn't generate -;; any instructions since loop 
> end
> is handled in hardware.
> +;; start and end of a zero-overhead loop.  This start template
> +generates ;; the loop insn; the end template doesn't generate any
> +instructions since ;; loop end is handled in hardware.
> 
>  (define_insn "zero_cost_loop_start"
>    [(set (pc)
> -    (if_then_else (eq (match_operand:SI 0 "register_operand" "a")
> -              (const_int 0))
> -              (label_ref (match_operand 1 "" ""))
> -              (pc)))
> -   (set (reg:SI 19)
> -    (plus:SI (match_dup 0) (const_int -1)))]
> -  ""
> -  "loopnez\t%0, %l1"
> +        (if_then_else (ne (match_operand:SI 0 "register_operand" "2")
> +                          (const_int 1))
> +                      (label_ref (match_operand 1 "" ""))
> +                      (pc)))
> +   (set (match_operand:SI 2 "register_operand" "=a")
> +        (plus (match_dup 0)
> +              (const_int -1)))
> +   (unspec [(const_int 0)] UNSPEC_LSETUP_START)]  "TARGET_LOOPS &&
> + optimize"
> +  "loop\t%0, %l1_LEND"
>    [(set_attr "type"    "jump")
>     (set_attr "mode"    "none")
>     (set_attr "length"    "3")])
> 
>  (define_insn "zero_cost_loop_end"
>    [(set (pc)
> -    (if_then_else (ne (reg:SI 19) (const_int 0))
> -              (label_ref (match_operand 0 "" ""))
> -              (pc)))
> -   (set (reg:SI 19)
> -    (plus:SI (reg:SI 19) (const_int -1)))]
> -  ""
> +        (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand"
> "2,2")
> +                          (const_int 1))
> +                      (label_ref (match_operand 1 "" ""))
> +                      (pc)))
> +   (set (match_operand:SI 2 "nonimmediate_operand" "=a,m")
> +        (plus (match_dup 0)
> +              (const_int -1)))
> +   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
> +   (clobber (match_scratch:SI 3 "=X,&r"))]  "TARGET_LOOPS && optimize"
> +  "#"
> +  [(set_attr "type"    "jump")
> +   (set_attr "mode"    "none")
> +   (set_attr "length"    "0")])
> +
> +(define_insn "loop_end"
> +  [(set (pc)
> +        (if_then_else (ne (match_operand:SI 0 "register_operand" "2")
> +                          (const_int 1))
> +                      (label_ref (match_operand 1 "" ""))
> +                      (pc)))
> +   (set (match_operand:SI 2 "register_operand" "=a")
> +        (plus (match_dup 0)
> +              (const_int -1)))
> +   (unspec [(const_int 0)] UNSPEC_LSETUP_END)]
> +  "TARGET_LOOPS && optimize"
>  {
> -    xtensa_emit_loop_end (insn, operands);
> -    return "";
> +  xtensa_emit_loop_end (insn, operands);  return "";
>  }
>    [(set_attr "type"    "jump")
>     (set_attr "mode"    "none")
>     (set_attr "length"    "0")])
> 
> +(define_split
> +  [(set (pc)
> +        (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "")
> +                          (const_int 1))
> +                      (label_ref (match_operand 1 "" ""))
> +                      (pc)))
> +   (set (match_operand:SI 2 "nonimmediate_operand" "")
> +        (plus:SI (match_dup 0)
> +                 (const_int -1)))
> +   (unspec [(const_int 0)] UNSPEC_LSETUP_END)
> +   (clobber (match_scratch 3))]
> +  "TARGET_LOOPS && optimize && reload_completed"
> +  [(const_int 0)]
> +{
> +  if (!REG_P (operands[0]))
> +    {
> +      rtx test;
> +
> +      /* Fallback into a normal conditional branch insn.  */
> +      emit_move_insn (operands[3], operands[0]);
> +      emit_insn (gen_addsi3 (operands[3], operands[3], constm1_rtx));
> +      emit_move_insn (operands[0], operands[3]);
> +      test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx);
> +      emit_jump_insn (gen_cbranchsi4 (test, operands[3],
> +                                      const0_rtx, operands[1]));
> +    }
> +  else
> +    {
> +      emit_jump_insn (gen_loop_end (operands[0], operands[1],
> operands[2]));
> +    }
> +
> +  DONE;
> +})
> +
> +; operand 0 is the loop count pseudo register ; operand 1 is the label
> +to jump to at the top of the loop (define_expand "doloop_end"
> +  [(parallel [(set (pc) (if_then_else
> +                          (ne (match_operand:SI 0 "" "")
> +                              (const_int 1))
> +                          (label_ref (match_operand 1 "" ""))
> +                          (pc)))
> +              (set (match_dup 0)
> +                   (plus:SI (match_dup 0)
> +                            (const_int -1)))
> +              (unspec [(const_int 0)] UNSPEC_LSETUP_END)
> +              (clobber (match_dup 2))])] ; match_scratch
> +  "TARGET_LOOPS && optimize"
> +{
> +  /* The loop optimizer doesn't check the predicates... */
> +  if (GET_MODE (operands[0]) != SImode)
> +    FAIL;
> +  operands[2] = gen_rtx_SCRATCH (SImode);
> +})
> +
> 
>  ;; Setting a register from a comparison.
> 
> Index: gcc/config/xtensa/xtensa.c
> ================================================================
> ===
> --- gcc/config/xtensa/xtensa.c    (revision 216079)
> +++ gcc/config/xtensa/xtensa.c    (working copy)
> @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3.  If not see
> #include "gimplify.h"
>  #include "df.h"
>  #include "builtins.h"
> +#include "dumpfile.h"
> +#include "hw-doloop.h"
> 
> 
>  /* Enumeration for all of the relational tests, so that we can build @@ 
> -186,6
> +188,10 @@ static reg_class_t xtensa_secondary_reload (bool,
> 
>  static bool constantpool_address_p (const_rtx addr);  static bool
> xtensa_legitimate_constant_p (enum machine_mode, rtx);
> +static void xtensa_reorg (void);
> +static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int
> &,
> +                                     unsigned int, bool); static const
> +char *xtensa_invalid_within_doloop (const rtx_insn *);
> 
>  static bool xtensa_member_type_forces_blk (const_tree,
>                         enum machine_mode mode); @@ -312,6
> +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE  #undef
> TARGET_LEGITIMATE_CONSTANT_P  #define
> TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p
> 
> +#undef TARGET_MACHINE_DEPENDENT_REORG
> +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg
> +
> +#undef TARGET_CAN_USE_DOLOOP_P
> +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p
> +
> +#undef TARGET_INVALID_WITHIN_DOLOOP
> +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
> 
> 
> @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operand
>          }
>      }
> 
> -  output_asm_insn ("# loop end for %0", operands);
> +  output_asm_insn ("%1_LEND:", operands);
>  }
> 
> 
> @@ -3712,4 +3727,236 @@ xtensa_legitimate_constant_p (enum
> machine_mode mo
>    return !xtensa_tls_referenced_p (x);
>  }
> 
> +/* Implement TARGET_CAN_USE_DOLOOP_P.  */
> +
> +static bool
> +xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
> +                         unsigned int loop_depth, bool entered_at_top)
> +{
> +  /* Considering limitations in the hardware, only use doloop
> +     for innermost loops which must be entered from the top.  */
> +  if (loop_depth > 1 || !entered_at_top)
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Return NULL if INSN is valid within a low-overhead loop.
> +   Otherwise return why doloop cannot be applied.  */
> +
> +static const char *
> +xtensa_invalid_within_doloop (const rtx_insn *insn) {
> +  if (CALL_P (insn))
> +    return "Function call in the loop.";
> +
> +  if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
> +    return "Return from a call instruction in the loop.";
> +
> +  return NULL;
> +}
> +
> +/* Optimize LOOP.  */
> +
> +static bool
> +hwloop_optimize (hwloop_info loop)
> +{
> +  int i;
> +  edge entry_edge;
> +  basic_block entry_bb;
> +  rtx iter_reg;
> +  rtx_insn *insn, *seq, *entry_after;
> +
> +  if (loop->depth > 1)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d is not innermost\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  if (!loop->incoming_dest)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d has more than one entry\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  if (loop->incoming_dest != loop->head)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d is not entered from head\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  if (loop->has_call || loop->has_asm)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d has invalid insn\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  /* Scan all the blocks to make sure they don't use iter_reg.  */  if
> + (loop->iter_reg_used || loop->iter_reg_used_outside)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d uses iterator\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  /* Check if start_label appears before doloop_end.  */  insn =
> + loop->start_label;  while (insn && insn != loop->loop_end)
> +    insn = NEXT_INSN (insn);
> +
> +  if (!insn)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
> +                 loop->loop_no);
> +      return false;
> +    }
> +
> +  /* Get the loop iteration register.  */  iter_reg = loop->iter_reg;
> +
> +  gcc_assert (REG_P (iter_reg));
> +
> +  entry_edge = NULL;
> +
> +  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
> +    if (entry_edge->flags & EDGE_FALLTHRU)
> +      break;
> +
> +  if (entry_edge == NULL)
> +    return false;
> +
> +  /* Place the zero_cost_loop_start instruction before the loop.  */
> + entry_bb = entry_edge->src;
> +
> +  start_sequence ();
> +
> +  insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg,
> +                                              loop->start_label,
> +                                              loop->iter_reg));
> +
> +  seq = get_insns ();
> +
> +  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
> +    {
> +      basic_block new_bb;
> +      edge e;
> +      edge_iterator ei;
> +
> +      emit_insn_before (seq, BB_HEAD (loop->head));
> +      seq = emit_label_before (gen_label_rtx (), seq);
> +      new_bb = create_basic_block (seq, insn, entry_bb);
> +      FOR_EACH_EDGE (e, ei, loop->incoming)
> +        {
> +          if (!(e->flags & EDGE_FALLTHRU))
> +            redirect_edge_and_branch_force (e, new_bb);
> +          else
> +            redirect_edge_succ (e, new_bb);
> +        }
> +
> +      make_edge (new_bb, loop->head, 0);
> +    }
> +  else
> +    {
> +      entry_after = BB_END (entry_bb);
> +      while (DEBUG_INSN_P (entry_after)
> +             || (NOTE_P (entry_after)
> +                 && NOTE_KIND (entry_after) !=
> NOTE_INSN_BASIC_BLOCK))
> +        entry_after = PREV_INSN (entry_after);
> +
> +      emit_insn_after (seq, entry_after);
> +    }
> +
> +  end_sequence ();
> +
> +  return true;
> +}
> +
> +/* A callback for the hw-doloop pass.  Called when a loop we have discovered
> +   turns out not to be optimizable; we have to split the loop_end pattern 
> into
> +   a subtract and a test.  */
> +
> +static void
> +hwloop_fail (hwloop_info loop)
> +{
> +  rtx test;
> +  rtx_insn *insn = loop->loop_end;
> +
> +  emit_insn_before (gen_addsi3 (loop->iter_reg,
> +                                loop->iter_reg,
> +                                constm1_rtx),
> +                    loop->loop_end);
> +
> +  test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);  insn =
> + emit_jump_insn_before (gen_cbranchsi4 (test,
> +                                                loop->iter_reg,
> const0_rtx,
> +                                                loop->start_label),
> +                                loop->loop_end);
> +
> +  JUMP_LABEL (insn) = loop->start_label;
> +  LABEL_NUSES (loop->start_label)++;
> +  delete_insn (loop->loop_end);
> +}
> +
> +/* A callback for the hw-doloop pass.  This function examines INSN; if
> +   it is a doloop_end pattern we recognize, return the reg rtx for the
> +   loop counter.  Otherwise, return NULL_RTX.  */
> +
> +static rtx
> +hwloop_pattern_reg (rtx_insn *insn)
> +{
> +  rtx reg;
> +
> +  if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
> +    return NULL_RTX;
> +
> +  reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));  if (!REG_P (reg))
> +    return NULL_RTX;
> +
> +  return reg;
> +}
> +
> +
> +static struct hw_doloop_hooks xtensa_doloop_hooks = {
> +  hwloop_pattern_reg,
> +  hwloop_optimize,
> +  hwloop_fail
> +};
> +
> +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
> +   and tries to rewrite the RTL of these loops so that proper Xtensa
> +   hardware loops are generated.  */
> +
> +static void
> +xtensa_reorg_loops (void)
> +{
> +  reorg_loops (false, &xtensa_doloop_hooks); }
> +
> +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass.  */
> +
> +static void
> +xtensa_reorg (void)
> +{
> +  /* We are freeing block_for_insn in the toplev to keep compatibility
> +     with old MDEP_REORGS that are not CFG based.  Recompute it now.
> +*/
> +  compute_bb_for_insn ();
> +
> +  df_analyze ();
> +
> +  /* Doloop optimization.  */
> +  xtensa_reorg_loops ();
> +}
> +
>  #include "gt-xtensa.h"
> Index: gcc/config/xtensa/xtensa.h
> ================================================================
> ===
> --- gcc/config/xtensa/xtensa.h    (revision 216079)
> +++ gcc/config/xtensa/xtensa.h    (working copy)
> @@ -61,6 +61,7 @@ extern unsigned xtensa_current_frame_size;
>  #define TARGET_S32C1I        XCHAL_HAVE_S32C1I
>  #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
>  #define TARGET_THREADPTR    XCHAL_HAVE_THREADPTR
> +#define TARGET_LOOPS            XCHAL_HAVE_LOOPS
> 
>  #define TARGET_DEFAULT \
>    ((XCHAL_HAVE_L32R    ? 0 : MASK_CONST16) |                \
> 
> Cheers,
> Felix
> 
> 
> On Thu, Oct 9, 2014 at 6:52 PM, Felix Yang <fei.yang0...@gmail.com> wrote:
> > Hello Sterling,
> >
> >      My paperwork with the FSF is finished and we can now move
> > forward with this patch :-)
> >      I rebased the patch on the latest trunk. Attached please find
> > version 3 of the patch.
> >      The enclosed patch also addresses the two points you raised;
> > does it look good to you?
> >      Make check regression tested with xtensa-elf-gcc built from trunk
> > with this patch.
> >      OK to apply?
> >
> > Index: gcc/ChangeLog
> >
> ================================================================
> ===
> > --- gcc/ChangeLog    (revision 216036)
> > +++ gcc/ChangeLog    (working copy)
> > @@ -1,3 +1,19 @@
> > +2014-10-09  Felix Yang  <felix.y...@huawei.com>
> > +
> > +    * config/xtensa/xtensa.h (TARGET_LOOPS): New Macro.
> > +    * config/xtensa/xtensa.c (xtensa_reorg): New.
> > +    (xtensa_reorg_loops): New.
> > +    (xtensa_can_use_doloop_p): New.
> > +    (xtensa_invalid_within_doloop): New.
> > +    (hwloop_optimize): New.
> > +    (hwloop_fail): New.
> > +    (hwloop_pattern_reg): New.
> > +    (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end
> label.
> > +    (xtensa_doloop_hooks): Define.
> > +    * config/xtensa/xtensa.md (doloop_end): New.
> > +    (zero_cost_loop_start): Rewritten.
> > +    (zero_cost_loop_end): Rewritten.
> > +
> >  2014-10-09  Joern Rennecke  <joern.renne...@embecosm.com>
> >
> >      * config/avr/avr.opt (mmcu=): Change to have a string value.
> > Index: gcc/config/xtensa/xtensa.md
> >
> ================================================================
> ===
> > --- gcc/config/xtensa/xtensa.md    (revision 216036)
> > +++ gcc/config/xtensa/xtensa.md    (working copy)
> > @@ -35,6 +35,8 @@
> >    (UNSPEC_TLS_CALL    9)
> >    (UNSPEC_TP        10)
> >    (UNSPEC_MEMW        11)
> > +  (UNSPEC_LSETUP_START  12)
> > +  (UNSPEC_LSETUP_END    13)
> >
> >    (UNSPECV_SET_FP    1)
> >    (UNSPECV_ENTRY    2)
> > @@ -1289,41 +1291,67 @@
> >     (set_attr "length"    "3")])
> >
> >
> > +;; Zero-overhead looping support.
> > +
> >  ;; Define the loop insns used by bct optimization to represent the
> > -;; start and end of a zero-overhead loop (in loop.c).  This start -;;
> > template generates the loop insn; the end template doesn't generate
> > -;; any instructions since loop end is handled in hardware.
> > +;; start and end of a zero-overhead loop.  This start template
> > +generates ;; the loop insn; the end template doesn't generate any
> > +instructions since ;; loop end is handled in hardware.
> >
> >  (define_insn "zero_cost_loop_start"
> >    [(set (pc)
> > -    (if_then_else (eq (match_operand:SI 0 "register_operand" "a")
> > -              (const_int 0))
> > -              (label_ref (match_operand 1 "" ""))
> > -              (pc)))
> > -   (set (reg:SI 19)
> > -    (plus:SI (match_dup 0) (const_int -1)))]
> > +        (if_then_else (ne (match_operand:SI 0 "register_operand" "a")
> > +                          (const_int 1))
> > +                      (label_ref (match_operand 1 "" ""))
> > +                      (pc)))
> > +   (set (match_operand:SI 2 "register_operand" "+a0")
> > +        (plus (match_dup 2)
> > +              (const_int -1)))
> > +   (unspec [(const_int 0)] UNSPEC_LSETUP_START)]
> >    ""
> > -  "loopnez\t%0, %l1"
> > +  "loop\t%0, %l1_LEND"
> >    [(set_attr "type"    "jump")
> >     (set_attr "mode"    "none")
> >     (set_attr "length"    "3")])
> >
> >  (define_insn "zero_cost_loop_end"
> >    [(set (pc)
> > -    (if_then_else (ne (reg:SI 19) (const_int 0))
> > -              (label_ref (match_operand 0 "" ""))
> > -              (pc)))
> > -   (set (reg:SI 19)
> > -    (plus:SI (reg:SI 19) (const_int -1)))]
> > +        (if_then_else (ne (match_operand:SI 0 "register_operand" "a")
> > +                          (const_int 1))
> > +                      (label_ref (match_operand 1 "" ""))
> > +                      (pc)))
> > +   (set (match_operand:SI 2 "register_operand" "+a0")
> > +        (plus (match_dup 2)
> > +              (const_int -1)))
> > +   (unspec [(const_int 0)] UNSPEC_LSETUP_END)]
> >    ""
> >  {
> > -    xtensa_emit_loop_end (insn, operands);
> > -    return "";
> > +  xtensa_emit_loop_end (insn, operands);  return "";
> >  }
> >    [(set_attr "type"    "jump")
> >     (set_attr "mode"    "none")
> >     (set_attr "length"    "0")])
> >
> > +; operand 0 is the loop count pseudo register ; operand 1 is the
> > +label to jump to at the top of the loop (define_expand "doloop_end"
> > +  [(parallel [(set (pc) (if_then_else
> > +                          (ne (match_operand:SI 0 "" "")
> > +                              (const_int 1))
> > +                          (label_ref (match_operand 1 "" ""))
> > +                          (pc)))
> > +              (set (match_dup 0)
> > +                   (plus:SI (match_dup 0)
> > +                            (const_int -1)))
> > +              (unspec [(const_int 0)] UNSPEC_LSETUP_END)])]
> > +  ""
> > +{
> > +  /* The loop optimizer doesn't check the predicates... */
> > +  if (GET_MODE (operands[0]) != SImode)
> > +    FAIL;
> > +})
> > +
> >
> >  ;; Setting a register from a comparison.
> >
> > Index: gcc/config/xtensa/xtensa.c
> >
> ================================================================
> ===
> > --- gcc/config/xtensa/xtensa.c    (revision 216036)
> > +++ gcc/config/xtensa/xtensa.c    (working copy)
> > @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "gimplify.h"
> >  #include "df.h"
> >  #include "builtins.h"
> > +#include "dumpfile.h"
> > +#include "hw-doloop.h"
> >
> >
> >  /* Enumeration for all of the relational tests, so that we can build
> > @@ -186,6 +188,10 @@ static reg_class_t xtensa_secondary_reload (bool,
> >
> >  static bool constantpool_address_p (const_rtx addr);
> >  static bool xtensa_legitimate_constant_p (enum machine_mode, rtx);
> > +static void xtensa_reorg (void);
> > +static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int
> &,
> > +                                     unsigned int, bool);
> > +static const char *xtensa_invalid_within_doloop (const rtx_insn *);
> >
> >  static bool xtensa_member_type_forces_blk (const_tree,
> >                         enum machine_mode mode);
> > @@ -312,6 +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE
> >  #undef TARGET_LEGITIMATE_CONSTANT_P
> >  #define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p
> >
> > +#undef TARGET_MACHINE_DEPENDENT_REORG
> > +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg
> > +
> > +#undef TARGET_CAN_USE_DOLOOP_P
> > +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p
> > +
> > +#undef TARGET_INVALID_WITHIN_DOLOOP
> > +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop
> > +
> >  struct gcc_target targetm = TARGET_INITIALIZER;
> >
> >
> > @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx
> *operand
> >          }
> >      }
> >
> > -  output_asm_insn ("# loop end for %0", operands);
> > +  output_asm_insn ("%1_LEND:", operands);
> >  }
> >
> >
> > @@ -3712,4 +3727,239 @@ xtensa_legitimate_constant_p (enum
> machine_mode mo
> >    return !xtensa_tls_referenced_p (x);
> >  }
> >
> > +/* Implement TARGET_CAN_USE_DOLOOP_P.  */
> > +
> > +static bool
> > +xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
> > +                         unsigned int loop_depth, bool
> entered_at_top)
> > +{
> > +  if (!TARGET_LOOPS)
> > +    return false;
> > +
> > +  /* Considering limitations in the hardware, only use doloop
> > +     for innermost loops which must be entered from the top.  */
> > +  if (loop_depth > 1 || !entered_at_top)
> > +    return false;
> > +
> > +  return true;
> > +}
> > +
> > +/* Return NULL if INSN is valid within a low-overhead loop.
> > +   Otherwise return why doloop cannot be applied.  */
> > +
> > +static const char *
> > +xtensa_invalid_within_doloop (const rtx_insn *insn)
> > +{
> > +  if (CALL_P (insn))
> > +    return "Function call in the loop.";
> > +
> > +  if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
> > +    return "Return from a call instruction in the loop.";
> > +
> > +  return NULL;
> > +}
> > +
> > +/* Optimize LOOP.  */
> > +
> > +static bool
> > +hwloop_optimize (hwloop_info loop)
> > +{
> > +  int i;
> > +  edge entry_edge;
> > +  basic_block entry_bb;
> > +  rtx iter_reg;
> > +  rtx_insn *insn, *seq, *entry_after;
> > +
> > +  if (loop->depth > 1)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d is not innermost\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  if (!loop->incoming_dest)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d has more than one entry\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  if (loop->incoming_dest != loop->head)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d is not entered from head\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  if (loop->has_call || loop->has_asm)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d has invalid insn\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  /* Scan all the blocks to make sure they don't use iter_reg.  */
> > +  if (loop->iter_reg_used || loop->iter_reg_used_outside)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d uses iterator\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  /* Check if start_label appears before doloop_end.  */
> > +  insn = loop->start_label;
> > +  while (insn && insn != loop->loop_end)
> > +    insn = NEXT_INSN (insn);
> > +
> > +  if (!insn)
> > +    {
> > +      if (dump_file)
> > +        fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
> > +                 loop->loop_no);
> > +      return false;
> > +    }
> > +
> > +  /* Get the loop iteration register.  */
> > +  iter_reg = loop->iter_reg;
> > +
> > +  gcc_assert (REG_P (iter_reg));
> > +
> > +  entry_edge = NULL;
> > +
> > +  FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
> > +    if (entry_edge->flags & EDGE_FALLTHRU)
> > +      break;
> > +
> > +  if (entry_edge == NULL)
> > +    return false;
> > +
> > +  /* Place the zero_cost_loop_start instruction before the loop.  */
> > +  entry_bb = entry_edge->src;
> > +
> > +  start_sequence ();
> > +
> > +  insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg,
> > +                                              loop->start_label,
> > +                                              loop->iter_reg));
> > +
> > +  seq = get_insns ();
> > +
> > +  if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
> > +    {
> > +      basic_block new_bb;
> > +      edge e;
> > +      edge_iterator ei;
> > +
> > +      emit_insn_before (seq, BB_HEAD (loop->head));
> > +      seq = emit_label_before (gen_label_rtx (), seq);
> > +      new_bb = create_basic_block (seq, insn, entry_bb);
> > +      FOR_EACH_EDGE (e, ei, loop->incoming)
> > +        {
> > +          if (!(e->flags & EDGE_FALLTHRU))
> > +            redirect_edge_and_branch_force (e, new_bb);
> > +          else
> > +            redirect_edge_succ (e, new_bb);
> > +        }
> > +
> > +      make_edge (new_bb, loop->head, 0);
> > +    }
> > +  else
> > +    {
> > +      entry_after = BB_END (entry_bb);
> > +      while (DEBUG_INSN_P (entry_after)
> > +             || (NOTE_P (entry_after)
> > +                 && NOTE_KIND (entry_after) !=
> NOTE_INSN_BASIC_BLOCK))
> > +        entry_after = PREV_INSN (entry_after);
> > +
> > +      emit_insn_after (seq, entry_after);
> > +    }
> > +
> > +  end_sequence ();
> > +
> > +  return true;
> > +}
> > +
> > +/* A callback for the hw-doloop pass.  Called when a loop we have
> discovered
> > +   turns out not to be optimizable; we have to split the loop_end pattern
> into
> > +   a subtract and a test.  */
> > +
> > +static void
> > +hwloop_fail (hwloop_info loop)
> > +{
> > +  rtx test;
> > +  rtx_insn *insn = loop->loop_end;
> > +
> > +  emit_insn_before (gen_addsi3 (loop->iter_reg,
> > +                                loop->iter_reg,
> > +                                constm1_rtx),
> > +                    loop->loop_end);
> > +
> > +  test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
> > +  insn = emit_jump_insn_before (gen_cbranchsi4 (test,
> > +                                                loop->iter_reg,
> const0_rtx,
> > +
> loop->start_label),
> > +                                loop->loop_end);
> > +
> > +  JUMP_LABEL (insn) = loop->start_label;
> > +  LABEL_NUSES (loop->start_label)++;
> > +  delete_insn (loop->loop_end);
> > +}
> > +
> > +/* A callback for the hw-doloop pass.  This function examines INSN; if
> > +   it is a doloop_end pattern we recognize, return the reg rtx for the
> > +   loop counter.  Otherwise, return NULL_RTX.  */
> > +
> > +static rtx
> > +hwloop_pattern_reg (rtx_insn *insn)
> > +{
> > +  rtx reg;
> > +
> > +  if (!JUMP_P (insn) || recog_memoized (insn) !=
> CODE_FOR_zero_cost_loop_end)
> > +    return NULL_RTX;
> > +
> > +  reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
> > +  if (!REG_P (reg))
> > +    return NULL_RTX;
> > +
> > +  return reg;
> > +}
> > +
> > +
> > +static struct hw_doloop_hooks xtensa_doloop_hooks =
> > +{
> > +  hwloop_pattern_reg,
> > +  hwloop_optimize,
> > +  hwloop_fail
> > +};
> > +
> > +/* Run from machine_dependent_reorg, this pass looks for doloop_end
> insns
> > +   and tries to rewrite the RTL of these loops so that proper Xtensa
> > +   hardware loops are generated.  */
> > +
> > +static void
> > +xtensa_reorg_loops (void)
> > +{
> > +  reorg_loops (false, &xtensa_doloop_hooks);
> > +}
> > +
> > +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass.  */
> > +
> > +static void
> > +xtensa_reorg (void)
> > +{
> > +  /* We are freeing block_for_insn in the toplev to keep compatibility
> > +     with old MDEP_REORGS that are not CFG based.  Recompute it now.
> */
> > +  compute_bb_for_insn ();
> > +
> > +  df_analyze ();
> > +
> > +  /* Doloop optimization.  */
> > +  xtensa_reorg_loops ();
> > +}
> > +
> >  #include "gt-xtensa.h"
> > Index: gcc/config/xtensa/xtensa.h
> >
> ================================================================
> ===
> > --- gcc/config/xtensa/xtensa.h    (revision 216036)
> > +++ gcc/config/xtensa/xtensa.h    (working copy)
> > @@ -61,6 +61,7 @@ extern unsigned xtensa_current_frame_size;
> >  #define TARGET_S32C1I        XCHAL_HAVE_S32C1I
> >  #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
> >  #define TARGET_THREADPTR    XCHAL_HAVE_THREADPTR
> > +#define TARGET_LOOPS            XCHAL_HAVE_LOOPS
> >
> >  #define TARGET_DEFAULT \
> >    ((XCHAL_HAVE_L32R    ? 0 : MASK_CONST16) |                \
> > Cheers,
> > Felix
> >
> >
> > On Tue, Jan 14, 2014 at 1:23 AM, Sterling Augustine
> > <augustine.sterl...@gmail.com> wrote:
> >> On Thu, Jan 9, 2014 at 7:48 PM, Yangfei (Felix) <felix.y...@huawei.com>
> wrote:
> >>> And here is the xtensa configuration tested (include/xtensa-config.h):
> >>>
> >>> #define XCHAL_HAVE_BE           0
> >>> #define XCHAL_HAVE_LOOPS                1
> >>
> >>
> >> Hi Felix,
> >>
> >> I like this patch, and expect I will approve it. However, I would like
> >> you to do two more things before I do:
> >>
> >> 1. Ensure it doesn't generate zcl's when:
> >>
> >> #define XCHAL_HAVE_LOOPS 0
> >>
> >> 2. Ensure it doesn't produce loop bodies that contain ret, retw,
> >> ret.n or retw.n as the last instruction. It might be easier to just
> >> disallow them in loop bodies entirely though.
> >>
> >> Thanks!

Reply via email to