* claz...@gmail.com <claz...@gmail.com> [2018-10-31 10:33:33 +0200]:

> Thank you for your review. Please find attached a new respin patch with
> your feedback in.
> 
> Please let me know if it is ok,
> Claudiu 

> From 4ff7d8419783eceeffbaf27df017d0a93c3af942 Mon Sep 17 00:00:00 2001
> From: Claudiu Zissulescu <claz...@gmail.com>
> Date: Thu, 9 Aug 2018 14:29:05 +0300
> Subject: [PATCH] [ARC] Add peephole rules to combine store/loads into double
>  store/loads
> 
> Simple peephole rules which combine multiple ld/st instructions into
> 64-bit load/store instructions. They only apply to architectures which
> have the double load/store option enabled.
> 
> gcc/
>       Claudiu Zissulescu  <claz...@synopsys.com>
> 
>       * config/arc/arc-protos.h (gen_operands_ldd_std): Add.
>       * config/arc/arc.c (operands_ok_ldd_std): New function.
>       (mem_ok_for_ldd_std): Likewise.
>       (gen_operands_ldd_std): Likewise.
>       * config/arc/arc.md: Add peephole2 rules for std/ldd.

Looks good.

Thanks,
Andrew


> ---
>  gcc/config/arc/arc-protos.h |   1 +
>  gcc/config/arc/arc.c        | 161 ++++++++++++++++++++++++++++++++++++
>  gcc/config/arc/arc.md       |  69 ++++++++++++++++
>  3 files changed, 231 insertions(+)
> 
> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
> index 24bea6e1efb..55f8ed4c643 100644
> --- a/gcc/config/arc/arc-protos.h
> +++ b/gcc/config/arc/arc-protos.h
> @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int);
>  extern unsigned int arc_compute_function_type (struct function *);
>  extern bool arc_is_uncached_mem_p (rtx);
>  extern bool arc_lra_p (void);
> +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);
>  #endif /* RTX_CODE */
>  
>  extern unsigned int arc_compute_frame_size (int);
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 18dd0de6af7..daf785dbdb8 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -10803,6 +10803,167 @@ arc_cannot_substitute_mem_equiv_p (rtx)
>    return true;
>  }
>  
> +/* Checks whether the operands are valid for use in an LDD/STD
> +   instruction.  Assumes that RT and RT2 are REG.  This is guaranteed
> +   by the patterns.  Assumes that the address in the base register RN
> +   is word aligned.  Pattern guarantees that both memory accesses use
> +   the same base register, the offsets are constants within the range,
> +   and the gap between the offsets is 4.  If reload has completed, then
> +   check that the registers are legal.  */
> +
> +static bool
> +operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
> +{
> +  unsigned int t, t2;
> +
> +  if (!reload_completed)
> +    return true;
> +
> +  if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
> +                      (offset & (GET_MODE_SIZE (DImode) - 1) & 3
> +                       ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
> +    return false;
> +
> +  t = REGNO (rt);
> +  t2 = REGNO (rt2);
> +
> +  if ((t2 == PROGRAM_COUNTER_REGNO)
> +      || (t % 2 != 0)        /* First destination register is not even.  */
> +      || (t2 != t + 1))
> +      return false;
> +
> +  return true;
> +}
> +
> +/* Helper for gen_operands_ldd_std.  Returns true iff the memory
> +   operand MEM's address contains an immediate offset from the base
> +   register and has no side effects, in which case it sets BASE and
> +   OFFSET accordingly.  */
> +
> +static bool
> +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
> +{
> +  rtx addr;
> +
> +  gcc_assert (base != NULL && offset != NULL);
> +
> +  /* TODO: Handle more general memory operand patterns, such as
> +     PRE_DEC and PRE_INC.  */
> +
> +  if (side_effects_p (mem))
> +    return false;
> +
> +  /* Can't deal with subregs.  */
> +  if (GET_CODE (mem) == SUBREG)
> +    return false;
> +
> +  gcc_assert (MEM_P (mem));
> +
> +  *offset = const0_rtx;
> +
> +  addr = XEXP (mem, 0);
> +
> +  /* If addr isn't valid for DImode, then we can't handle it.  */
> +  if (!arc_legitimate_address_p (DImode, addr,
> +                             reload_in_progress || reload_completed))
> +    return false;
> +
> +  if (REG_P (addr))
> +    {
> +      *base = addr;
> +      return true;
> +    }
> +  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
> +    {
> +      *base = XEXP (addr, 0);
> +      *offset = XEXP (addr, 1);
> +      return (REG_P (*base) && CONST_INT_P (*offset));
> +    }
> +
> +  return false;
> +}
> +
> +/* Called from peephole2 to replace two word-size accesses with a
> +   single LDD/STD instruction.  Returns true iff we can generate a new
> +   instruction sequence.  That is, both accesses use the same base
> +   register and the gap between constant offsets is 4.  OPERANDS are
> +   the operands found by the peephole matcher; OPERANDS[0,1] are
> +   register operands, and OPERANDS[2,3] are the corresponding memory
> +   operands.  LOAD indicates whether the access is load or store.  */
> +
> +bool
> +gen_operands_ldd_std (rtx *operands, bool load, bool commute)
> +{
> +  int i, gap;
> +  HOST_WIDE_INT offsets[2], offset;
> +  int nops = 2;
> +  rtx cur_base, cur_offset, tmp;
> +  rtx base = NULL_RTX;
> +
> +  /* Check that the memory references are immediate offsets from the
> +     same base register.  Extract the base register, the destination
> +     registers, and the corresponding memory offsets.  */
> +  for (i = 0; i < nops; i++)
> +    {
> +      if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
> +     return false;
> +
> +      if (i == 0)
> +     base = cur_base;
> +      else if (REGNO (base) != REGNO (cur_base))
> +     return false;
> +
> +      offsets[i] = INTVAL (cur_offset);
> +      if (GET_CODE (operands[i]) == SUBREG)
> +     {
> +       tmp = SUBREG_REG (operands[i]);
> +       gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
> +       operands[i] = tmp;
> +     }
> +    }
> +
> +  /* Make sure there is no dependency between the individual loads.  */
> +  if (load && REGNO (operands[0]) == REGNO (base))
> +    return false; /* RAW.  */
> +
> +  if (load && REGNO (operands[0]) == REGNO (operands[1]))
> +    return false; /* WAW.  */
> +
> +  /* Make sure the instructions are ordered with lower memory access first.  */
> +  if (offsets[0] > offsets[1])
> +    {
> +      gap = offsets[0] - offsets[1];
> +      offset = offsets[1];
> +
> +      /* Swap the instructions such that lower memory is accessed first.  */
> +      std::swap (operands[0], operands[1]);
> +      std::swap (operands[2], operands[3]);
> +    }
> +  else
> +    {
> +      gap = offsets[1] - offsets[0];
> +      offset = offsets[0];
> +    }
> +
> +  /* Make sure accesses are to consecutive memory locations.  */
> +  if (gap != 4)
> +    return false;
> +
> +  /* Make sure we generate legal instructions.  */
> +  if (operands_ok_ldd_std (operands[0], operands[1], offset))
> +    return true;
> +
> +  if (load && commute)
> +    {
> +      /* Try reordering registers.  */
> +      std::swap (operands[0], operands[1]);
> +      if (operands_ok_ldd_std (operands[0], operands[1], offset))
> +     return true;
> +    }
> +
> +  return false;
> +}
> +
>  #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
>  #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
>  
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 1ed230fa5f0..526fd17a0cf 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -6363,6 +6363,75 @@ archs4x, archs4xd, archs4xd_slow"
>    [(set (reg:CC CC_REG) (compare:CC (match_dup 3)
>                                   (ashift:SI (match_dup 1) (match_dup 2))))])
>  
> +(define_peephole2 ; std
> +  [(set (match_operand:SI 2 "memory_operand" "")
> +     (match_operand:SI 0 "register_operand" ""))
> +   (set (match_operand:SI 3 "memory_operand" "")
> +     (match_operand:SI 1 "register_operand" ""))]
> +  "TARGET_LL64"
> +  [(const_int 0)]
> +{
> +  if (!gen_operands_ldd_std (operands, false, false))
> +    FAIL;
> +  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  operands[2] = adjust_address (operands[2], DImode, 0);
> +  emit_insn (gen_rtx_SET (operands[2], operands[0]));
> +  DONE;
> +})
> +
> +(define_peephole2 ; ldd
> +  [(set (match_operand:SI 0 "register_operand" "")
> +     (match_operand:SI 2 "memory_operand" ""))
> +   (set (match_operand:SI 1 "register_operand" "")
> +     (match_operand:SI 3 "memory_operand" ""))]
> +  "TARGET_LL64"
> +  [(const_int 0)]
> +{
> +  if (!gen_operands_ldd_std (operands, true, false))
> +    FAIL;
> +  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  operands[2] = adjust_address (operands[2], DImode, 0);
> +  emit_insn (gen_rtx_SET (operands[0], operands[2]));
> +  DONE;
> +})
> +
> +;; We require consecutive registers for LDD instruction.  Check if we
> +;; can reorder them and use an LDD.
> +
> +(define_peephole2 ; swap the destination registers of two loads
> +               ; before a commutative operation.
> +  [(set (match_operand:SI 0 "register_operand" "")
> +     (match_operand:SI 2 "memory_operand" ""))
> +   (set (match_operand:SI 1 "register_operand" "")
> +     (match_operand:SI 3 "memory_operand" ""))
> +   (set (match_operand:SI 4 "register_operand" "")
> +     (match_operator:SI 5 "commutative_operator"
> +                        [(match_operand 6 "register_operand" "")
> +                         (match_operand 7 "register_operand" "") ]))]
> +  "TARGET_LL64
> +   && (((rtx_equal_p (operands[0], operands[6]))
> +      && (rtx_equal_p (operands[1], operands[7])))
> +     || ((rtx_equal_p (operands[0], operands[7]))
> +          && (rtx_equal_p (operands[1], operands[6]))))
> +   && (peep2_reg_dead_p (3, operands[0])
> +       || rtx_equal_p (operands[0], operands[4]))
> +   && (peep2_reg_dead_p (3, operands[1])
> +       || rtx_equal_p (operands[1], operands[4]))"
> +  [(set (match_dup 0) (match_dup 2))
> +   (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
> +  {
> +    if (!gen_operands_ldd_std (operands, true, true))
> +     {
> +     FAIL;
> +     }
> +    else
> +     {
> +     operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +     operands[2] = adjust_address (operands[2], DImode, 0);
> +     }
> +   }
> +)
> +
>  ;; include the arc-FPX instructions
>  (include "fpx.md")
>  
> -- 
> 2.17.1
> 

Reply via email to