On Mon, Apr 27, 2015 at 02:42:36PM +0100, Wilco Dijkstra wrote:
> > -----Original Message-----
> > From: Wilco Dijkstra [mailto:wdijk...@arm.com]
> > Sent: 03 March 2015 16:19
> > To: GCC Patches
> > Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> > 
> > Expand abs into a compare and conditional negate. This is the most obvious 
> > expansion, enables
> > merging of the comparison into ALU instructions and is faster on all 
> > implementations.
> > Bootstrapped & regression tested.
> > 
> > int f(int x) { return abs (x + 1); }
> > 
> > Before:
> >         add     w0, w0, 1
> >         sxtw    x0, w0
> >         eor     x1, x0, x0, asr 63
> >         sub     x1, x1, x0, asr 63
> >         mov     x0, x1
> >         ret
> > 
> > After:
> >         adds    w0, w0, 1
> >         csneg   w0, w0, w0, pl
> >         ret
> > 
> > ChangeLog:
> > 
> > 2015-03-03  Wilco Dijkstra  <wdijk...@arm.com>
> > 
> >     * gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> >     (csneg3<mode>_insn): enable expansion of pattern.
> >     * gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> >     for new abs expansion.  (abs64_in_dreg): likewise.


This looks like it breaks support for abs in a D register (for example
at the end of a loop, or extracted from Neon Intrinsics, etc).

e.g. (totally contrived...)

  int64x1_t
  abs_max (int64x2_t x, int64_t *wb)
  {
    int64_t y = vgetq_lane_s64 (x, 0);
    if (y < 0)
      y = -y;
    return vdup_n_s64 (y);
  }

Which currently generates:

  abs_max:
          abs     d0, d0
          ret

I suppose we don't need to worry too much about that (and the current
implementation doesn't seem to catch it reliably anyway), but it would be
good if we could keep the support - even if it is rarely used.

Thanks,
James

> > 
> > ---
> >  gcc/config/aarch64/aarch64.md            | 33 
> > +++++++-------------------------
> >  gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
> >  2 files changed, 9 insertions(+), 29 deletions(-)
> > 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 1f4169e..46b7a63 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -2172,35 +2172,16 @@
> >    [(set_attr "type" "alu_ext")]
> >  )
> > 
> > -(define_insn_and_split "absdi2"
> > -  [(set (match_operand:DI 0 "register_operand" "=&r,w")
> > -   (abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
> > +(define_expand "abs<mode>2"
> > +  [(match_operand:GPI 0 "register_operand" "")
> > +   (match_operand:GPI 1 "register_operand" "")]
> >    ""
> > -  "@
> > -   #
> > -   abs\\t%d0, %d1"
> > -  "reload_completed
> > -   && GP_REGNUM_P (REGNO (operands[0]))
> > -   && GP_REGNUM_P (REGNO (operands[1]))"
> > -  [(const_int 0)]
> >    {
> > -    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
> > -                       gen_rtx_XOR (DImode,
> > -                                    gen_rtx_ASHIFTRT (DImode,
> > -                                                      operands[1],
> > -                                                      GEN_INT (63)),
> > -                                    operands[1])));
> > -    emit_insn (gen_rtx_SET (VOIDmode,
> > -                       operands[0],
> > -                       gen_rtx_MINUS (DImode,
> > -                                      operands[0],
> > -                                      gen_rtx_ASHIFTRT (DImode,
> > -                                                        operands[1],
> > -                                                        GEN_INT (63)))));
> > +    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
> > +    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
> > +    emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], 
> > operands[1]));
> >      DONE;
> >    }
> > -  [(set_attr "type" "alu_sreg")
> > -   (set_attr "simd" "no,yes")]
> >  )
> > 
> >  (define_insn "neg<mode>2"
> > @@ -2879,7 +2860,7 @@
> >    [(set_attr "type" "csel")]
> >  )
> > 
> > -(define_insn "*csneg3<mode>_insn"
> > +(define_insn "csneg3<mode>_insn"
> >    [(set (match_operand:GPI 0 "register_operand" "=r")
> >          (if_then_else:GPI
> >       (match_operand 1 "aarch64_comparison_operation" "")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > index 938bc84..11f1095 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > @@ -7,15 +7,14 @@ extern void abort (void);
> >  long long
> >  abs64 (long long a)
> >  {
> > -  /* { dg-final { scan-assembler "eor\t" } } */
> > -  /* { dg-final { scan-assembler "sub\t" } } */
> > +  /* { dg-final { scan-assembler "csneg\t" } } */
> >    return llabs (a);
> >  }
> > 
> >  long long
> >  abs64_in_dreg (long long a)
> >  {
> > -  /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
> > +  /* { dg-final { scan-assembler "csneg\t" } } */
> >    register long long x asm ("d8") = a;
> >    register long long y asm ("d9");
> >    asm volatile ("" : : "w" (x));
> > --
> > 1.9.1
> 
> 

Reply via email to