On Thu, Jan 4, 2018 at 9:46 AM, Wilco Dijkstra <wilco.dijks...@arm.com> wrote: > This patch improves register allocation of fma by preferring to update the > accumulator register. This is done by adding fma insns with operand 1 as the > accumulator. The register allocator considers copy preferences only in operand > order, so if the first operand is dead, it has the highest chance of being > reused as the destination. As a result, code using fma often has a better > register allocation. Performance of SPECFP2017 improves by over 0.5% on some > implementations, while it has no effect on other implementations. Fma is more > readable too; in a simple example we now generate:
Seems like you should do something similar to the integer madd/msub instructions too (aarch64_mla is already correct but aarch64_mla_elt needs this too). Thanks, Andrew > > fmadd s16, s2, s1, s16 > fmadd s7, s17, s16, s7 > fmadd s6, s16, s7, s6 > fmadd s5, s7, s6, s5 > > instead of: > > fmadd s16, s16, s2, s1 > fmadd s7, s7, s16, s6 > fmadd s6, s6, s7, s5 > fmadd s5, s5, s6, s4 > > Bootstrap OK. OK for commit? > > ChangeLog: > 2018-01-04 Wilco Dijkstra <wdijk...@arm.com> > > gcc/ > * config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern. > (fnma<mode>4): Likewise. > (fms<mode>4): Likewise. > (fnms<mode>4): Likewise. > (aarch64_fma<mode>4): Rename insn, reorder accumulator operand. > (aarch64_fnma<mode>4): Likewise. > (aarch64_fms<mode>4): Likewise. > (aarch64_fnms<mode>4): Likewise. > (aarch64_fnmadd<mode>4): Likewise. > -- > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > 382953e6ec42ae4475d66143be1e25d22e48571f..e773ec0c41559e47cf38e719dcb8c42d5bb4da49 > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4743,57 +4743,94 @@ (define_insn > "*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult" > [(set_attr "type" "f_cvtf2i")] > ) > > -;; fma - no throw > +;; fma - expand fma into patterns with the accumulator operand first since > +;; reusing the accumulator results in better register allocation. > +;; The register allocator considers copy preferences in operand order, > +;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0. 
> + > +(define_expand "fma<mode>4" > + [(set (match_operand:GPF_F16 0 "register_operand") > + (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand") > + (match_operand:GPF_F16 2 "register_operand") > + (match_operand:GPF_F16 3 "register_operand")))] > + "TARGET_FLOAT" > +) > > -(define_insn "fma<mode>4" > +(define_insn "*aarch64_fma<mode>4" > [(set (match_operand:GPF_F16 0 "register_operand" "=w") > - (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") > - (match_operand:GPF_F16 2 "register_operand" "w") > - (match_operand:GPF_F16 3 "register_operand" "w")))] > + (fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w") > + (match_operand:GPF_F16 3 "register_operand" "w") > + (match_operand:GPF_F16 1 "register_operand" "w")))] > "TARGET_FLOAT" > - "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" > + "fmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" > [(set_attr "type" "fmac<stype>")] > ) > > -(define_insn "fnma<mode>4" > +(define_expand "fnma<mode>4" > + [(set (match_operand:GPF_F16 0 "register_operand") > + (fma:GPF_F16 > + (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand")) > + (match_operand:GPF_F16 2 "register_operand") > + (match_operand:GPF_F16 3 "register_operand")))] > + "TARGET_FLOAT" > +) > + > +(define_insn "*aarch64_fnma<mode>4" > [(set (match_operand:GPF_F16 0 "register_operand" "=w") > (fma:GPF_F16 > - (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")) > - (match_operand:GPF_F16 2 "register_operand" "w") > - (match_operand:GPF_F16 3 "register_operand" "w")))] > + (neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")) > + (match_operand:GPF_F16 3 "register_operand" "w") > + (match_operand:GPF_F16 1 "register_operand" "w")))] > "TARGET_FLOAT" > - "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" > + "fmsub\\t%<s>0, %<s>2, %<s>3, %<s>1" > [(set_attr "type" "fmac<stype>")] > ) > > -(define_insn "fms<mode>4" > + > +(define_expand "fms<mode>4" > + [(set (match_operand:GPF 0 "register_operand") > + (fma:GPF (match_operand:GPF 1 "register_operand") > + 
(match_operand:GPF 2 "register_operand") > + (neg:GPF (match_operand:GPF 3 "register_operand"))))] > + "TARGET_FLOAT" > +) > + > +(define_insn "*aarch64_fms<mode>4" > [(set (match_operand:GPF 0 "register_operand" "=w") > - (fma:GPF (match_operand:GPF 1 "register_operand" "w") > - (match_operand:GPF 2 "register_operand" "w") > - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] > + (fma:GPF (match_operand:GPF 2 "register_operand" "w") > + (match_operand:GPF 3 "register_operand" "w") > + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] > "TARGET_FLOAT" > - "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" > + "fnmsub\\t%<s>0, %<s>2, %<s>3, %<s>1" > [(set_attr "type" "fmac<s>")] > ) > > -(define_insn "fnms<mode>4" > +(define_expand "fnms<mode>4" > + [(set (match_operand:GPF 0 "register_operand") > + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand")) > + (match_operand:GPF 2 "register_operand") > + (neg:GPF (match_operand:GPF 3 "register_operand"))))] > + "TARGET_FLOAT" > +) > + > +(define_insn "*aarch64_fnms<mode>4" > [(set (match_operand:GPF 0 "register_operand" "=w") > - (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) > - (match_operand:GPF 2 "register_operand" "w") > - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] > + (fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w")) > + (match_operand:GPF 3 "register_operand" "w") > + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] > "TARGET_FLOAT" > - "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" > + "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" > [(set_attr "type" "fmac<s>")] > ) > > ;; If signed zeros are ignored, -(a * b + c) = -a * b - c. 
> -(define_insn "*fnmadd<mode>4" > +(define_insn "*aarch64_fnmadd<mode>4" > [(set (match_operand:GPF 0 "register_operand" "=w") > - (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w") > - (match_operand:GPF 2 "register_operand" "w") > - (match_operand:GPF 3 "register_operand" "w"))))] > + (neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w") > + (match_operand:GPF 3 "register_operand" "w") > + (match_operand:GPF 1 "register_operand" "w"))))] > "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT" > - "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" > + "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" > [(set_attr "type" "fmac<s>")] > ) >