On Wed, 30 Sep 2020 at 12:53, Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > movti lacked a way of zeroing an FPR, meaning that we'd do: > > mov x0, 0 > mov x1, 0 > fmov d0, x0 > fmov v0.d[1], x1 > > instead of just: > > movi v0.2d, #0 > > movtf had the opposite problem for GPRs: we'd generate: > > movi v0.2d, #0 > fmov x0, d0 > fmov x1, v0.d[1] > > instead of just: > > mov x0, 0 > mov x1, 0 > > Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf > alternative (but not the movti one). The splitter handles overlap > correctly. > > The TF splitter used aarch64_reg_or_imm, but the _imm part only > accepts integer constants, not floating-point ones. The patch > changes it to nonmemory_operand instead. > > Tested on aarch64-linux-gnu, pushed. > > Richard > > > gcc/ > * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a > function comment. Tighten check for FP moves. > * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative. > (*movtf_aarch64): Handle r<-Y like r<-r. Remove unnecessary > earlyclobber. Change splitter predicate from aarch64_reg_or_imm > to nonmemory_operand. > > gcc/testsuite/ > * gcc.target/aarch64/movtf_1.c: New test. > * gcc.target/aarch64/movti_1.c: Likewise.
Sorry to bother you, the new tests fail with -mabi=ilp32 :-( gcc.target/aarch64/movtf_1.c check-function-bodies load_q gcc.target/aarch64/movtf_1.c check-function-bodies load_x gcc.target/aarch64/movtf_1.c check-function-bodies store_q gcc.target/aarch64/movtf_1.c check-function-bodies store_x gcc.target/aarch64/movti_1.c check-function-bodies load_q gcc.target/aarch64/movti_1.c check-function-bodies load_x gcc.target/aarch64/movti_1.c check-function-bodies store_q gcc.target/aarch64/movti_1.c check-function-bodies store_x I don't think that's high priority though. Christophe > --- > gcc/config/aarch64/aarch64.c | 9 ++- > gcc/config/aarch64/aarch64.md | 17 +++-- > gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++ > gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++ > 4 files changed, 190 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 491fc582dab..9e88438b3c3 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src) > } > } > > +/* Return true if we should split a move from 128-bit value SRC > + to 128-bit register DEST. */ > + > bool > aarch64_split_128bit_move_p (rtx dst, rtx src) > { > - return (! REG_P (src) > - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); > + if (FP_REGNUM_P (REGNO (dst))) > + return REG_P (src) && !FP_REGNUM_P (REGNO (src)); > + /* All moves to GPRs need to be split. */ > + return true; > } > > /* Split a complex SIMD combine. 
*/ > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 19ec9e33f9f..78fe7c43a00 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1361,13 +1361,14 @@ (define_expand "movti" > > (define_insn "*movti_aarch64" > [(set (match_operand:TI 0 > - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") > + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") > (match_operand:TI 1 > - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] > + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] > "(register_operand (operands[0], TImode) > || aarch64_reg_or_zero (operands[1], TImode))" > "@ > # > + movi\\t%0.2d, #0 > # > # > mov\\t%0.16b, %1.16b > @@ -1376,11 +1377,11 @@ (define_insn "*movti_aarch64" > stp\\txzr, xzr, %0 > ldr\\t%q0, %1 > str\\t%q1, %0" > - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ > + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ > load_16,store_16,store_16,\ > load_16,store_16") > - (set_attr "length" "8,8,8,4,4,4,4,4,4") > - (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")] > + (set_attr "length" "8,4,8,8,4,4,4,4,4,4") > + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] > ) > > ;; Split a TImode register-register or register-immediate move into > @@ -1511,9 +1512,9 @@ (define_split > > (define_insn "*movtf_aarch64" > [(set (match_operand:TF 0 > - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") > + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") > (match_operand:TF 1 > - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] > + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] > "TARGET_FLOAT && (register_operand (operands[0], TFmode) > || aarch64_reg_or_fp_zero (operands[1], TFmode))" > "@ > @@ -1536,7 +1537,7 @@ (define_insn "*movtf_aarch64" > > (define_split > [(set (match_operand:TF 0 "register_operand" "") > - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] > + (match_operand:TF 1 "nonmemory_operand" ""))] > "reload_completed && 
aarch64_split_128bit_move_p (operands[0], > operands[1])" > [(const_int 0)] > { > diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c > b/gcc/testsuite/gcc.target/aarch64/movtf_1.c > new file mode 100644 > index 00000000000..570de931389 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c > @@ -0,0 +1,87 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +/* > +** zero_q: > +** movi v0.2d, #0 > +** ret > +*/ > +void > +zero_q () > +{ > + register _Float128 q0 asm ("q0"); > + q0 = 0; > + asm volatile ("" :: "w" (q0)); > +} > + > +/* > +** load_q: > +** ldr q0, \[x0\] > +** ret > +*/ > +void > +load_q (_Float128 *ptr) > +{ > + register _Float128 q0 asm ("q0"); > + q0 = *ptr; > + asm volatile ("" :: "w" (q0)); > +} > + > +/* > +** store_q: > +** str q0, \[x0\] > +** ret > +*/ > +void > +store_q (_Float128 *ptr) > +{ > + register _Float128 q0 asm ("q0"); > + asm volatile ("" : "=w" (q0)); > + *ptr = q0; > +} > + > +/* > +** zero_x: > +** ( > +** mov x0, #?0 > +** mov x1, #?0 > +** | > +** mov x1, #?0 > +** mov x0, #?0 > +** ) > +** ret > +*/ > +void > +zero_x () > +{ > + register _Float128 x0 asm ("x0"); > + x0 = 0; > + asm volatile ("" :: "r" (x0)); > +} > + > +/* > +** load_x: > +** ldp x2, x3, \[x0\] > +** ret > +*/ > +void > +load_x (_Float128 *ptr) > +{ > + register _Float128 x2 asm ("x2"); > + x2 = *ptr; > + asm volatile ("" :: "r" (x2)); > +} > + > +/* > +** store_x: > +** stp x2, x3, \[x0\] > +** ret > +*/ > +void > +store_x (_Float128 *ptr) > +{ > + register _Float128 x2 asm ("x2"); > + asm volatile ("" : "=r" (x2)); > + *ptr = x2; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c > b/gcc/testsuite/gcc.target/aarch64/movti_1.c > new file mode 100644 > index 00000000000..160e1acd281 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c > @@ -0,0 +1,87 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O" } */ > +/* { dg-final { check-function-bodies 
"**" "" } } */ > + > +/* > +** zero_q: > +** movi v0.2d, #0 > +** ret > +*/ > +void > +zero_q () > +{ > + register __int128_t q0 asm ("q0"); > + q0 = 0; > + asm volatile ("" :: "w" (q0)); > +} > + > +/* > +** load_q: > +** ldr q0, \[x0\] > +** ret > +*/ > +void > +load_q (__int128_t *ptr) > +{ > + register __int128_t q0 asm ("q0"); > + q0 = *ptr; > + asm volatile ("" :: "w" (q0)); > +} > + > +/* > +** store_q: > +** str q0, \[x0\] > +** ret > +*/ > +void > +store_q (__int128_t *ptr) > +{ > + register __int128_t q0 asm ("q0"); > + asm volatile ("" : "=w" (q0)); > + *ptr = q0; > +} > + > +/* > +** zero_x: > +** ( > +** mov x0, #?0 > +** mov x1, #?0 > +** | > +** mov x1, #?0 > +** mov x0, #?0 > +** ) > +** ret > +*/ > +void > +zero_x () > +{ > + register __int128_t x0 asm ("x0"); > + x0 = 0; > + asm volatile ("" :: "r" (x0)); > +} > + > +/* > +** load_x: > +** ldp x2, x3, \[x0\] > +** ret > +*/ > +void > +load_x (__int128_t *ptr) > +{ > + register __int128_t x2 asm ("x2"); > + x2 = *ptr; > + asm volatile ("" :: "r" (x2)); > +} > + > +/* > +** store_x: > +** stp x2, x3, \[x0\] > +** ret > +*/ > +void > +store_x (__int128_t *ptr) > +{ > + register __int128_t x2 asm ("x2"); > + asm volatile ("" : "=r" (x2)); > + *ptr = x2; > +}