On 11/10/18 19:37, Wilco Dijkstra wrote: > Here is the same version again with an extra test added: > > The popcount expansion uses SIMD instructions acting on 64-bit values. > As a result a popcount of a 32-bit integer requires zero-extension before > moving the zero-extended value into an FP register. This patch adds > support for zero-extended int->FP moves to avoid the redundant uxtw. > Similarly, add support for 32-bit zero-extending load->FP register > and 32-bit zero-extending FP->FP and FP->int moves. > Add a missing 'fp' arch attribute to the related 8/16-bit pattern and > fix an incorrect type attribute. > > To complete zero-extended load support, add a new alternative to > load_pair_zero_extendsidi2_aarch64 to support LDP into FP registers too. > > int f (int a) > { > return __builtin_popcount (a); > } > > Before: > uxtw x0, w0 > fmov d0, x0 > cnt v0.8b, v0.8b > addv b0, v0.8b > fmov w0, s0 > ret > > After: > fmov s0, w0 > cnt v0.8b, v0.8b > addv b0, v0.8b > fmov w0, s0 > ret > > Passes regress on AArch64, OK for commit? > > ChangeLog: > 2018-10-11 Wilco Dijkstra <wdijk...@arm.com> > > gcc/ > * config/aarch64/aarch64.md (zero_extendsidi2_aarch64): Add alternatives > to zero-extend between int and floating-point registers. > (load_pair_zero_extendsidi2_aarch64): Add alternative to emit > zero-extended > ldp into floating-point registers. Add type and arch attributes. > (zero_extend<SHORT:mode><GPI:mode>2_aarch64): Add arch attribute. > Use f_loads for type attribute. > > testsuite/ > * gcc.target/aarch64/popcnt.c: Test zero-extended popcount. > * gcc.target/aarch64/vec_zeroextend.c: Test zero-extended vectors.
OK. R. > > -- > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > ef2368706e88a551b9d0d2db2385860112bdbdde..5e42485a4727079cc7647f4ded45dd175030b3fb > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1588,26 +1588,34 @@ (define_insn "*load_pair_extendsidi2_aarch64" > ) > > (define_insn "*zero_extendsidi2_aarch64" > - [(set (match_operand:DI 0 "register_operand" "=r,r") > - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] > + [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w") > + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" > "r,m,r,m,w,w")))] > "" > "@ > uxtw\t%0, %w1 > - ldr\t%w0, %1" > - [(set_attr "type" "extend,load_4")] > + ldr\t%w0, %1 > + fmov\t%s0, %w1 > + ldr\t%s0, %1 > + fmov\t%w0, %s1 > + fmov\t%s0, %s1" > + [(set_attr "type" "extend,load_4,f_mcr,f_loads,f_mrc,fmov") > + (set_attr "arch" "*,*,fp,fp,fp,fp")] > ) > > (define_insn "*load_pair_zero_extendsidi2_aarch64" > - [(set (match_operand:DI 0 "register_operand" "=r") > - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump"))) > - (set (match_operand:DI 2 "register_operand" "=r") > - (zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))] > + [(set (match_operand:DI 0 "register_operand" "=r,w") > + (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" > "Ump,Ump"))) > + (set (match_operand:DI 2 "register_operand" "=r,w") > + (zero_extend:DI (match_operand:SI 3 "memory_operand" "m,m")))] > "rtx_equal_p (XEXP (operands[3], 0), > plus_constant (Pmode, > XEXP (operands[1], 0), > GET_MODE_SIZE (SImode)))" > - "ldp\\t%w0, %w2, %1" > - [(set_attr "type" "load_8")] > + "@ > + ldp\t%w0, %w2, %1 > + ldp\t%s0, %s2, %1" > + [(set_attr "type" "load_8,neon_load1_2reg") > + (set_attr "arch" "*,fp")] > ) > > (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2" > @@ -1634,7 +1642,8 @@ (define_insn > "*zero_extend<SHORT:mode><GPI:mode>2_aarch64" > and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask> > ldr<SHORT:size>\t%w0, %1 > ldr\t%<SHORT:size>0, %1" > - [(set_attr "type" "logic_imm,load_4,load_4")] > + [(set_attr "type" "logic_imm,load_4,f_loads") > + (set_attr "arch" "*,*,fp")] > ) > > (define_expand "<optab>qihi2" > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt.c > b/gcc/testsuite/gcc.target/aarch64/popcnt.c > index > 7e957966d8e81b8633a444bb42944d0da82ae5db..2b5e9f3e2c0245438ed7bcc5d0d4e01efe01b1ee > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/popcnt.c > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt.c > @@ -19,5 +19,16 @@ foo2 (long long x) > return __builtin_popcountll (x); > } > > -/* { dg-final { scan-assembler-not "popcount" } } */ > -/* { dg-final { scan-assembler-times "cnt\t" 3 } } */ > +int > +foo3 (int *p) > +{ > + return __builtin_popcount (*p); > +} > + > +/* { dg-final { scan-assembler-not {popcount} } } */ > +/* { dg-final { scan-assembler-times {cnt\t} 4 } } */ > +/* { dg-final { scan-assembler-times {fmov\ts} 1 {target lp64} } } */ > +/* { dg-final { scan-assembler-times {fmov\td} 2 {target lp64} } } */ > +/* { dg-final { scan-assembler-times {fmov\ts} 2 {target ilp32} } } */ > +/* { dg-final { scan-assembler-times {fmov\td} 1 {target ilp32} } } */ > +/* { dg-final { scan-assembler-times {ldr\ts} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c > b/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..9c3971f036add8b1ee0204141d09db24720d05c6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +#define vector __attribute__((vector_size(16) )) > + > +vector unsigned long long > +f1(vector unsigned long long b, vector unsigned int a) > +{ > + b[0] = a[0]; > + return b; > +} > + > +unsigned long long > +f2(vector unsigned int a) > +{ > + return a[0]; > +} > + > +/* { dg-final { scan-assembler-times {fmov} 2 } } */ > +/* { dg-final { scan-assembler-not {umov} } } */ > +/* { dg-final { scan-assembler-not {uxtw} } } */ > > >