Ping
Richard Sandiford <richard.sandif...@arm.com> writes: > Kyrylo Tkachov <kyrylo.tkac...@arm.com> writes: >> This looks like a productive way forward to me. >> Okay if the other maintainer don't object by the end of the week. > > Thanks. Dennis pointed out off-list that it regressed > armv8_2-fp16-arith-2.c, which was expecting FP16 vectorisation > to be rejected for -fno-fast-math. As mentioned above, that shouldn't > be necessary given that FP16 arithmetic (unlike FP32 arithmetic) has a > flush-to-zero control. > > This version therefore updates the test to expect the same output > as the -ffast-math version. > > Tested on arm-linux-gnueabi (hopefully for real this time -- I must > have messed up the testing last time). OK for trunk? > > FWIW, the non-testsuite part is the same as before. > > Richard > > > gcc/ > * config/arm/arm.h (ARM_HAVE_NEON_V8QI_ARITH, ARM_HAVE_NEON_V4HI_ARITH) > (ARM_HAVE_NEON_V2SI_ARITH, ARM_HAVE_NEON_V16QI_ARITH): New macros. > (ARM_HAVE_NEON_V8HI_ARITH, ARM_HAVE_NEON_V4SI_ARITH): Likewise. > (ARM_HAVE_NEON_V2DI_ARITH, ARM_HAVE_NEON_V4HF_ARITH): Likewise. > (ARM_HAVE_NEON_V8HF_ARITH, ARM_HAVE_NEON_V2SF_ARITH): Likewise. > (ARM_HAVE_NEON_V4SF_ARITH, ARM_HAVE_V8QI_ARITH, ARM_HAVE_V4HI_ARITH) > (ARM_HAVE_V2SI_ARITH, ARM_HAVE_V16QI_ARITH, ARM_HAVE_V8HI_ARITH) > (ARM_HAVE_V4SI_ARITH, ARM_HAVE_V2DI_ARITH, ARM_HAVE_V4HF_ARITH) > (ARM_HAVE_V2SF_ARITH, ARM_HAVE_V8HF_ARITH, ARM_HAVE_V4SF_ARITH): > Likewise. > * config/arm/iterators.md (VNIM, VNINOTM): Delete. > * config/arm/vec-common.md (add<VNIM:mode>3, addv8hf3) > (add<VNINOTM:mode>3): Replace with... > (add<VDQ:mode>3): ...this new expander. > * config/arm/neon.md (*add<VDQ:mode>3_neon): Use the new > ARM_HAVE_NEON_<MODE>_ARITH macros as the C condition. > (addv8hf3_neon, addv4hf3, add<VFH:mode>3_fp16): Delete in > favor of the above. > (neon_vadd<VH:mode>): Use gen_add<mode>3 instead of > gen_add<mode>3_fp16. > > gcc/testsuite/ > * gcc.target/arm/armv8_2-fp16-arith-2.c: Expect FP16 vectorization > even without -ffast-math. > --- > gcc/config/arm/arm.h | 41 ++++++++++++++++ > gcc/config/arm/iterators.md | 8 ---- > gcc/config/arm/neon.md | 47 +------------------ > gcc/config/arm/vec-common.md | 42 ++--------------- > .../gcc.target/arm/armv8_2-fp16-arith-2.c | 20 +++++--- > 5 files changed, 61 insertions(+), 97 deletions(-) > > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index f4d3676c5bc..4a63d33c70d 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[]; > #define VALID_MVE_STRUCT_MODE(MODE) \ > ((MODE) == TImode || (MODE) == OImode || (MODE) == XImode) > > +/* The conditions under which vector modes are supported for general > + arithmetic using Neon. */ > + > +#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON > +#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON > +#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON > + > +#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON > +#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON > +#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON > +#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON > + > +/* HF operations have their own flush-to-zero control (FPSCR.FZ16). */ > +#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST > +#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST > + > +/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can > + only use them for general arithmetic when -funsafe-math-optimizations > + is in effect. */ > +#define ARM_HAVE_NEON_V2SF_ARITH \ > + (TARGET_NEON && flag_unsafe_math_optimizations) > +#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH > + > +/* The conditions under which vector modes are supported for general > + arithmetic by any vector extension. */ > + > +#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || > TARGET_REALLY_IWMMXT) > +#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || > TARGET_REALLY_IWMMXT) > +#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || > TARGET_REALLY_IWMMXT) > + > +#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE) > +#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE) > +#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE) > +#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH > + > +#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH > +#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH > + > +#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || > TARGET_HAVE_MVE_FLOAT) > +#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || > TARGET_HAVE_MVE_FLOAT) > + > /* The register numbers in sequence, for passing to arm_gen_load_multiple. > */ > extern int arm_regs_in_sequence[]; > > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md > index 0bc9eba0722..c70e3bc2731 100644 > --- a/gcc/config/arm/iterators.md > +++ b/gcc/config/arm/iterators.md > @@ -66,14 +66,6 @@ (define_mode_iterator VSHFT [V4HI V2SI DI]) > ;; Integer and float modes supported by Neon and IWMMXT. > (define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) > > -;; Integer and float modes supported by Neon, IWMMXT and MVE, used by > -;; arithmetic epxand patterns. > -(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF]) > - > -;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by > -;; arithmetic epxand patterns. > -(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI]) > - > ;; Integer and float modes supported by Neon, IWMMXT and MVE. > (define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI]) > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 3e7b51d8ab6..96bf277f501 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -501,7 +501,7 @@ (define_insn "*add<mode>3_neon" > [(set (match_operand:VDQ 0 "s_register_operand" "=w") > (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") > (match_operand:VDQ 2 "s_register_operand" "w")))] > - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" > + "ARM_HAVE_NEON_<MODE>_ARITH" > "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > [(set (attr "type") > (if_then_else (match_test "<Is_float_mode>") > @@ -509,49 +509,6 @@ (define_insn "*add<mode>3_neon" > (const_string "neon_add<q>")))] > ) > > -;; As with SFmode, full support for HFmode vector arithmetic is only > available > -;; when flag-unsafe-math-optimizations is enabled. > - > -;; Add pattern with modes V8HF and V4HF is split into separate patterns to > add > -;; support for standard pattern addv8hf3 in MVE. Following pattern is called > -;; from "addv8hf3" standard pattern inside vec-common.md file. > - > -(define_insn "addv8hf3_neon" > - [(set > - (match_operand:V8HF 0 "s_register_operand" "=w") > - (plus:V8HF > - (match_operand:V8HF 1 "s_register_operand" "w") > - (match_operand:V8HF 2 "s_register_operand" "w")))] > - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" > - "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > - [(set_attr "type" "neon_fp_addsub_s_q")] > -) > - > -(define_insn "addv4hf3" > - [(set > - (match_operand:V4HF 0 "s_register_operand" "=w") > - (plus:V4HF > - (match_operand:V4HF 1 "s_register_operand" "w") > - (match_operand:V4HF 2 "s_register_operand" "w")))] > - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" > - "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > - [(set_attr "type" "neon_fp_addsub_s_q")] > -) > - > -(define_insn "add<mode>3_fp16" > - [(set > - (match_operand:VH 0 "s_register_operand" "=w") > - (plus:VH > - (match_operand:VH 1 "s_register_operand" "w") > - (match_operand:VH 2 "s_register_operand" "w")))] > - "TARGET_NEON_FP16INST" > - "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > - [(set (attr "type") > - (if_then_else (match_test "<Is_float_mode>") > - (const_string "neon_fp_addsub_s<q>") > - (const_string "neon_add<q>")))] > -) > - > (define_insn "*sub<mode>3_neon" > [(set (match_operand:VDQ 0 "s_register_operand" "=w") > (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") > @@ -1837,7 +1794,7 @@ (define_expand "neon_vadd<mode>" > (match_operand:VH 2 "s_register_operand")] > "TARGET_NEON_FP16INST" > { > - emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2])); > + emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); > DONE; > }) > > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index b7e3619caf4..c3c86c46355 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -81,43 +81,11 @@ (define_expand "movv8hf" > ;; patterns separately for Neon, IWMMXT and MVE. > > (define_expand "add<mode>3" > - [(set (match_operand:VNIM 0 "s_register_operand") > - (plus:VNIM (match_operand:VNIM 1 "s_register_operand") > - (match_operand:VNIM 2 "s_register_operand")))] > - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) > - || flag_unsafe_math_optimizations)) > - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode)) > - || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(<MODE>mode)) > - || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(<MODE>mode))" > -{ > -}) > - > -;; Vector arithmetic. Expanders are blank, then unnamed insns implement > -;; patterns separately for Neon and MVE. > - > -(define_expand "addv8hf3" > - [(set (match_operand:V8HF 0 "s_register_operand") > - (plus:V8HF (match_operand:V8HF 1 "s_register_operand") > - (match_operand:V8HF 2 "s_register_operand")))] > - "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode)) > - || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)" > -{ > - if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations) > - emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2])); > -}) > - > -;; Vector arithmetic. Expanders are blank, then unnamed insns implement > -;; patterns separately for Neon and IWMMXT. > - > -(define_expand "add<mode>3" > - [(set (match_operand:VNINOTM 0 "s_register_operand") > - (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand") > - (match_operand:VNINOTM 2 "s_register_operand")))] > - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) > - || flag_unsafe_math_optimizations)) > - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" > -{ > -}) > + [(set (match_operand:VDQ 0 "s_register_operand") > + (plus:VDQ (match_operand:VDQ 1 "s_register_operand") > + (match_operand:VDQ 2 "s_register_operand")))] > + "ARM_HAVE_<MODE>_ARITH" > +) > > ;; Vector arithmetic. Expanders are blank, then unnamed insns implement > ;; patterns separately for IWMMXT and Neon. > diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c > b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c > index 24d0528540d..81bad225a1f 100644 > --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c > +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c > @@ -89,17 +89,23 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) > /* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ > /* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */ > /* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ > +/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */ > + > +/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} > 1 } } */ > + > +/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} > 1 } } */ > + > +/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} > 1 } } */ > +/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} > 1 } } */ > > -/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 13 } } */ > -/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 13 } } */ > -/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 13 } } */ > /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} > 13 } } */ > /* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ > - > /* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } > */ > -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } } */ > - > -/* { dg-final { scan-assembler-not {vabs\.f16} } } */ > > /* { dg-final { scan-assembler-not {vadd\.f32} } } */ > /* { dg-final { scan-assembler-not {vsub\.f32} } } */