Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> The backend has an existing V2HFmode that is used by pairwise operations.
> This mode was however never made fully functional.  Amongst other things it
> was never declared as a vector type, which made it unusable from the mid-end.
>
> It's also lacking an implementation for load/stores, so reload ICEs if this
> mode is ever used.  This finishes the implementation by providing the above.
>
> Note that I have created a new iterator VHSDF_P instead of extending VHSDF
> because the previous iterator is used in far more things than just
> load/stores.
>
> It's also used, for instance, in intrinsics, and extending it would force me
> to provide support for mangling the type even though we never expose it
> through intrinsics.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-simd.md (*aarch64_simd_movv2hf): New.
>       (mov<mode>, movmisalign<mode>, aarch64_dup_lane<mode>,
>       aarch64_store_lane0<mode>, aarch64_simd_vec_set<mode>,
>       @aarch64_simd_vec_copy_lane<mode>, vec_set<mode>,
>       reduc_<optab>_scal_<mode>, reduc_<fmaxmin>_scal_<mode>,
>       aarch64_reduc_<optab>_internal<mode>, aarch64_get_lane<mode>,
>       vec_init<mode><Vel>, vec_extract<mode><Vel>): Support V2HF.
>       * config/aarch64/aarch64.cc (aarch64_classify_vector_mode):
>       Add E_V2HFmode.
>       * config/aarch64/iterators.md (VHSDF_P): New.
>       (V2F, VALL_F16_FULL, nunits, Vtype, Vmtype, Vetype, stype, VEL,
>       Vel, q, vp): Add V2HF.
>       * config/arm/types.md (neon_fp_reduc_add_h): New.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/slp_1.c: Update testcase.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 25aed74f8cf939562ed65a578fe32ca76605b58a..93a2888f567460ad10ec050ea7d4f701df4729d1 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -19,10 +19,10 @@
>  ;; <http://www.gnu.org/licenses/>.
>  
>  (define_expand "mov<mode>"
> -  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
> -     (match_operand:VALL_F16 1 "general_operand"))]
> +  [(set (match_operand:VALL_F16_FULL 0 "nonimmediate_operand")
> +     (match_operand:VALL_F16_FULL 1 "general_operand"))]
>    "TARGET_SIMD"
> -  "
> +{
>    /* Force the operand into a register if it is not an
>       immediate whose use can be replaced with xzr.
>       If the mode is 16 bytes wide, then we will be doing
> @@ -46,12 +46,11 @@ (define_expand "mov<mode>"
>        aarch64_expand_vector_init (operands[0], operands[1]);
>        DONE;
>      }
> -  "
> -)
> +})
>  
>  (define_expand "movmisalign<mode>"
> -  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
> -        (match_operand:VALL_F16 1 "general_operand"))]
> +  [(set (match_operand:VALL_F16_FULL 0 "nonimmediate_operand")
> +        (match_operand:VALL_F16_FULL 1 "general_operand"))]
>    "TARGET_SIMD && !STRICT_ALIGNMENT"
>  {
>    /* This pattern is not permitted to fail during expansion: if both arguments
> @@ -85,10 +84,10 @@ (define_insn "aarch64_simd_dup<mode>"
>  )
>  
>  (define_insn "aarch64_dup_lane<mode>"
> -  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
> -     (vec_duplicate:VALL_F16
> +  [(set (match_operand:VALL_F16_FULL 0 "register_operand" "=w")
> +     (vec_duplicate:VALL_F16_FULL
>         (vec_select:<VEL>
> -         (match_operand:VALL_F16 1 "register_operand" "w")
> +         (match_operand:VALL_F16_FULL 1 "register_operand" "w")
>           (parallel [(match_operand:SI 2 "immediate_operand" "i")])
>            )))]
>    "TARGET_SIMD"
> @@ -142,6 +141,29 @@ (define_insn "*aarch64_simd_mov<VDMOV:mode>"
>                    mov_reg, neon_move<q>")]
>  )
>  
> +(define_insn "*aarch64_simd_movv2hf"
> +  [(set (match_operand:V2HF 0 "nonimmediate_operand"
> +             "=w, m,  m,  w, ?r, ?w, ?r, w, w")
> +     (match_operand:V2HF 1 "general_operand"
> +             "m,  Dz, w,  w,  w,  r,  r, Dz, Dn"))]
> +  "TARGET_SIMD_F16INST
> +   && (register_operand (operands[0], V2HFmode)
> +       || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
> +   "@
> +    ldr\\t%s0, %1
> +    str\\twzr, %0
> +    str\\t%s1, %0
> +    mov\\t%0.2s[0], %1.2s[0]
> +    umov\\t%w0, %1.s[0]
> +    fmov\\t%s0, %1
> +    mov\\t%0, %1
> +    movi\\t%d0, 0
> +    * return aarch64_output_simd_mov_immediate (operands[1], 32);"
> +  [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
> +                  neon_logic, neon_to_gp, f_mcr,\
> +                  mov_reg, neon_move, neon_move")]
> +)
> +
>  (define_insn "*aarch64_simd_mov<VQMOV:mode>"
>    [(set (match_operand:VQMOV 0 "nonimmediate_operand"
>               "=w, Umn,  m,  w, ?r, ?w, ?r, w")
> @@ -182,7 +204,7 @@ (define_insn "*aarch64_simd_mov<VQMOV:mode>"
>  
>  (define_insn "aarch64_store_lane0<mode>"
>    [(set (match_operand:<VEL> 0 "memory_operand" "=m")
> -     (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
> +     (vec_select:<VEL> (match_operand:VALL_F16_FULL 1 "register_operand" "w")
>                       (parallel [(match_operand 2 "const_int_operand" "n")])))]
>    "TARGET_SIMD
>     && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
> @@ -1035,11 +1057,11 @@ (define_insn "one_cmpl<mode>2"
>  )
>  
>  (define_insn "aarch64_simd_vec_set<mode>"
> -  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
> -     (vec_merge:VALL_F16
> -         (vec_duplicate:VALL_F16
> +  [(set (match_operand:VALL_F16_FULL 0 "register_operand" "=w,w,w")
> +     (vec_merge:VALL_F16_FULL
> +         (vec_duplicate:VALL_F16_FULL
>             (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
> -         (match_operand:VALL_F16 3 "register_operand" "0,0,0")
> +         (match_operand:VALL_F16_FULL 3 "register_operand" "0,0,0")
>           (match_operand:SI 2 "immediate_operand" "i,i,i")))]
>    "TARGET_SIMD"
>    {
> @@ -1061,14 +1083,14 @@ (define_insn "aarch64_simd_vec_set<mode>"
>  )
>  
>  (define_insn "@aarch64_simd_vec_copy_lane<mode>"
> -  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
> -     (vec_merge:VALL_F16
> -         (vec_duplicate:VALL_F16
> +  [(set (match_operand:VALL_F16_FULL 0 "register_operand" "=w")
> +     (vec_merge:VALL_F16_FULL
> +         (vec_duplicate:VALL_F16_FULL
>             (vec_select:<VEL>
> -             (match_operand:VALL_F16 3 "register_operand" "w")
> +             (match_operand:VALL_F16_FULL 3 "register_operand" "w")
>               (parallel
>                 [(match_operand:SI 4 "immediate_operand" "i")])))
> -         (match_operand:VALL_F16 1 "register_operand" "0")
> +         (match_operand:VALL_F16_FULL 1 "register_operand" "0")
>           (match_operand:SI 2 "immediate_operand" "i")))]
>    "TARGET_SIMD"
>    {
> @@ -1376,7 +1398,7 @@ (define_insn "vec_shr_<mode>"
>  )
>  
>  (define_expand "vec_set<mode>"
> -  [(match_operand:VALL_F16 0 "register_operand")
> +  [(match_operand:VALL_F16_FULL 0 "register_operand")
>     (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
>     (match_operand:SI 2 "immediate_operand")]
>    "TARGET_SIMD"
> @@ -3503,7 +3525,7 @@ (define_insn "popcount<mode>2"
>  ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
>  (define_expand "reduc_<optab>_scal_<mode>"
>    [(match_operand:<VEL> 0 "register_operand")
> -   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
> +   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
>                FMAXMINV)]
>    "TARGET_SIMD"
>    {
> @@ -3518,7 +3540,7 @@ (define_expand "reduc_<optab>_scal_<mode>"
>  
>  (define_expand "reduc_<fmaxmin>_scal_<mode>"
>    [(match_operand:<VEL> 0 "register_operand")
> -   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
> +   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
>                FMAXMINNMV)]
>    "TARGET_SIMD"
>    {
> @@ -3562,8 +3584,8 @@ (define_insn "aarch64_reduc_<optab>_internalv2si"
>  )
>  
>  (define_insn "aarch64_reduc_<optab>_internal<mode>"
> - [(set (match_operand:VHSDF 0 "register_operand" "=w")
> -       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
> + [(set (match_operand:VHSDF_P 0 "register_operand" "=w")
> +       (unspec:VHSDF_P [(match_operand:VHSDF_P 1 "register_operand" "w")]
>                     FMAXMINV))]
>   "TARGET_SIMD"
>   "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
> @@ -4208,7 +4230,7 @@ (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
>  (define_insn_and_split "aarch64_get_lane<mode>"
>    [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
>       (vec_select:<VEL>
> -       (match_operand:VALL_F16 1 "register_operand" "w, w, w")
> +       (match_operand:VALL_F16_FULL 1 "register_operand" "w, w, w")
>         (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
>    "TARGET_SIMD"
>    {
> @@ -7989,7 +8011,7 @@ (define_expand "aarch64_st1<VALL_F16:mode>"
>  ;; Standard pattern name vec_init<mode><Vel>.
>  
>  (define_expand "vec_init<mode><Vel>"
> -  [(match_operand:VALL_F16 0 "register_operand")
> +  [(match_operand:VALL_F16_FULL 0 "register_operand")
>     (match_operand 1 "" "")]
>    "TARGET_SIMD"
>  {
> @@ -8068,7 +8090,7 @@ (define_insn "aarch64_urecpe<mode>"
>  
>  (define_expand "vec_extract<mode><Vel>"
>    [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
> -   (match_operand:VALL_F16 1 "register_operand")
> +   (match_operand:VALL_F16_FULL 1 "register_operand")
>     (match_operand:SI 2 "immediate_operand")]
>    "TARGET_SIMD"
>  {
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index f05bac713e88ea8c7feaa2367d55bd523ca66f57..1e08f8453688210afe1566092b19b59c9bdd0c97 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -3566,6 +3566,7 @@ aarch64_classify_vector_mode (machine_mode mode)
>      case E_V8BFmode:
>      case E_V4SFmode:
>      case E_V2DFmode:
> +    case E_V2HFmode:
>        return TARGET_SIMD ? VEC_ADVSIMD : 0;
>  
>      default:
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 37d8161a33b1c399d80be82afa67613a087389d4..1df09f7fe2eb35aed96113476541e0faa5393551 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -160,6 +160,10 @@ (define_mode_iterator VDQF [V2SF V4SF V2DF])
>  (define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
>                            (V8HF "TARGET_SIMD_F16INST")
>                            V2SF V4SF V2DF])
> +;; Advanced SIMD Float modes suitable for pairwise operations.
> +(define_mode_iterator VHSDF_P [(V4HF "TARGET_SIMD_F16INST")
> +                            (V8HF "TARGET_SIMD_F16INST")
> +                            V2SF V4SF V2DF (V2HF "TARGET_SIMD_F16INST")])
>  
>  ;; Advanced SIMD Float modes, and DF.
>  (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
> @@ -188,15 +192,23 @@ (define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
>  (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
>  
>  ;; Advanced SIMD Float modes with 2 elements.
> -(define_mode_iterator V2F [V2SF V2DF])
> +(define_mode_iterator V2F [V2SF V2DF V2HF])
>  
>  ;; All Advanced SIMD modes on which we support any arithmetic operations.
>  (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
>  
> -;; All Advanced SIMD modes suitable for moving, loading, and storing.
> +;; All Advanced SIMD modes suitable for moving, loading, and storing
> +;; except V2HF.
>  (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
>                               V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
>  
> +;; All Advanced SIMD modes suitable for moving, loading, and storing
> +;; including V2HF
> +(define_mode_iterator VALL_F16_FULL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
> +                                  V4HF V8HF V4BF V8BF V2SF V4SF V2DF
> +                                  (V2HF "TARGET_SIMD_F16INST")])

This name might cause confusion with the SVE iterators, where FULL
means "every bit of the register is used".  How about something like
VMOVE instead?

With this change, I guess VALL_F16 represents "The set of all modes
for which the vld1 intrinsics are provided" and VMOVE or whatever
is "All Advanced SIMD modes suitable for moving, loading, and storing".
That is, VMOVE extends VALL_F16 with modes that are not manifested
via intrinsics.
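
To be concrete, a rough sketch of what I mean (the entries below are copied
from your VALL_F16_FULL; only the name and comment change):

;; All Advanced SIMD modes suitable for moving, loading, and storing.
(define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
			     (V2HF "TARGET_SIMD_F16INST")])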

> +
> +
>  ;; The VALL_F16 modes except the 128-bit 2-element ones.
>  (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
>                               V4HF V8HF V2SF V4SF])
> @@ -1076,7 +1088,7 @@ (define_mode_attr nunits [(V8QI "8") (V16QI "16")
>                         (V2SF "2") (V4SF "4")
>                         (V1DF "1") (V2DF "2")
>                         (DI "1") (DF "1")
> -                       (V8DI "8")])
> +                       (V8DI "8") (V2HF "2")])
>  
>  ;; Map a mode to the number of bits in it, if the size of the mode
>  ;; is constant.
> @@ -1090,6 +1102,7 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
>  
>  ;; Give the length suffix letter for a sign- or zero-extension.
>  (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
> +(define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
>  
>  ;; Give the number of bits in the mode
>  (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
> @@ -1134,8 +1147,9 @@ (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b")
>                           (V2SI "2s") (V4SI  "4s")
>                           (DI   "1d") (DF    "1d")
>                           (V2DI "2d") (V2SF "2s")
> -                      (V4SF "4s") (V2DF "2d")
> -                      (V4HF "4h") (V8HF "8h")
> +                      (V2HF "2h") (V4SF "4s")
> +                      (V2DF "2d") (V4HF "4h")
> +                      (V8HF "8h")
>                        (V2x8QI "8b") (V2x4HI "4h")
>                        (V2x2SI "2s") (V2x1DI  "1d")
>                        (V2x4HF "4h") (V2x2SF "2s")

Where is the 2h used, and is it valid syntax in that context?

Same for later instances of 2h.
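
(To make the question concrete: taking the aarch64_reduc_<optab>_internal<mode>
pattern above as one example, for V2HF its template
"<maxmin_uns_op><vp>\t%<Vetype>0, %1.<Vtype>" picks up <vp> "p", <Vetype> "h"
and <Vtype> "2h" from the patch, so I'd expect output along the lines of:

  fmaxnmp  h0, v1.2h

with fmaxnm just standing in for one of the <maxmin_uns_op> values; the ".2h"
there is the syntax in question.)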

Thanks,
Richard

> @@ -1175,9 +1189,10 @@ (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
>                        (V4HI ".4h") (V8HI  ".8h")
>                        (V2SI ".2s") (V4SI  ".4s")
>                        (V2DI ".2d") (V4HF ".4h")
> -                      (V8HF ".8h") (V4BF ".4h")
> -                      (V8BF ".8h") (V2SF ".2s")
> -                      (V4SF ".4s") (V2DF ".2d")
> +                      (V8HF ".8h") (V2HF ".2h")
> +                      (V4BF ".4h") (V8BF ".8h")
> +                      (V2SF ".2s") (V4SF ".4s")
> +                      (V2DF ".2d")
>                        (DI   "")    (SI   "")
>                        (HI   "")    (QI   "")
>                        (TI   "")    (HF   "")
> @@ -1193,7 +1208,7 @@ (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
>  (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
>                         (V4HI "h") (V8HI  "h")
>                         (V2SI "s") (V4SI  "s")
> -                       (V2DI "d")
> +                       (V2DI "d") (V2HF  "h")
>                         (V4HF "h") (V8HF  "h")
>                         (V2SF "s") (V4SF  "s")
>                         (V2DF "d")
> @@ -1285,7 +1300,7 @@ (define_mode_attr Vcwtype [(VNx16QI "b") (VNx8QI "h") (VNx4QI "w") (VNx2QI "d")
>  ;; more accurately.
>  (define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
>                        (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
> -                      (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
> +                      (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d") (V2HF "s")
>                        (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
>                        (SI "s") (DI "d")])
>  
> @@ -1360,8 +1375,8 @@ (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI")
>                      (V4HF "HF") (V8HF  "HF")
>                      (V2SF "SF") (V4SF  "SF")
>                      (DF   "DF") (V2DF  "DF")
> -                    (SI   "SI") (HI    "HI")
> -                    (QI   "QI")
> +                    (SI   "SI") (V2HF  "HF")
> +                    (QI   "QI") (HI    "HI")
>                      (V4BF "BF") (V8BF "BF")
>                      (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
>                      (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
> @@ -1381,7 +1396,7 @@ (define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
>                      (V2SF "sf") (V4SF "sf")
>                      (V2DF "df") (DF   "df")
>                      (SI   "si") (HI   "hi")
> -                    (QI   "qi")
> +                    (QI   "qi") (V2HF "hf")
>                      (V4BF "bf") (V8BF "bf")
>                      (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
>                      (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
> @@ -1866,7 +1881,7 @@ (define_mode_attr q [(V8QI "") (V16QI "_q")
>                    (V4HF "") (V8HF "_q")
>                    (V4BF "") (V8BF "_q")
>                    (V2SF "") (V4SF  "_q")
> -                            (V2DF  "_q")
> +                  (V2HF "") (V2DF  "_q")
>                    (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")
>                    (V2x8QI "") (V2x16QI "_q")
>                    (V2x4HI "") (V2x8HI "_q")
> @@ -1905,6 +1920,7 @@ (define_mode_attr vp [(V8QI "v") (V16QI "v")
>                     (V2SI "p") (V4SI  "v")
>                     (V2DI "p") (V2DF  "p")
>                     (V2SF "p") (V4SF  "v")
> +                   (V2HF "p")
>                     (V4HF "v") (V8HF  "v")])
>  
>  (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
> index 7d0504bdd944e9c0d1b545b0b66a9a1adc808714..3cfbc7a93cca1bea4925853e51d0a147c5722247 100644
> --- a/gcc/config/arm/types.md
> +++ b/gcc/config/arm/types.md
> @@ -483,6 +483,7 @@ (define_attr "autodetect_type"
>  ; neon_fp_minmax_s_q
>  ; neon_fp_minmax_d
>  ; neon_fp_minmax_d_q
> +; neon_fp_reduc_add_h
>  ; neon_fp_reduc_add_s
>  ; neon_fp_reduc_add_s_q
>  ; neon_fp_reduc_add_d
> @@ -1033,6 +1034,7 @@ (define_attr "type"
>    neon_fp_minmax_d,\
>    neon_fp_minmax_d_q,\
>  \
> +  neon_fp_reduc_add_h,\
>    neon_fp_reduc_add_s,\
>    neon_fp_reduc_add_s_q,\
>    neon_fp_reduc_add_d,\
> @@ -1257,8 +1259,8 @@ (define_attr "is_neon_type" "yes,no"
>            neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\
>            neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\
>            neon_fp_neg_s, neon_fp_neg_s_q, neon_fp_neg_d, neon_fp_neg_d_q,\
> -          neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\
> -          neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,
> +          neon_fp_reduc_add_h, neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
> +          neon_fp_reduc_add_d, neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,\
>            neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\
>            neon_fp_reduc_minmax_d_q,\
>            neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> index 07d71a63414b1066ea431e287286ad048515711a..8e35e0b574d49913b43c7d8d4f4ba75f127f42e9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> @@ -30,11 +30,9 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n)   \
>  TEST_ALL (VEC_PERM)
>  
>  /* We should use one DUP for each of the 8-, 16- and 32-bit types,
> -   although we currently use LD1RW for _Float16.  We should use two
> -   DUPs for each of the three 64-bit types.  */
> +   but two DUPs for each of the three 64-bit types.  */
>  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
> -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
> -/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
> +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 3 } } */
>  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
>  /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
>  /* { dg-final { scan-assembler-not {\tzip2\t} } } */
