Tamar Christina <tamar.christ...@arm.com> writes:
> gcc/ChangeLog:
>
>       PR target/96342
>       * config/aarch64/aarch64-sve.md (vec_init<mode><Vhalf>): New.
>       (@aarch64_pack_partial<mode>): New.
>       * config/aarch64/aarch64.cc
>       (aarch64_sve_expand_vector_init_subvector): New.
>       * config/aarch64/iterators.md (SVE_NO2E): New.
>       (VHALF, Vhalf): Add SVE partial vectors.
>
> gcc/testsuite/ChangeLog:
>
>       PR target/96342
>       * gcc.target/aarch64/vect-simd-clone-2.c: New test.

OK, thanks.

Richard

> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> -- inline copy of patch --
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index a72ca2a500d394598268c6adfe717eed94a304b3..8ed4221dbe5c49db97b37f186365fa391900eadb 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -2839,6 +2839,16 @@ (define_expand "vec_init<mode><Vel>"
>    }
>  )
>  
> +(define_expand "vec_init<mode><Vhalf>"
> +  [(match_operand:SVE_NO2E 0 "register_operand")
> +   (match_operand 1 "")]
> +  "TARGET_SVE"
> +  {
> +    aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
> +    DONE;
> +  }
> +)
> +
>  ;; Shift an SVE vector left and insert a scalar into element 0.
>  (define_insn "vec_shl_insert_<mode>"
>    [(set (match_operand:SVE_FULL 0 "register_operand")
> @@ -9289,6 +9299,19 @@ (define_insn "vec_pack_trunc_<Vwide>"
>    "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
>  )
>  
> +;; Integer partial pack, packing two partial SVE types into a single full SVE
> +;; type of the same element type.  Use UZP1 on the wider type, which discards
> +;; the high part of each wide element.  This allows concatenating SVE partial
> +;; types into a wider vector.
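> +;;
> +;; For example, concatenating two VNx2SI vectors A and B into a VNx4SI
> +;; result: each SI element of a VNx2SI lives in the low 32 bits of a 64-bit
> +;; container, so
> +;;   uzp1 z0.s, zA.s, zB.s
> +;; takes the even-indexed .s lanes of zA followed by those of zB, which are
> +;; exactly the elements of A followed by the elements of B.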
> +(define_insn "@aarch64_pack_partial<mode>"
> +  [(set (match_operand:SVE_NO2E 0 "register_operand" "=w")
> +     (vec_concat:SVE_NO2E
> +       (match_operand:<VHALF> 1 "register_operand" "w")
> +       (match_operand:<VHALF> 2 "register_operand" "w")))]
> +  "TARGET_SVE"
> +  "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
> +)
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [INT<-INT] Unpacks
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index de4c0a0783912b54ac35d7c818c24574b27a4ca0..40214e318f3c4e30e619d96073b253887c973efc 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -24859,6 +24859,17 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
>      v.quick_push (XVECEXP (vals, 0, i));
>    v.finalize ();
>  
> +  /* If we have two elements that are themselves vectors, concatenate them.  */
> +  machine_mode elem_mode = GET_MODE (v.elt (0));
> +  if (nelts == 2 && VECTOR_MODE_P (elem_mode))
> +    {
> +      /* We've failed expansion using a dup.  Try using a cheeky truncate.  */
> +      rtx arg0 = force_reg (elem_mode, v.elt (0));
> +      rtx arg1 = force_reg (elem_mode, v.elt (1));
> +      emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
> +      return;
> +    }
> +
>    /* If neither sub-vectors of v could be initialized specially,
>       then use INSR to insert all elements from v into TARGET.
>       ??? This might not be optimal for vectors with large
> @@ -24870,6 +24881,30 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
>      aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
>  }
>  
> +/* Initialize register TARGET from the two vector subelements in PARALLEL
> +   rtx VALS.  */
> +
> +void
> +aarch64_sve_expand_vector_init_subvector (rtx target, rtx vals)
> +{
> +  machine_mode mode = GET_MODE (target);
> +  int nelts = XVECLEN (vals, 0);
> +
> +  gcc_assert (nelts == 2);
> +
> +  rtx arg0 = XVECEXP (vals, 0, 0);
> +  rtx arg1 = XVECEXP (vals, 0, 1);
> +
> +  /* Both elements must themselves be vectors.  */
> +  machine_mode elem_mode = GET_MODE (arg0);
> +  gcc_assert (VECTOR_MODE_P (elem_mode));
> +
> +  arg0 = force_reg (elem_mode, arg0);
> +  arg1 = force_reg (elem_mode, arg1);
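> +  /* @aarch64_pack_partial emits a single UZP1 that packs the two
> +     subvectors into TARGET (see the pattern in aarch64-sve.md).  */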
> +  emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
> +  return;
> +}
> +
>  /* Check whether VALUE is a vector constant in which every element
>     is either a power of 2 or a negated power of 2.  If so, return
>     a constant vector of log2s, and flip CODE between PLUS and MINUS
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 89c72b24aeb791adbbd3edfdb131478d52b248e6..34200b05a3abf6d51919313de1027aa4988bcb8d 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -140,6 +140,10 @@ (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
>  ;; VQ without 2 element modes.
>  (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF])
>  
> +;; SVE modes without 2 element modes.
> +(define_mode_iterator SVE_NO2E [VNx16QI VNx8QI VNx4QI VNx8HI VNx4HI VNx8HF
> +                             VNx4HF VNx8BF VNx4BF VNx4SI VNx4SF])
> +
>  ;; 2 element quad vector modes.
>  (define_mode_iterator VQ_2E [V2DI V2DF])
>  
> @@ -1737,7 +1741,13 @@ (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
>                        (V2DI "DI")    (V2SF  "SF")
>                        (V4SF "V2SF")  (V4HF "V2HF")
>                        (V8HF "V4HF")  (V2DF  "DF")
> -                      (V8BF "V4BF")])
> +                      (V8BF "V4BF")
> +                      (VNx16QI "VNx8QI") (VNx8QI "VNx4QI")
> +                      (VNx4QI "VNx2QI")
> +                      (VNx8HI "VNx4HI")  (VNx4HI "VNx2HI")
> +                      (VNx8HF "VNx4HF")  (VNx4HF "VNx2HF")
> +                      (VNx8BF "VNx4BF")  (VNx4BF "VNx2BF")
> +                      (VNx4SI "VNx2SI")  (VNx4SF "VNx2SF")])
>  
>  ;; Half modes of all vector modes, in lower-case.
>  (define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
> @@ -1745,7 +1755,13 @@ (define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
>                        (V8HF  "v4hf") (V8BF  "v4bf")
>                        (V2SI "si")    (V4SI  "v2si")
>                        (V2DI "di")    (V2SF  "sf")
> -                      (V4SF "v2sf")  (V2DF  "df")])
> +                      (V4SF "v2sf")  (V2DF  "df")
> +                      (VNx16QI "vnx8qi") (VNx8QI "vnx4qi")
> +                      (VNx4QI "vnx2qi")
> +                      (VNx8HI "vnx4hi")  (VNx4HI "vnx2hi")
> +                      (VNx8HF "vnx4hf")  (VNx4HF "vnx2hf")
> +                      (VNx8BF "vnx4bf")  (VNx4BF "vnx2bf")
> +                      (VNx4SI "vnx2si")  (VNx4SF "vnx2sf")])
>  
>  ;; Single-element half modes of quad vector modes.
>  (define_mode_attr V1HALF [(V2DI "V1DI")  (V2DF  "V1DF")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a25cae2708dd18cc91a7732f845419bbdb06c5c1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile }  */
> +/* { dg-options "-std=c99" } */
> +/* { dg-additional-options "-O3 -march=armv8-a" } */
> +
> +#pragma GCC target ("+sve")
> +extern char __attribute__ ((simd, const)) fn3 (int, char);
> +void test_fn3 (int *a, int *b, char *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
> +}
> +
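> +/* The loop should be vectorized with a call to the SVE simd clone of fn3,
> +   whose mangled name is _ZGVsMxvv_fn3.  */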
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
