ping
> -----Original Message----- > From: Tamar Christina > Sent: Wednesday, December 4, 2024 12:18 PM > To: gcc-patches@gcc.gnu.org > Cc: nd <n...@arm.com>; Richard Earnshaw <richard.earns...@arm.com>; > ktkac...@gcc.gnu.org; Richard Sandiford <richard.sandif...@arm.com> > Subject: [PATCH 7/7]AArch64: Implement vector concat of partial SVE vectors > > Hi All, > > This patch adds support for vector constructor from two partial SVE vectors > into > a full SVE vector. It also implements support for the standard vec_init obtab > to > do this. > > gcc/ChangeLog: > > PR target/96342 > * config/aarch64/aarch64-sve.md (vec_init<mode><Vhalf>): New. > (@aarch64_pack_partial<mode>): New. > * config/aarch64/aarch64.cc (aarch64_sve_expand_vector_init): Special > case constructors of two vectors. > * config/aarch64/iterators.md (SVE_NO2E, SVE_PARTIAL_NO2E): New. > (VHALF, Vhalf, Vwstype): Add SVE partial vectors. > > gcc/testsuite/ChangeLog: > > PR target/96342 > * gcc.target/aarch64/vect-simd-clone-2.c: New test. > > Bootstrapped Regtested on aarch64-none-linux-gnu, > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu > -m32, -m64 and no issues. > > Ok for master? > > Thanks, > Tamar > > --- > diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64- > sve.md > index > 9afd11d347626eeb640722fdba2ab763b8479aa7..9e3577be6e943d7a5c95119 > 6463873d4bcfee07c 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -2840,6 +2840,16 @@ (define_expand "vec_init<mode><Vel>" > } > ) > > +(define_expand "vec_init<mode><Vhalf>" > + [(match_operand:SVE_NO2E 0 "register_operand") > + (match_operand 1 "")] > + "TARGET_SVE" > + { > + aarch64_sve_expand_vector_init (operands[0], operands[1]); > + DONE; > + } > +) > + > ;; Shift an SVE vector left and insert a scalar into element 0. > (define_insn "vec_shl_insert_<mode>" > [(set (match_operand:SVE_FULL 0 "register_operand") > @@ -9347,6 +9357,20 @@ (define_insn "vec_pack_trunc_<Vwide>" > "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" > ) > > +;; Integer partial pack packing two partial SVE types into a single full SVE > +;; type of the same element type. Use UZP1 on the wider type, which discards > +;; the high part of each wide element. This allows to concat SVE partial > types > +;; into a wider vector. > +(define_insn "@aarch64_pack_partial<mode>" > + [(set (match_operand:SVE_PARTIAL_NO2E 0 "register_operand" "=w") > + (unspec:SVE_PARTIAL_NO2E > + [(match_operand:<VHALF> 1 "register_operand" "w") > + (match_operand:<VHALF> 2 "register_operand" "w")] > + UNSPEC_PACK))] > + "TARGET_SVE" > + "uzp1\t%0.<Vwstype>, %1.<Vwstype>, %2.<Vwstype>" > +) > + > ;; ------------------------------------------------------------------------- > ;; ---- [INT<-INT] Unpacks > ;; ------------------------------------------------------------------------- > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index > af6fede102c2be6673c24f8020d000ea56322997..690d54b0a2954327e00d559f > 96c414c81c2e18cd 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -24790,6 +24790,17 @@ aarch64_sve_expand_vector_init (rtx target, rtx > vals) > v.quick_push (XVECEXP (vals, 0, i)); > v.finalize (); > > + /* If we have two elements and are concatting vector. */ > + machine_mode elem_mode = GET_MODE (v.elt (0)); > + if (nelts == 2 && VECTOR_MODE_P (elem_mode)) > + { > + /* We've failed expansion using a dup. Try using a cheeky truncate. */ > + rtx arg0 = force_reg (elem_mode, v.elt(0)); > + rtx arg1 = force_reg (elem_mode, v.elt(1)); > + emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1)); > + return; > + } > + > /* If neither sub-vectors of v could be initialized specially, > then use INSR to insert all elements from v into TARGET. > ??? This might not be optimal for vectors with large > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index > 023893d35f3e955e222c322ce370e84c95c29ee6..77d23d6ad795630d3d5fb5c0 > 76c086a479d46fee 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -138,6 +138,14 @@ (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI]) > ;; VQ without 2 element modes. > (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) > > +;; SVE modes without 2 element modes. > +(define_mode_iterator SVE_NO2E [VNx16QI VNx8QI VNx4QI VNx8HI VNx4HI > VNx8HF > + VNx4HF VNx8BF VNx4BF VNx4SI VNx4SF]) > + > +;; Partial SVE modes without 2 element modes. > +(define_mode_iterator SVE_PARTIAL_NO2E [VNx8QI VNx4QI VNx4HI > + VNx4HF VNx8BF VNx4BF]) > + > ;; 2 element quad vector modes. > (define_mode_iterator VQ_2E [V2DI V2DF]) > > @@ -1678,7 +1686,15 @@ (define_mode_attr VHALF [(V8QI "V4QI") (V16QI > "V8QI") > (V2DI "DI") (V2SF "SF") > (V4SF "V2SF") (V4HF "V2HF") > (V8HF "V4HF") (V2DF "DF") > - (V8BF "V4BF")]) > + (V8BF "V4BF") > + (VNx16QI "VNx8QI") (VNx8QI "VNx4QI") > + (VNx4QI "VNx2QI") (VNx2QI "QI") > + (VNx8HI "VNx4HI") (VNx4HI "VNx2HI") (VNx2HI "HI") > + (VNx8HF "VNx4HF") (VNx4HF "VNx2HF") (VNx2HF "HF") > + (VNx8BF "VNx4BF") (VNx4BF "VNx2BF") (VNx2BF "BF") > + (VNx4SI "VNx2SI") (VNx2SI "SI") > + (VNx4SF "VNx2SF") (VNx2SF "SF") > + (VNx2DI "DI") (VNx2DF "DF")]) > > ;; Half modes of all vector modes, in lower-case. > (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") > @@ -1686,7 +1702,15 @@ (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI > "v8qi") > (V8HF "v4hf") (V8BF "v4bf") > (V2SI "si") (V4SI "v2si") > (V2DI "di") (V2SF "sf") > - (V4SF "v2sf") (V2DF "df")]) > + (V4SF "v2sf") (V2DF "df") > + (VNx16QI "vnx8qi") (VNx8QI "vnx4qi") > + (VNx4QI "vnx2qi") (VNx2QI "qi") > + (VNx8HI "vnx4hi") (VNx4HI "vnx2hi") (VNx2HI "hi") > + (VNx8HF "vnx4hf") (VNx4HF "vnx2hf") (VNx2HF "hf") > + (VNx8BF "vnx4bf") (VNx4BF "vnx2bf") (VNx2BF "bf") > + (VNx4SI "vnx2si") (VNx2SI "si") > + (VNx4SF "vnx2sf") (VNx2SF "sf") > + (VNx2DI "di") (VNx2DF "df")]) > > ;; Single-element half modes of quad vector modes. > (define_mode_attr V1HALF [(V2DI "V1DI") (V2DF "V1DF")]) > @@ -1815,7 +1839,10 @@ (define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") > ;; Widened scalar register suffixes. > (define_mode_attr Vwstype [(V8QI "h") (V4HI "s") > (V2SI "") (V16QI "h") > - (V8HI "s") (V4SI "d")]) > + (V8HI "s") (V4SI "d") > + (VNx8QI "h") (VNx4QI "h") > + (VNx4HF "s") (VNx4HI "s") > + (VNx8BF "h") (VNx4BF "h")]) > ;; Add a .1d for V2SI. > (define_mode_attr Vwsuf [(V8QI "") (V4HI "") > (V2SI ".1d") (V16QI "") > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c > b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..a25cae2708dd18cc91a773 > 2f845419bbdb06c5c1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-std=c99" } */ > +/* { dg-additional-options "-O3 -march=armv8-a" } */ > + > +#pragma GCC target ("+sve") > +extern char __attribute__ ((simd, const)) fn3 (int, char); > +void test_fn3 (int *a, int *b, char *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn3 (b[i], c[i]) + c[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */ > > > > > --