Re: [PATCH] AArch64 SVE: Fix wrong sve predicate split (PR100048)

Tamar Christina via Gcc-patches Fri, 16 Apr 2021 07:09:53 -0700

Hi Richard,

The 04/16/2021 12:23, Richard Sandiford wrote:
> Tamar Christina <tamar.christ...@arm.com> writes:
> > diff --git a/gcc/config/aarch64/aarch64-sve.md 
> > b/gcc/config/aarch64/aarch64-sve.md
> > index 
> > 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..2cdc6338902216760622a39b14f0076994458c98
> >  100644
> > --- a/gcc/config/aarch64/aarch64-sve.md
> > +++ b/gcc/config/aarch64/aarch64-sve.md
> > @@ -8657,6 +8657,22 @@ (define_insn "@aarch64_sve_<perm_insn><mode>"
> >    "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
> >  )
> >  
> > +;; Special purpose permute used by the predicate generation instructions.
> > +;; This version only accepts VNx16BI as input but can output as any 
> > predicate
> > +;; type and will reinterpet the input registers as the type in operand 3.
> 
> I think it would be more accurate to say something like:
> 
> ;; Special purpose permute used by the predicate generation instructions.
> ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
> ;; regardless of the element size, so that all input and output bits are
> ;; well-defined.  Operand 3 then indicates the size of the permute.
> 
> > +(define_insn "@aarch64_sve_trn1_conv<mode>"
> > +  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
> > +   (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
> > +                    (match_operand:VNx16BI 2 "register_operand" "Upa")
> > +                    (clobber
> > +                     (match_operand:PRED_ALL 3 "register_operand" "=Upa"))
> 
> I don't think we need a register for operand 3.  We could just use the
> CONST0_RTX of the mode:
> 
>    (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")
>


Ah! good shout! I was wondering if I could avoid the clobber and this works 
great.

Thanks!

Bootstrapped and regtested on aarch64-none-linux-gnu and no issues.

Ok for trunk and GCC 10?

Regards,
Tamar

> (no need for a constraint).
> 
> > +                   ]
> 
> Formatting nit: ] is usually on the previous line.
> 
> > +                   UNSPEC_TRN1_CONV))]
> > +  "TARGET_SVE"
> > +  "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
> > +)
> > +
> > +
> 
> Just one blank line here (sorry for the nitpick).
> 
> >  ;; 
> > =========================================================================
> >  ;; == Conversions
> >  ;; 
> > =========================================================================
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 
> > 994fafc2dc857ca5c7f345e49b47cc7e7dcf5900..61337881bfd05dbf6e84ada6810b87fa36dc989d
> >  100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -5481,12 +5481,13 @@ aarch64_expand_sve_const_pred_trn (rtx target, 
> > rtx_vector_builder &builder,
> >     }
> >      }
> >  
> > -  /* Emit the TRN1 itself.  */
> > +  /* Emit the TRN1 itself.  We emit a TRN that will always take a
> > +     input registers as VNx16BI but re-interpret the results to
> > +     MODE.  */
> 
> Here too I think the output register mode is as important as the
> input register mode, since we rely on all bits of the output being
> well-defined.  How about something like:
> 
>   /* Emit the TRN1 itself.  We emit a TRN that operates on VNx16BI
>      operands but permutes them as though they had mode MODE.  */
> 
> Thanks,
> Richard
> 
> >    machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
> > -  target = aarch64_target_reg (target, mode);
> > -  emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
> > -                         gen_lowpart (mode, a),
> > -                         gen_lowpart (mode, b)));
> > +  target = aarch64_target_reg (target, GET_MODE (a));
> > +  rtx type_reg = gen_reg_rtx (mode);
> > +  emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
> >    return target;
> >  }
> >  
> > diff --git a/gcc/config/aarch64/iterators.md 
> > b/gcc/config/aarch64/iterators.md
> > index 
> > 5f5abd60525ba52fdb466e94a92ff4d011bee5cd..cac33ae812b382cd55611b0da8a6e9eac3a513c4
> >  100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -649,6 +649,7 @@ (define_c_enum "unspec"
> >      UNSPEC_UZP2Q   ; Used in aarch64-sve.md.
> >      UNSPEC_ZIP1Q   ; Used in aarch64-sve.md.
> >      UNSPEC_ZIP2Q   ; Used in aarch64-sve.md.
> > +    UNSPEC_TRN1_CONV       ; Used in aarch64-sve.md.
> >      UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
> >      UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
> >      UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c 
> > b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
> > new file mode 100644
> > index 
> > 0000000000000000000000000000000000000000..525933863f7d67d76ba7afa4321346efa27ba000
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-additional-options "-O2 -fno-schedule-insns" } */
> > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> > +
> > +#include "arm_sve.h"
> > +
> > +/*
> > +** foo:
> > +**        ptrue   (p[0-7])\.d, all
> > +**        pfalse  (p[0-7])\.b
> > +**        ptrue   (p[0-7])\.s, all
> > +**        trn1    (p[0-7])\.d, \2\.d, \3\.d
> > +**        trn1    \2\.d, \1\.d, \3\.d
> > +**        faddv   (h[0-31]), \4\, (z[0-31]).h
> > +**        faddv   (h[0-31]), \2\, \6\.h
> > +**        str     \5, [x0]
> > +**        str     \7, [x0, 2]
> > +**        ret
> > +*/
> > +void foo(svfloat16_t in, float16_t *dst) {
> > +  const svbool_t pg_q0 = svdupq_n_b16(1, 0, 1, 0, 0, 0, 0, 0);
> > +  const svbool_t pg_f0 = svdupq_n_b16(1, 0, 0, 0, 0, 0, 0, 0);
> > +  dst[0] = svaddv_f16(pg_f0, in);
> > +  dst[1] = svaddv_f16(pg_q0, in);
> > +}
> > +

--

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..b8b6f55e1607e5697620bf205fbe9edf3be7c549 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8657,6 +8657,20 @@ (define_insn "@aarch64_sve_<perm_insn><mode>"
   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 )
 
+;; Special purpose permute used by the predicate generation instructions.
+;; Unlike the normal permute patterns, these instructions operate on VNx16BI
+;; regardless of the element size, so that all input and output bits are
+;; well-defined.  Operand 3 then indicates the size of the permute.
+(define_insn "@aarch64_sve_trn1_conv<mode>"
+  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+	(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
+			 (match_operand:VNx16BI 2 "register_operand" "Upa")
+			 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
+			UNSPEC_TRN1_CONV))]
+  "TARGET_SVE"
+  "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+)
+
 ;; =========================================================================
 ;; == Conversions
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 994fafc2dc857ca5c7f345e49b47cc7e7dcf5900..2c113322ff0874ee8762e0a642368adaba8c3793 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5481,12 +5481,12 @@ aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
 	}
     }
 
-  /* Emit the TRN1 itself.  */
+  /* Emit the TRN1 itself.  We emit a TRN that operates on VNx16BI
+     operands but permutes them as though they had mode MODE.  */
   machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
-  target = aarch64_target_reg (target, mode);
-  emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
-			      gen_lowpart (mode, a),
-			      gen_lowpart (mode, b)));
+  target = aarch64_target_reg (target, GET_MODE (a));
+  rtx type_reg = CONST0_RTX (mode);
+  emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
   return target;
 }
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5f5abd60525ba52fdb466e94a92ff4d011bee5cd..cac33ae812b382cd55611b0da8a6e9eac3a513c4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -649,6 +649,7 @@ (define_c_enum "unspec"
     UNSPEC_UZP2Q	; Used in aarch64-sve.md.
     UNSPEC_ZIP1Q	; Used in aarch64-sve.md.
     UNSPEC_ZIP2Q	; Used in aarch64-sve.md.
+    UNSPEC_TRN1_CONV	; Used in aarch64-sve.md.
     UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
     UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
     UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
new file mode 100644
index 0000000000000000000000000000000000000000..525933863f7d67d76ba7afa4321346efa27ba000
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c
@@ -0,0 +1,25 @@
+/* { dg-additional-options "-O2 -fno-schedule-insns" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "arm_sve.h"
+
+/*
+** foo:
+**        ptrue   (p[0-7])\.d, all
+**        pfalse  (p[0-7])\.b
+**        ptrue   (p[0-7])\.s, all
+**        trn1    (p[0-7])\.d, \2\.d, \3\.d
+**        trn1    \2\.d, \1\.d, \3\.d
+**        faddv   (h[0-31]), \4\, (z[0-31]).h
+**        faddv   (h[0-31]), \2\, \6\.h
+**        str     \5, [x0]
+**        str     \7, [x0, 2]
+**        ret
+*/
+void foo(svfloat16_t in, float16_t *dst) {
+  const svbool_t pg_q0 = svdupq_n_b16(1, 0, 1, 0, 0, 0, 0, 0);
+  const svbool_t pg_f0 = svdupq_n_b16(1, 0, 0, 0, 0, 0, 0, 0);
+  dst[0] = svaddv_f16(pg_f0, in);
+  dst[1] = svaddv_f16(pg_q0, in);
+}
+

Re: [PATCH] AArch64 SVE: Fix wrong sve predicate split (PR100048)

Reply via email to