Hi All,

this is a minor respin with changes echo'd from feedback from aarch64.
I assume still OK for trunk.

Regtested on arm-none-eabi, armeb-none-eabi,
aarch64-none-elf and aarch64_be-none-elf with no issues found.

Ok for trunk?

gcc/
2017-10-06  Tamar Christina  <tamar.christ...@arm.com>

        * config/arm/arm-builtins.c (arm_unsigned_uternop_qualifiers): New.
        (UTERNOP_QUALIFIERS, arm_umac_lane_qualifiers, UMAC_LANE_QUALIFIERS): 
New.
        * config/arm/arm_neon_builtins.def (sdot, udot, sdot_lane, udot_lane): 
new.
        * config/arm/iterators.md (DOTPROD, VSI2QI, vsi2qi): New.
        (UNSPEC_DOT_S, UNSPEC_DOT_U, opsuffix): New.
        * config/arm/neon.md (neon_<sup>dot<vsi2qi>): New.
        (neon_<sup>dot_lane<vsi2qi>, <sup>dot_prod<vsi2qi>): New.
        * config/arm/types.md (neon_dot, neon_dot_q): New.
        * config/arm/unspecs.md (sup): Add UNSPEC_DOT_S, UNSPEC_DOT_U.
________________________________________
From: Kyrill Tkachov <kyrylo.tkac...@foss.arm.com>
Sent: Wednesday, September 13, 2017 10:36:38 AM
To: Tamar Christina; gcc-patches@gcc.gnu.org
Cc: nd; Ramana Radhakrishnan; Richard Earnshaw; ni...@redhat.com
Subject: Re: [PATCH][GCC][ARM] Dot Product NEON patterns [Patch (2/8)]

Hi Tamar,

On 01/09/17 14:33, Tamar Christina wrote:
> Hi All,
>
> This patch adds the instructions for Dot Product to ARM along
> with the intrinsics and vectorizer pattern.
>
> Armv8.2-a dot product supports 8-bit element values both
> signed and unsigned.
>
> Dot product is available from Armv8.2-a and onwards.
>
> Regtested and bootstrapped on arm-none-eabi and no issues.
>
> Ok for trunk?

This is ok once the prerequisites are approved with one ChangeLog nit.

Kyrill

> gcc/
> 2017-09-01  Tamar Christina  <tamar.christ...@arm.com>
>
>       * config/arm/arm-builtins.c (arm_unsigned_uternop_qualifiers): New.
>       (UTERNOP_QUALIFIERS, arm_umac_lane_qualifiers, UMAC_LANE_QUALIFIERS): 
> New.
>       * config/arm/arm_neon_builtins.def (sdot, udot, sdot_lane, udot_lane): 
> new.
>       * config/arm/iterators.md (DOTPROD, DOT_MODE, dot_mode): New.
>       (UNSPEC_DOT_S, UNSPEC_DOT_U, opsuffix): New.
>       * config/arm/neon.md (neon_<sup>dot<dot_mode>): New.
>       (neon_<sup>dot_lane<dot_mode>, <sup>dot_prod<dot_mode>): New.
>       * config/arm/types.md (neon_dot, neon_dot_q): New.
>       * config/arm/unspecs.md (UNSPEC_DOT_S, UNSPEC_DOT_U): New.
>

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 
7acbaf1bb40a4f270e75968804546508f7839e49..139e09fd929e17216ad9383505f1453a73d071fb
 100644
--- a/gcc/config/arm/iterators.md

--snip---

  ;;----------------------------------------------------------------------------
  ;; Code attributes
  ;;----------------------------------------------------------------------------
@@ -816,6 +822,7 @@
    (UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u")
    (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
    (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
+  (UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u")
  ])

In your ChangeLog you list this as "New" whereas your patch just adds them to 
the "sup" int_attr.

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 569f960fd2e534df6e972245b35ae6def5bec033..6d1b20c80f9a24a8d26a10bea6f6e316886bce31 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -105,6 +105,13 @@ arm_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
 #define TERNOP_QUALIFIERS (arm_ternop_qualifiers)
 
+/* unsigned T (unsigned T, unsigned T, unsigned T).  */
+static enum arm_type_qualifiers
+arm_unsigned_uternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
+      qualifier_unsigned };
+#define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers)
+
 /* T (T, immediate).  */
 static enum arm_type_qualifiers
 arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -131,6 +138,13 @@ arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_none, qualifier_lane_index };
 #define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
 
+/* unsigned T (unsigned T, unsigned T, unsigend T, lane index).  */
+static enum arm_type_qualifiers
+arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
+      qualifier_unsigned, qualifier_lane_index };
+#define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers)
+
 /* T (T, T, immediate).  */
 static enum arm_type_qualifiers
 arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 07f0368343a0c940c1cc1848d31f28a47a587b6f..982eec810dafb5ec955273099853f8842020d104 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -331,3 +331,7 @@ VAR11 (STORE1, vst4,
 	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
 VAR9 (STORE1LANE, vst4_lane,
 	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR2 (TERNOP, sdot, v8qi, v16qi)
+VAR2 (UTERNOP, udot, v8qi, v16qi)
+VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
+VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7acbaf1bb40a4f270e75968804546508f7839e49..a4fb234a846795e1c0dd5bf7de76ff7da487be23 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -410,6 +410,8 @@
 
 (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
 
+(define_int_iterator DOTPROD [UNSPEC_DOT_S UNSPEC_DOT_U])
+
 ;;----------------------------------------------------------------------------
 ;; Mode attributes
 ;;----------------------------------------------------------------------------
@@ -720,6 +722,9 @@
 
 (define_mode_attr pf [(V8QI "p") (V16QI "p") (V2SF "f") (V4SF "f")])
 
+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes
 ;;----------------------------------------------------------------------------
@@ -816,6 +821,7 @@
   (UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u")
   (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
   (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
+  (UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u")
 ])
 
 (define_int_attr vcvth_op
@@ -1003,3 +1009,6 @@
 
 (define_int_attr mrrc [(VUNSPEC_MRRC "mrrc") (VUNSPEC_MRRC2 "mrrc2")])
 (define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
+
+(define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
+			   (UNSPEC_DOT_U "u8")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 12ba2d98a0ae0517465dd61cefe8e59011508a88..e715a5c2ae16ea2baf263a1fedfb991e7a29e1b9 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3044,6 +3044,76 @@
   DONE;
 })
 
+;; These instructions map to the __builtins for the Dot Product operations.
+(define_insn "neon_<sup>dot<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand" "=w")
+	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
+		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
+							"register_operand" "w")
+				   (match_operand:<VSI2QI> 3
+							"register_operand" "w")]
+		DOTPROD)))]
+  "TARGET_DOTPROD"
+  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+  [(set_attr "type" "neon_dot")]
+)
+
+;; These instructions map to the __builtins for the Dot Product
+;; indexed operations.
+(define_insn "neon_<sup>dot_lane<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand" "=w")
+	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
+		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
+							"register_operand" "w")
+				   (match_operand:V8QI 3 "register_operand" "t")
+				   (match_operand:SI 4 "immediate_operand" "i")]
+		DOTPROD)))]
+  "TARGET_DOTPROD"
+  {
+    operands[4]
+      = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
+    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
+  }
+  [(set_attr "type" "neon_dot")]
+)
+
+;; These expands map to the Dot Product optab the vectorizer checks for.
+;; The auto-vectorizer expects a dot product builtin that also does an
+;; accumulation into the provided register.
+;; Given the following pattern
+;;
+;; for (i=0; i<len; i++) {
+;;     c = a[i] * b[i];
+;;     r += c;
+;; }
+;; return result;
+;;
+;; This can be auto-vectorized to
+;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+;;
+;; given enough iterations.  However the vectorizer can keep unrolling the loop
+;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
+;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
+;; ...
+;;
+;; and so the vectorizer provides r, in which the result has to be accumulated.
+(define_expand "<sup>dot_prod<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand")
+	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
+							"register_operand")
+				   (match_operand:<VSI2QI> 2
+							"register_operand")]
+		     DOTPROD)
+		    (match_operand:VCVTI 3 "register_operand")))]
+  "TARGET_DOTPROD"
+{
+  emit_insn (
+    gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
+				 operands[2]));
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  DONE;
+})
+
 (define_expand "neon_copysignf<mode>"
   [(match_operand:VCVTF 0 "register_operand")
    (match_operand:VCVTF 1 "register_operand")
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index 22d993d46a30fd4a158b10164324e3fae019cd40..03e9cdebb7509333a950581b9206a88afe7b2d0b 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -316,6 +316,8 @@
 ; neon_cls_q
 ; neon_cnt
 ; neon_cnt_q
+; neon_dot
+; neon_dot_q
 ; neon_ext
 ; neon_ext_q
 ; neon_rbit
@@ -764,6 +766,8 @@
 \
   neon_abs,\
   neon_abs_q,\
+  neon_dot,\
+  neon_dot_q,\
   neon_neg,\
   neon_neg_q,\
   neon_qneg,\
@@ -1110,8 +1114,8 @@
           neon_sub, neon_sub_q, neon_sub_widen, neon_sub_long, neon_qsub,\
           neon_qsub_q, neon_sub_halve, neon_sub_halve_q,\
           neon_sub_halve_narrow_q,\
-          neon_abs, neon_abs_q, neon_neg, neon_neg_q, neon_qneg,\
-          neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\
+	  neon_abs, neon_abs_q, neon_dot, neon_dot_q, neon_neg, neon_neg_q,\
+	  neon_qneg, neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\
           neon_abd_long, neon_minmax, neon_minmax_q, neon_compare,\
           neon_compare_q, neon_compare_zero, neon_compare_zero_q,\
           neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 99cfa41b08dad24a85e78f069331e83c03c8bce1..c474f4bb5db995b60f464f098e478f0398ce15f9 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -410,4 +410,6 @@
   UNSPEC_VRNDN
   UNSPEC_VRNDP
   UNSPEC_VRNDX
+  UNSPEC_DOT_S
+  UNSPEC_DOT_U
 ])

Reply via email to