gcc/ChangeLog:

        * config/arm/arm-builtins.cc (enum arm_builtins): Add new
        ARM_BUILTIN_NEON_* enum values: SDOTV8QI, SDOTV16QI, UDOTV8QI,
        UDOTV16QI, USDOTV8QI, USDOTV16QI.
        (arm_init_dotprod_builtins): New.
        (arm_init_builtins): Add call to `arm_init_dotprod_builtins'.
        (arm_general_gimple_fold_builtin): New.
        * config/arm/arm-protos.h (arm_general_gimple_fold_builtin):
        New prototype.
        * config/arm/arm.cc (arm_gimple_fold_builtin): Add call to
        `arm_general_gimple_fold_builtin'.
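        * config/arm/arm_neon_builtins.def (sdot, udot, usdot): Remove.
        * config/arm/neon.md (<sup>dot_prod<vsi2qi>): Renamed to...
        (<sup>dot_prod<mode><vsi2qi>): ...this.
        (neon_usdot<vsi2qi>): Renamed to...
        (neon_usdot<mode><vsi2qi>): ...this.
        (usdot_prod<vsi2qi>): Renamed to...
        (usdot_prod<mode><vsi2qi>): ...this.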
---
 gcc/config/arm/arm-builtins.cc       | 95 ++++++++++++++++++++++++++++
 gcc/config/arm/arm-protos.h          |  3 +
 gcc/config/arm/arm.cc                |  1 +
 gcc/config/arm/arm_neon_builtins.def |  3 -
 gcc/config/arm/neon.md               |  6 +-
 5 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c9d50bf8fbb..b23b6caa063 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -45,6 +45,8 @@
 #include "arm-builtins.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "basic-block.h"
+#include "gimple.h"
 
 #define SIMD_MAX_BUILTIN_ARGS 7
 
@@ -1298,6 +1300,13 @@ enum arm_builtins
 #define VAR1(T, N, X) \
   ARM_BUILTIN_##N,
 
+  ARM_BUILTIN_NEON_SDOTV8QI,
+  ARM_BUILTIN_NEON_SDOTV16QI,
+  ARM_BUILTIN_NEON_UDOTV8QI,
+  ARM_BUILTIN_NEON_UDOTV16QI,
+  ARM_BUILTIN_NEON_USDOTV8QI,
+  ARM_BUILTIN_NEON_USDOTV16QI,
+
   ARM_BUILTIN_ACLE_BASE,
   ARM_BUILTIN_SAT_IMM_CHECK = ARM_BUILTIN_ACLE_BASE,
 
@@ -2648,6 +2657,60 @@ arm_init_fp16_builtins (void)
                                               "__fp16");
 }
 
+static void
+arm_init_dotprod_builtins (void)
+{
+  tree fndecl = NULL;
+  tree ftype = NULL;
+
+  tree uv8qi = arm_simd_builtin_type (V8QImode, qualifier_unsigned);
+  tree sv8qi = arm_simd_builtin_type (V8QImode, qualifier_none);
+  tree uv16qi = arm_simd_builtin_type (V16QImode, qualifier_unsigned);
+  tree sv16qi = arm_simd_builtin_type (V16QImode, qualifier_none);
+  tree uv2si = arm_simd_builtin_type (V2SImode, qualifier_unsigned);
+  tree sv2si = arm_simd_builtin_type (V2SImode, qualifier_none);
+  tree uv4si = arm_simd_builtin_type (V4SImode, qualifier_unsigned);
+  tree sv4si = arm_simd_builtin_type (V4SImode, qualifier_none);
+
+  struct builtin_decls_data
+  {
+    tree out_type_node;
+    tree in_type1_node;
+    tree in_type2_node;
+    const char *builtin_name;
+    int function_code;
+  };
+
+#define NAME(A) "__builtin_neon_" #A
+#define ENUM(B) ARM_BUILTIN_NEON_##B
+
+  builtin_decls_data bdda[] =
+  {
+    { sv2si, sv8qi,  sv8qi,  NAME (sdotv8qi),      ENUM (SDOTV8QI)   },
+    { uv2si, uv8qi,  uv8qi,  NAME (udotv8qi_uuuu),  ENUM (UDOTV8QI)   },
+    { sv2si, uv8qi,  sv8qi,  NAME (usdotv8qi_ssus), ENUM (USDOTV8QI)  },
+    { sv4si, sv16qi, sv16qi, NAME (sdotv16qi),     ENUM (SDOTV16QI)  },
+    { uv4si, uv16qi, uv16qi, NAME (udotv16qi_uuuu),  ENUM (UDOTV16QI)  },
+    { sv4si, uv16qi, sv16qi, NAME (usdotv16qi_ssus), ENUM (USDOTV16QI) },
+  };
+
+#undef NAME
+#undef ENUM
+
+  builtin_decls_data *bdd = bdda;
+  builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
+
+  for (; bdd < bdd_end; bdd++)
+  {
+    ftype = build_function_type_list (bdd->out_type_node, bdd->out_type_node,
+                                     bdd->in_type1_node, bdd->in_type2_node,
+                                     NULL_TREE);
+    fndecl = arm_general_add_builtin_function (bdd->builtin_name,
+                                              ftype, bdd->function_code);
+    arm_builtin_decls[bdd->function_code] = fndecl;
+  }
+}
+
 void
 arm_init_builtins (void)
 {
@@ -2676,6 +2739,7 @@ arm_init_builtins (void)
        arm_init_neon_builtins ();
       arm_init_vfp_builtins ();
       arm_init_crypto_builtins ();
+      arm_init_dotprod_builtins ();
     }
 
   if (TARGET_CDE)
@@ -2738,6 +2802,37 @@ arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
     }
 }
 
+/* Try to fold STMT, given that it's a call to the built-in function with
+   subcode FCODE.  Return the new statement on success and null on
+   failure.  */
+gimple *
+arm_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+                                gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
+{
+  gimple *new_stmt = NULL;
+  unsigned nargs = gimple_call_num_args (stmt);
+  tree *args = (nargs > 0
+               ? gimple_call_arg_ptr (stmt, 0)
+               : &error_mark_node);
+
+  switch (fcode)
+    {
+    case ARM_BUILTIN_NEON_SDOTV8QI:
+    case ARM_BUILTIN_NEON_SDOTV16QI:
+    case ARM_BUILTIN_NEON_UDOTV8QI:
+    case ARM_BUILTIN_NEON_UDOTV16QI:
+    case ARM_BUILTIN_NEON_USDOTV8QI:
+    case ARM_BUILTIN_NEON_USDOTV16QI:
+      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+                                     DOT_PROD_EXPR, args[1],
+                                     args[2], args[0]);
+      break;
+    default:
+      break;
+    }
+  return new_stmt;
+}
+
 /* Errors in the source file can cause expand_expr to return const0_rtx
    where we expect a vector.  To avoid crashing, use one of the vector
    clear instructions.  */
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 50cae2b513a..4e31d1d0225 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -57,6 +57,9 @@ extern rtx arm_expand_builtin (tree exp, rtx target, rtx subtarget
 extern tree arm_builtin_decl (unsigned code, bool initialize_p
                              ATTRIBUTE_UNUSED);
 extern void arm_init_builtins (void);
+extern gimple *arm_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+                                               gimple_stmt_iterator *gsi
+                                               ATTRIBUTE_UNUSED);
 extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
 extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
 extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 92cd168e659..109e9c131f5 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -2852,6 +2852,7 @@ arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   switch (code & ARM_BUILTIN_CLASS)
     {
     case ARM_BUILTIN_GENERAL:
+      new_stmt = arm_general_gimple_fold_builtin (subcode, stmt, gsi);
       break;
     case ARM_BUILTIN_MVE:
       new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 0c5d40b96e5..cf5537ca95d 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -349,14 +349,11 @@ VAR13 (STORE1, vst4,
        v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
 VAR11 (STORE1LANE, vst4_lane,
        v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
-VAR2 (TERNOP, sdot, v8qi, v16qi)
-VAR2 (UTERNOP, udot, v8qi, v16qi)
 VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
 VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
 VAR2 (MAC_LANE, sdot_laneq, v8qi, v16qi)
 VAR2 (UMAC_LANE, udot_laneq, v8qi, v16qi)
 
-VAR2 (USTERNOP, usdot, v8qi, v16qi)
 VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
 VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
 VAR2 (USMAC_LANE_QUADTUP, usdot_laneq, v8qi, v16qi)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fa4a7aeda35..b3a3564ca2b 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2989,7 +2989,7 @@ (define_expand "cmul<conj_op><mode>3"
 ;; ...
 ;;
 ;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sup>dot_prod<vsi2qi>"
+(define_insn "<sup>dot_prod<mode><vsi2qi>"
   [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -3013,7 +3013,7 @@ (define_expand "neon_<sup>dot<vsi2qi>"
 )
 
 ;; These instructions map to the __builtins for the Dot Product operations.
-(define_insn "neon_usdot<vsi2qi>"
+(define_insn "neon_usdot<mode><vsi2qi>"
   [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
@@ -3112,7 +3112,7 @@ (define_insn "neon_<sup>dot_laneq<vsi2qi>"
 )
 
 ;; Auto-vectorizer pattern for usdot
-(define_expand "usdot_prod<vsi2qi>"
+(define_expand "usdot_prod<mode><vsi2qi>"
   [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
                                                        "register_operand")
-- 
2.34.1

Reply via email to