This patch adds strided load/store support to the loop vectorizer, keyed off
STMT_VINFO_STRIDED_P.

Bootstrapped and regression tested on x86; no regressions.

Ok for trunk?

gcc/ChangeLog:

        * internal-fn.cc (strided_load_direct): New function.
        (strided_store_direct): Ditto.
        (expand_strided_store_optab_fn): Ditto.
        (expand_scatter_store_optab_fn): Add strided store.
        (expand_strided_load_optab_fn): New function.
        (expand_gather_load_optab_fn): Add strided load.
        (direct_strided_load_optab_supported_p): New function.
        (direct_strided_store_optab_supported_p): Ditto.
        (internal_load_fn_p): Add strided load.
        (internal_strided_fn_p): New function.
        (internal_fn_len_index): Add strided load/store.
        (internal_fn_mask_index): Ditto.
        (internal_fn_stored_value_index): Add strided store.
        (internal_strided_fn_supported_p): New function.
        * internal-fn.def (MASK_LEN_STRIDED_LOAD): New IFN.
        (MASK_LEN_STRIDED_STORE): Ditto.
        * internal-fn.h (internal_strided_fn_p): New function.
        (internal_strided_fn_supported_p): Ditto.
        * optabs-query.cc (supports_vec_gather_load_p): Add strided load.
        (supports_vec_scatter_store_p): Add strided store.
        * optabs-query.h (supports_vec_gather_load_p): Add strided load.
        (supports_vec_scatter_store_p): Add strided store.
        * tree-vect-data-refs.cc (vect_prune_runtime_alias_test_list): Add 
strided load/store.
        (vect_gather_scatter_fn_p): Ditto.
        (vect_check_gather_scatter): Ditto.
        * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
        (vect_truncate_gather_scatter_offset): Ditto.
        (vect_use_strided_gather_scatters_p): Ditto.
        (vect_get_strided_load_store_ops): Ditto.
        (vectorizable_store): Ditto.
        (vectorizable_load): Ditto.
        * tree-vectorizer.h (vect_gather_scatter_fn_p): Ditto.

---
 gcc/internal-fn.cc         | 101 ++++++++++++++++++++++++++++++++-----
 gcc/internal-fn.def        |   4 ++
 gcc/internal-fn.h          |   2 +
 gcc/optabs-query.cc        |  25 ++++++---
 gcc/optabs-query.h         |   4 +-
 gcc/tree-vect-data-refs.cc |  45 +++++++++++++----
 gcc/tree-vect-stmts.cc     |  65 ++++++++++++++++++------
 gcc/tree-vectorizer.h      |   2 +-
 8 files changed, 199 insertions(+), 49 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index c7d3564faef..a31a65755c7 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -164,6 +164,7 @@ init_internal_fns ()
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { 3, 1, false }
+#define strided_load_direct { -1, -1, false }
 #define len_load_direct { -1, -1, false }
 #define mask_len_load_direct { -1, 4, false }
 #define mask_store_direct { 3, 2, false }
@@ -172,6 +173,7 @@ init_internal_fns ()
 #define vec_cond_mask_direct { 1, 0, false }
 #define vec_cond_direct { 2, 0, false }
 #define scatter_store_direct { 3, 1, false }
+#define strided_store_direct { 1, 1, false }
 #define len_store_direct { 3, 3, false }
 #define mask_len_store_direct { 4, 5, false }
 #define vec_set_direct { 3, 3, false }
@@ -3561,62 +3563,87 @@ expand_LAUNDER (internal_fn, gcall *call)
   expand_assignment (lhs, gimple_call_arg (call, 0), false);
 }
 
+#define expand_strided_store_optab_fn expand_scatter_store_optab_fn
+
 /* Expand {MASK_,}SCATTER_STORE{S,U} call CALL using optab OPTAB.  */
 
 static void
 expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
 {
+  insn_code icode;
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int rhs_index = internal_fn_stored_value_index (ifn);
   tree base = gimple_call_arg (stmt, 0);
   tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
   tree rhs = gimple_call_arg (stmt, rhs_index);
 
   rtx base_rtx = expand_normal (base);
   rtx offset_rtx = expand_normal (offset);
-  HOST_WIDE_INT scale_int = tree_to_shwi (scale);
   rtx rhs_rtx = expand_normal (rhs);
 
   class expand_operand ops[8];
   int i = 0;
   create_address_operand (&ops[i++], base_rtx);
-  create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
-  create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
-  create_integer_operand (&ops[i++], scale_int);
+  if (internal_strided_fn_p (ifn))
+    {
+      create_address_operand (&ops[i++], offset_rtx);
+      icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)));
+    }
+  else
+    {
+      tree scale = gimple_call_arg (stmt, 2);
+      HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+      create_input_operand (&ops[i++], offset_rtx,
+                           TYPE_MODE (TREE_TYPE (offset)));
+      create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+      create_integer_operand (&ops[i++], scale_int);
+      icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
+                                    TYPE_MODE (TREE_TYPE (offset)));
+    }
   create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
   i = add_mask_and_len_args (ops, i, stmt);
 
-  insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
-                                          TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
 }
 
+#define expand_strided_load_optab_fn expand_gather_load_optab_fn
+
 /* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB.  */
 
 static void
 expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
 {
+  insn_code icode;
+  internal_fn ifn = gimple_call_internal_fn (stmt);
   tree lhs = gimple_call_lhs (stmt);
   tree base = gimple_call_arg (stmt, 0);
   tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
 
   rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   rtx base_rtx = expand_normal (base);
   rtx offset_rtx = expand_normal (offset);
-  HOST_WIDE_INT scale_int = tree_to_shwi (scale);
 
   int i = 0;
   class expand_operand ops[8];
   create_output_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
   create_address_operand (&ops[i++], base_rtx);
-  create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
-  create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
-  create_integer_operand (&ops[i++], scale_int);
+  if (internal_strided_fn_p (ifn))
+    {
+      create_address_operand (&ops[i++], offset_rtx);
+      icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)));
+    }
+  else
+    {
+      tree scale = gimple_call_arg (stmt, 2);
+      HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+      create_input_operand (&ops[i++], offset_rtx,
+                           TYPE_MODE (TREE_TYPE (offset)));
+      create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+      create_integer_operand (&ops[i++], scale_int);
+      icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
+                                    TYPE_MODE (TREE_TYPE (offset)));
+    }
   i = add_mask_and_len_args (ops, i, stmt);
-  insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
-                                          TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
   if (!rtx_equal_p (lhs_rtx, ops[0].value))
     emit_move_insn (lhs_rtx, ops[0].value);
@@ -4012,6 +4039,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types,
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_gather_load_optab_supported_p convert_optab_supported_p
+#define direct_strided_load_optab_supported_p direct_optab_supported_p
 #define direct_len_load_optab_supported_p direct_optab_supported_p
 #define direct_mask_len_load_optab_supported_p convert_optab_supported_p
 #define direct_mask_store_optab_supported_p convert_optab_supported_p
@@ -4020,6 +4048,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types,
 #define direct_vec_cond_mask_optab_supported_p convert_optab_supported_p
 #define direct_vec_cond_optab_supported_p convert_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
+#define direct_strided_store_optab_supported_p direct_optab_supported_p
 #define direct_len_store_optab_supported_p direct_optab_supported_p
 #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
@@ -4596,6 +4625,7 @@ internal_load_fn_p (internal_fn fn)
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_LEN_GATHER_LOAD:
+    case IFN_MASK_LEN_STRIDED_LOAD:
     case IFN_LEN_LOAD:
     case IFN_MASK_LEN_LOAD:
       return true;
@@ -4648,6 +4678,22 @@ internal_gather_scatter_fn_p (internal_fn fn)
     }
 }
 
+/* Return true if IFN is some form of strided load or strided store.  */
+
+bool
+internal_strided_fn_p (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_MASK_LEN_STRIDED_LOAD:
+    case IFN_MASK_LEN_STRIDED_STORE:
+      return true;
+
+    default:
+      return false;
+    }
+}
+
 /* If FN takes a vector len argument, return the index of that argument,
    otherwise return -1.  */
 
@@ -4683,6 +4729,8 @@ internal_fn_len_index (internal_fn fn)
     case IFN_COND_LEN_XOR:
     case IFN_COND_LEN_SHL:
     case IFN_COND_LEN_SHR:
+    case IFN_MASK_LEN_STRIDED_LOAD:
+    case IFN_MASK_LEN_STRIDED_STORE:
       return 4;
 
     case IFN_COND_LEN_NEG:
@@ -4776,6 +4824,10 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_LEN_STORE:
       return 2;
 
+    case IFN_MASK_LEN_STRIDED_LOAD:
+    case IFN_MASK_LEN_STRIDED_STORE:
+      return 3;
+
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_GATHER_LOAD:
@@ -4796,6 +4848,9 @@ internal_fn_stored_value_index (internal_fn fn)
 {
   switch (fn)
     {
+    case IFN_MASK_LEN_STRIDED_STORE:
+      return 2;
+
     case IFN_MASK_STORE:
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
@@ -4845,6 +4900,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, 
tree vector_type,
          && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
 }
 
+/* Return true if the target supports strided load or strided store function
+   IFN.  For loads, VECTOR_TYPE is the vector type of the load result,
+   while for stores it is the vector type of the stored data argument.
+   MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
+   or stored.  */
+
+bool
+internal_strided_fn_supported_p (internal_fn ifn, tree vector_type,
+                                tree memory_element_type)
+{
+  if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
+                          TYPE_SIZE (memory_element_type)))
+    return false;
+  optab optab = direct_internal_fn_optab (ifn);
+  insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+  return icode != CODE_FOR_nothing;
+}
+
 /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
    for pointers of type TYPE when the accesses have LENGTH bytes and their
    common byte alignment is ALIGN.  */
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a2023ab9c3d..922359ed5cf 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -199,6 +199,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
                       mask_gather_load, gather_load)
 DEF_INTERNAL_OPTAB_FN (MASK_LEN_GATHER_LOAD, ECF_PURE,
                       mask_len_gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_LOAD, ECF_PURE,
+                      mask_len_strided_load, strided_load)
 
 DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
 DEF_INTERNAL_OPTAB_FN (MASK_LEN_LOAD, ECF_PURE, mask_len_load, mask_len_load)
@@ -208,6 +210,8 @@ DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
                       mask_scatter_store, scatter_store)
 DEF_INTERNAL_OPTAB_FN (MASK_LEN_SCATTER_STORE, 0,
                       mask_len_scatter_store, scatter_store)
+DEF_INTERNAL_OPTAB_FN (MASK_LEN_STRIDED_STORE, 0,
+                      mask_len_strided_store, strided_store)
 
 DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
 DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 7d72f4db2d0..ea6a7369f23 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -235,12 +235,14 @@ extern bool can_interpret_as_conditional_op_p (gimple *, 
tree *,
 extern bool internal_load_fn_p (internal_fn);
 extern bool internal_store_fn_p (internal_fn);
 extern bool internal_gather_scatter_fn_p (internal_fn);
+extern bool internal_strided_fn_p (internal_fn);
 extern int internal_fn_mask_index (internal_fn);
 extern int internal_fn_len_index (internal_fn);
 extern int internal_fn_else_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
                                                    tree, tree, int);
+extern bool internal_strided_fn_supported_p (internal_fn, tree, tree);
 extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
                                                poly_uint64, unsigned int);
 #define VECT_PARTIAL_BIAS_UNSUPPORTED 127
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 947ccef218c..860e744995b 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -670,14 +670,18 @@ supports_vec_convert_optab_p (optab op, machine_mode mode)
    for at least one vector mode.  */
 
 bool
-supports_vec_gather_load_p (machine_mode mode)
+supports_vec_gather_load_p (machine_mode mode, bool strided_p)
 {
   if (!this_fn_optabs->supports_vec_gather_load[mode])
     this_fn_optabs->supports_vec_gather_load[mode]
       = (supports_vec_convert_optab_p (gather_load_optab, mode)
-        || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
-        || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
-        ? 1 : -1);
+            || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
+            || supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
+            || (strided_p
+                && direct_optab_handler (mask_len_strided_load_optab, mode)
+                     != CODE_FOR_nothing)
+          ? 1
+          : -1);
 
   return this_fn_optabs->supports_vec_gather_load[mode] > 0;
 }
@@ -687,14 +691,19 @@ supports_vec_gather_load_p (machine_mode mode)
    for at least one vector mode.  */
 
 bool
-supports_vec_scatter_store_p (machine_mode mode)
+supports_vec_scatter_store_p (machine_mode mode, bool strided_p)
 {
   if (!this_fn_optabs->supports_vec_scatter_store[mode])
     this_fn_optabs->supports_vec_scatter_store[mode]
       = (supports_vec_convert_optab_p (scatter_store_optab, mode)
-        || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
-        || supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
-        ? 1 : -1);
+            || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
+            || supports_vec_convert_optab_p (mask_len_scatter_store_optab,
+                                             mode)
+            || (strided_p
+                && direct_optab_handler (mask_len_strided_store_optab, mode)
+                     != CODE_FOR_nothing)
+          ? 1
+          : -1);
 
   return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
 }
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 920eb6a1b67..7c22edc5a78 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -191,8 +191,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
 bool can_atomic_load_p (machine_mode);
 bool lshift_cheap_p (bool);
-bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
-bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
+bool supports_vec_gather_load_p (machine_mode = E_VOIDmode, bool = false);
+bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode, bool = false);
 bool can_vec_extract (machine_mode, machine_mode);
 
 /* Version of find_widening_optab_handler_and_mode that operates on
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d5c9c4a11c2..8662a570417 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3903,7 +3903,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info 
loop_vinfo)
 
 /* Check whether we can use an internal function for a gather load
    or scatter store.  READ_P is true for loads and false for stores.
-   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
+   MASKED_P is true if the load or store is conditional.  STRIDED_P
+   is true if the load or store is a strided access.  MEMORY_TYPE is
    the type of the memory elements being loaded or stored.  OFFSET_TYPE
    is the type of the offset that is being applied to the invariant
    base address.  SCALE is the amount by which the offset should
@@ -3914,8 +3915,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info 
loop_vinfo)
 
 bool
 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
-                         tree vectype, tree memory_type, tree offset_type,
-                         int scale, internal_fn *ifn_out,
+                         bool strided_p, tree vectype, tree memory_type,
+                         tree offset_type, int scale, internal_fn *ifn_out,
                          tree *offset_vectype_out)
 {
   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
@@ -3926,7 +3927,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
bool masked_p,
     return false;
 
   /* Work out which function we need.  */
-  internal_fn ifn, alt_ifn, alt_ifn2;
+  internal_fn ifn, alt_ifn, alt_ifn2, alt_ifn3;
   if (read_p)
     {
       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
@@ -3935,6 +3936,12 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
bool masked_p,
         use MASK_LEN_GATHER_LOAD regardless whether len and
         mask are valid or not.  */
       alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
+      /* When the target supports MASK_LEN_STRIDED_LOAD, we can relax the
+        restrictions around the relationship of the vector offset type
+        to the data being loaded by using a gather load with strided access.
+        E.g. a "gather" of N bytes with a 64-bit stride would in principle
+        be possible without needing an Nx64-bit vector offset type.  */
+      alt_ifn3 = IFN_MASK_LEN_STRIDED_LOAD;
     }
   else
     {
@@ -3944,10 +3951,26 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, 
bool masked_p,
         use MASK_LEN_SCATTER_STORE regardless whether len and
         mask are valid or not.  */
       alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
+      /* When the target supports MASK_LEN_STRIDED_STORE, we can relax the
+        restrictions around the relationship of the vector offset type
+        to the data being stored by using a scatter store with strided access.
+        E.g. a "scatter" of N bytes with a 64-bit stride would in principle
+        be possible without needing an Nx64-bit vector offset type.  */
     }
 
   for (;;)
     {
+      /* We don't need to check whether the target supports the
+        gather/scatter IFN with the expected vector offset for a strided
+        access when the target itself supports the strided load/store IFN.  */
+      if (strided_p
+         && internal_strided_fn_supported_p (alt_ifn3, vectype, memory_type))
+       {
+         *ifn_out = alt_ifn3;
+         return true;
+       }
+
       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
       if (!offset_vectype)
        return false;
@@ -4030,6 +4053,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
   internal_fn ifn;
   tree offset_vectype;
   bool masked_p = false;
+  bool strided_p = STMT_VINFO_STRIDED_P (stmt_info);
 
   /* See whether this is already a call to a gather/scatter internal function.
      If not, see whether it's a masked load or store.  */
@@ -4197,12 +4221,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
                 supports it for at least some offset type.  */
              if (use_ifn_p
                  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
-                                               masked_p, vectype, memory_type,
+                                               masked_p, strided_p,
+                                               vectype, memory_type,
                                                signed_char_type_node,
                                                new_scale, &ifn,
                                                &offset_vectype)
                  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
-                                               masked_p, vectype, memory_type,
+                                               masked_p, strided_p,
+                                               vectype, memory_type,
                                                unsigned_char_type_node,
                                                new_scale, &ifn,
                                                &offset_vectype))
@@ -4226,7 +4252,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
              && TREE_CODE (off) == SSA_NAME
              && !POINTER_TYPE_P (TREE_TYPE (off))
              && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
-                                          masked_p, vectype, memory_type,
+                                          masked_p, strided_p,
+                                          vectype, memory_type,
                                           TREE_TYPE (off), scale, &ifn,
                                           &offset_vectype))
            break;
@@ -4281,8 +4308,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
   if (use_ifn_p)
     {
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
-                                    vectype, memory_type, offtype, scale,
-                                    &ifn, &offset_vectype))
+                                    strided_p, vectype, memory_type, offtype,
+                                    scale, &ifn, &offset_vectype))
        ifn = IFN_LAST;
       decl = NULL_TREE;
     }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f895aaf3083..907d0c0bcbb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1515,10 +1515,14 @@ check_load_store_for_partial_vectors (loop_vec_info 
loop_vinfo, tree vectype,
       internal_fn len_ifn = (is_load
                             ? IFN_MASK_LEN_GATHER_LOAD
                             : IFN_MASK_LEN_SCATTER_STORE);
-      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
-                                                 gs_info->memory_type,
-                                                 gs_info->offset_vectype,
-                                                 gs_info->scale))
+      if (internal_strided_fn_p (gs_info->ifn)
+         && internal_strided_fn_supported_p (gs_info->ifn, vectype,
+                                             gs_info->memory_type))
+       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+      else if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
+                                                      gs_info->memory_type,
+                                                      gs_info->offset_vectype,
+                                                      gs_info->scale))
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
       else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                       gs_info->memory_type,
@@ -1703,6 +1707,7 @@ vect_truncate_gather_scatter_offset (stmt_vec_info 
stmt_info,
         no narrower than OFFSET_TYPE.  */
       tree memory_type = TREE_TYPE (DR_REF (dr));
       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+                                    STMT_VINFO_STRIDED_P (stmt_info),
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype)
          || gs_info->ifn == IFN_LAST)
@@ -1743,6 +1748,15 @@ vect_use_strided_gather_scatters_p (stmt_vec_info 
stmt_info,
       || gs_info->ifn == IFN_LAST)
     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);
+  else if (internal_strided_fn_p (gs_info->ifn))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "using strided IFN for strided/grouped access,"
+                        " scale = %d\n",
+                        gs_info->scale);
+      return true;
+    }
 
   tree old_offset_type = TREE_TYPE (gs_info->offset);
   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -3012,6 +3026,14 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
       *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
     }
 
+  /* If the target supports strided load/store, use DR_STEP directly.  */
+  if (internal_strided_fn_p (gs_info->ifn))
+    {
+      *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
+                                                  unshare_expr (DR_STEP (dr)));
+      return;
+    }
+
   /* The offset given in GS_INFO can have pointer type, so use the element
      type of the vector instead.  */
   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
@@ -9130,7 +9152,7 @@ vectorizable_store (vec_info *vinfo,
                vec_offset = vec_offsets[j];
              tree scale = size_int (gs_info.scale);
 
-             if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
+             if (internal_fn_len_index (gs_info.ifn) >= 0)
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
@@ -9150,10 +9172,18 @@ vectorizable_store (vec_info *vinfo,
 
              gcall *call;
              if (final_len && final_mask)
-               call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
-                                                  7, dataref_ptr, vec_offset,
-                                                  scale, vec_oprnd, final_mask,
-                                                  final_len, bias);
+               {
+                 if (internal_strided_fn_p (gs_info.ifn))
+                   call
+                     = gimple_build_call_internal (IFN_MASK_LEN_STRIDED_STORE,
+                                                   6, dataref_ptr, vec_offset,
+                                                   vec_oprnd, final_mask,
+                                                   final_len, bias);
+                 else
+                   call = gimple_build_call_internal (
+                     IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, vec_offset,
+                     scale, vec_oprnd, final_mask, final_len, bias);
+               }
              else if (final_mask)
                call
                  = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
@@ -10955,7 +10985,7 @@ vectorizable_load (vec_info *vinfo,
                  tree zero = build_zero_cst (vectype);
                  tree scale = size_int (gs_info.scale);
 
-                 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+                 if (internal_fn_len_index (gs_info.ifn) >= 0)
                    {
                      if (loop_lens)
                        final_len
@@ -10978,11 +11008,16 @@ vectorizable_load (vec_info *vinfo,
 
                  gcall *call;
                  if (final_len && final_mask)
-                   call
-                     = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
-                                                   dataref_ptr, vec_offset,
-                                                   scale, zero, final_mask,
-                                                   final_len, bias);
+                   {
+                     if (internal_strided_fn_p (gs_info.ifn))
+                       call = gimple_build_call_internal (
+                         IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
+                         zero, final_mask, final_len, bias);
+                     else
+                       call = gimple_build_call_internal (
+                         IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
+                         scale, zero, final_mask, final_len, bias);
+                   }
                  else if (final_mask)
                    call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
                                                       dataref_ptr, vec_offset,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 254d172231d..4d9e9799470 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2309,7 +2309,7 @@ extern opt_result vect_analyze_data_refs_alignment 
(loop_vec_info);
 extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
 extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
+extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, bool, tree, tree,
                                      tree, int, internal_fn *, tree *);
 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
                                       gather_scatter_info *);
-- 
2.36.3

Reply via email to