Note that if-conversion emits IFN_MASK_LOAD/STORE; only the vectorizer later
emits the LEN variants.  So this is about whether there are (or might be)
uarchs that have vector-aligned loads (i.e. target alignment is sizeof(vector))
and, in addition to that, support misaligned loads but only with element
alignment.  The above even says that all masked load/store uarchs
support arbitrary byte-aligned (len-)masked vector loads/stores.
Ah yeah, of course, _LEN happens later...
I moved the checks now and we can get away with an optional gs_info* parameter
for vect_supportable_dr_alignment, so that in itself is at least a bit cleaner
than before.  Also, I removed the IFN_MASK_LOAD/STORE hunk to see what breaks.
Attached is the current version; the changes mentioned above are the only
ones.
riscv and power10 didn't show any noticeable problems; aarch64 is still
running.  x86 is unchanged (as it has legacy gathers/scatters anyway).
Power7 would be interesting, I guess, but cfarm110 only has a base GCC 4.8.5.
--
Regards
Robin
[PATCH] vect: Misalignment checks for gather/scatter.
This patch adds simple misalignment checks for gather/scatter
operations.  Previously, we assumed that those perform element accesses
internally, so alignment does not matter.  The riscv vector spec, however,
explicitly states that vector operations are allowed to fault on
element-misaligned accesses.  Reasonable uarchs won't, but...
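
To make the failure mode concrete, here is a small, hedged C sketch (my own
illustration, not part of the patch or its testsuite) of a loop that can end
up as a gather whose element accesses are only byte-aligned:

  /* Hypothetical example: BUF has no alignment guarantee beyond 1 byte,
     so each gathered uint32_t access may be element-misaligned, which
     the riscv vector spec allows to fault.  */
  #include <stdint.h>
  #include <string.h>

  uint32_t
  sum_indexed (const char *buf, const int *idx, int n)
  {
    uint32_t sum = 0;
    for (int i = 0; i < n; i++)
      {
        uint32_t v;
        /* IDX holds byte offsets that need not be multiples of 4.  */
        memcpy (&v, buf + idx[i], sizeof v);
        sum += v;
      }
    return sum;
  }

Whether this particular loop is vectorized as a gather depends on target and
flags; it is only meant to show why element alignment cannot be taken for
granted.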
For gather/scatter we have two paths in the vectorizer:
(1) Regular analysis based on datarefs. Here we can also create
strided loads.
(2) Non-affine access where each gather index is relative to the
initial address.
The assumption this patch works from is that once the alignment of the
first scalar access is correct, all others will fall in line, as the index is
always a multiple of the first element's size.
For (1) we have a dataref and can check it for alignment as in other
cases. For (2) this patch checks the object alignment of BASE and
compares it against the natural alignment of the current vectype's unit.
The patch also adds a pointer argument to the gather/scatter IFNs that
contains the necessary alignment. Most of the patch is thus mechanical
in that it merely adjusts indices.
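
As a rough illustration of how that alignment value is meant to be consumed
on the non-dataref path, here is a sketch of my own (a hypothetical helper,
not code from the patch); ALIGN and ELT_SIZE are in the same units, bits, as
with get_object_alignment and TYPE_SIZE:

  /* Sketch only: the access counts as "packed" if we know some alignment
     but it is below the natural alignment of one vector element; the
     final verdict is then left to the target's
     support_vector_misalignment hook.  */
  #include <stdbool.h>

  static bool
  gather_scatter_is_packed (unsigned long align, unsigned long elt_size)
  {
    bool is_misaligned = align < elt_size;
    return align > 1 && is_misaligned;
  }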
I tested the riscv version with a custom qemu version that faults on
element-misaligned vector accesses. With this patch applied, there is
just a single fault left, which is due to PR120782 and which will be
addressed separately.
Bootstrapped and regtested on x86 and aarch64. Regtested on
rv64gcv_zvl512b with and without unaligned vector support.
gcc/ChangeLog:
* internal-fn.cc (internal_fn_len_index): Adjust indices for new
alias_ptr param.
(internal_fn_else_index): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_fn_alias_ptr_index): New function.
(internal_fn_offset_index): Ditto.
(internal_fn_scale_index): Ditto.
(internal_gather_scatter_fn_supported_p): Ditto.
* optabs-query.cc (supports_vec_gather_load_p): Ditto.
* tree-vect-data-refs.cc (vect_describe_gather_scatter_call): Get the
alias pointer from the call.
(vect_check_gather_scatter): Add alias pointer.
(vect_supportable_dr_alignment): Handle gather/scatter via an optional
gather_scatter_info parameter.
* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add
alias pointer.
* tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
pointer.
* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
alias pointer and misalignment handling.
(get_load_store_type): Move gather/scatter handling from here...
(get_group_load_store_type): ...to here.
(vectorizable_store): Add alias pointer.
(vectorizable_load): Ditto.
* tree-vectorizer.h (struct gather_scatter_info): Ditto.
---
gcc/internal-fn.cc | 43 ++++++---
gcc/internal-fn.h | 1 +
gcc/optabs-query.cc | 6 +-
gcc/tree-vect-data-refs.cc | 61 ++++++++++---
gcc/tree-vect-patterns.cc | 17 ++--
gcc/tree-vect-slp.cc | 16 ++--
gcc/tree-vect-stmts.cc | 179 +++++++++++++++++++++----------------
gcc/tree-vectorizer.h | 7 +-
8 files changed, 206 insertions(+), 124 deletions(-)
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 1411f449789..bf2fac81807 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4967,11 +4967,13 @@ internal_fn_len_index (internal_fn fn)
return 2;
case IFN_MASK_LEN_SCATTER_STORE:
+ return 6;
+
case IFN_MASK_LEN_STRIDED_LOAD:
return 5;
case IFN_MASK_LEN_GATHER_LOAD:
- return 6;
+ return 7;
case IFN_COND_LEN_FMA:
case IFN_COND_LEN_FMS:
@@ -5075,7 +5077,7 @@ internal_fn_else_index (internal_fn fn)
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_LEN_GATHER_LOAD:
- return 5;
+ return 6;
default:
return -1;
@@ -5110,7 +5112,7 @@ internal_fn_mask_index (internal_fn fn)
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_GATHER_LOAD:
case IFN_MASK_LEN_SCATTER_STORE:
- return 4;
+ return 5;
case IFN_VCOND_MASK:
case IFN_VCOND_MASK_LEN:
@@ -5135,10 +5137,11 @@ internal_fn_stored_value_index (internal_fn fn)
case IFN_MASK_STORE:
case IFN_MASK_STORE_LANES:
+ return 3;
case IFN_SCATTER_STORE:
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_SCATTER_STORE:
- return 3;
+ return 4;
case IFN_LEN_STORE:
return 4;
@@ -5152,6 +5155,28 @@ internal_fn_stored_value_index (internal_fn fn)
}
}
+/* If FN has an alias pointer return its index, otherwise return -1. */
+
+int
+internal_fn_alias_ptr_index (internal_fn fn)
+{
+ switch (fn)
+ {
+ case IFN_MASK_LOAD:
+ case IFN_MASK_LEN_LOAD:
+ case IFN_GATHER_LOAD:
+ case IFN_MASK_GATHER_LOAD:
+ case IFN_MASK_LEN_GATHER_LOAD:
+ case IFN_SCATTER_STORE:
+ case IFN_MASK_SCATTER_STORE:
+ case IFN_MASK_LEN_SCATTER_STORE:
+ return 1;
+
+ default:
+ return -1;
+ }
+}
+
/* If FN is a gather/scatter return the index of its offset argument,
otherwise return -1. */
@@ -5169,7 +5194,7 @@ internal_fn_offset_index (internal_fn fn)
case IFN_SCATTER_STORE:
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_SCATTER_STORE:
- return 1;
+ return 2;
default:
return -1;
@@ -5193,7 +5218,7 @@ internal_fn_scale_index (internal_fn fn)
case IFN_SCATTER_STORE:
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_SCATTER_STORE:
- return 2;
+ return 3;
default:
return -1;
@@ -5277,13 +5302,9 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
&& insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
&& insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
- /* For gather the optab's operand indices do not match the IFN's because
- the latter does not have the extension operand (operand 3). It is
- implicitly added during expansion so we use the IFN's else index + 1.
- */
if (ok && elsvals)
get_supported_else_vals
- (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+ (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
return ok;
}
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 825381660bb..fd21694dfeb 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -242,6 +242,7 @@ extern int internal_fn_else_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern int internal_fn_offset_index (internal_fn fn);
extern int internal_fn_scale_index (internal_fn fn);
+extern int internal_fn_alias_ptr_index (internal_fn fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int,
vec<int> * = nullptr);
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index f5ca98da818..5335d0d8401 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
= (icode != CODE_FOR_nothing) ? 1 : -1;
}
- /* For gather the optab's operand indices do not match the IFN's because
- the latter does not have the extension operand (operand 3). It is
- implicitly added during expansion so we use the IFN's else index + 1.
- */
if (elsvals && icode != CODE_FOR_nothing)
get_supported_else_vals
- (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+ (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
return this_fn_optabs->supports_vec_gather_load[mode] > 0;
}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 019f0b6ca36..277bc132bcd 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
info->ifn = gimple_call_internal_fn (call);
info->decl = NULL_TREE;
info->base = gimple_call_arg (call, 0);
+ info->alias_ptr = gimple_call_arg
+ (call, internal_fn_alias_ptr_index (info->ifn));
info->offset = gimple_call_arg
(call, internal_fn_offset_index (info->ifn));
info->offset_dt = vect_unknown_def_type;
@@ -4869,6 +4871,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
info->ifn = ifn;
info->decl = decl;
info->base = base;
+
+ info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
+
info->offset = off;
info->offset_dt = vect_unknown_def_type;
info->offset_vectype = offset_vectype;
@@ -7364,13 +7371,14 @@ vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
alignment.
If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
it is aligned, i.e., check if it is possible to vectorize it with different
- alignment. */
+ alignment. If GS_INFO is passed we are dealing with a gather/scatter. */
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
- tree vectype, int misalignment)
+ tree vectype, int misalignment,
+ gather_scatter_info *gs_info)
{
- data_reference *dr = dr_info->dr;
+ data_reference *dr = dr_info ? dr_info->dr : nullptr;
stmt_vec_info stmt_info = dr_info->stmt;
machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -7382,14 +7390,6 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
else if (dr_safe_speculative_read_required (stmt_info))
return dr_unaligned_unsupported;
- /* For now assume all conditional loads/stores support unaligned
- access without any special code. */
- if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
- if (gimple_call_internal_p (stmt)
- && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
- return dr_unaligned_supported;
-
if (loop_vinfo)
{
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -7459,7 +7459,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
}
} */
- if (DR_IS_READ (dr))
+ if (dr && DR_IS_READ (dr))
{
if (can_implement_p (vec_realign_load_optab, mode)
&& (!targetm.vectorize.builtin_mask_for_load
@@ -7487,10 +7487,43 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
bool is_packed = false;
tree type = TREE_TYPE (DR_REF (dr));
+ bool is_gather_scatter = gs_info != nullptr;
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
- is_packed = not_size_aligned (DR_REF (dr));
+ {
+ if (!is_gather_scatter || dr != nullptr)
+ is_packed = not_size_aligned (DR_REF (dr));
+ else
+ {
+ /* Gather-scatter accesses normally perform only component accesses
+ so alignment is irrelevant for them. Targets like riscv do care
+ about scalar alignment in vector accesses, though, so check scalar
+ alignment here. We determined the alias pointer as well as the
+ base alignment during pattern recognition and can re-use it here.
+
+ As we do not have an analyzed dataref we only know the alignment
+ of the reference itself and nothing about init, steps, etc.
+ For now don't try harder to determine misalignment and
+ just assume it is unknown. We consider the type packed if its
+ scalar alignment is lower than the natural alignment of a vector
+ element's type. */
+
+ gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
+ gcc_assert (dr == nullptr);
+
+ tree inner_vectype = TREE_TYPE (vectype);
+
+ unsigned HOST_WIDE_INT scalar_align
+ = tree_to_uhwi (gs_info->alias_ptr);
+ unsigned HOST_WIDE_INT inner_vectype_sz
+ = tree_to_uhwi (TYPE_SIZE (inner_vectype));
+
+ bool is_misaligned = scalar_align < inner_vectype_sz;
+ is_packed = scalar_align > 1 && is_misaligned;
+ }
+ }
if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
- is_packed, false))
+ is_packed,
+ is_gather_scatter))
return dr_unaligned_supported;
/* Unsupported. */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 0f6d6b77ea1..f0ddbf9660c 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
tree vec_els
= vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
+ gs_info.alias_ptr,
offset, scale, zero, mask,
vec_els);
}
else
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
+ gs_info.alias_ptr,
offset, scale, zero);
tree lhs = gimple_get_lhs (stmt_info->stmt);
tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
@@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
{
tree rhs = vect_get_store_rhs (stmt_info);
if (mask != NULL)
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
- base, offset, scale, rhs,
- mask);
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
+ base, gs_info.alias_ptr,
+ offset, scale, rhs, mask);
else
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
- base, offset, scale, rhs);
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
+ base, gs_info.alias_ptr,
+ offset, scale, rhs);
}
gimple_call_set_nothrow (pattern_stmt, true);
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index af42f455cca..73a3c899295 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb)
static const int no_arg_map[] = { 0 };
static const int arg0_map[] = { 1, 0 };
-static const int arg1_map[] = { 1, 1 };
+static const int arg2_map[] = { 1, 2 };
static const int arg2_arg3_map[] = { 2, 2, 3 };
-static const int arg1_arg3_map[] = { 2, 1, 3 };
-static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
-static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
+static const int arg2_arg4_map[] = { 2, 2, 4 };
+static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 };
+static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 };
static const int arg3_arg2_map[] = { 2, 3, 2 };
static const int op1_op0_map[] = { 2, 1, 0 };
static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
@@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
case IFN_GATHER_LOAD:
- return arg1_map;
+ return arg2_map;
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_LEN_GATHER_LOAD:
- return arg1_arg4_arg5_map;
+ return arg2_arg5_arg6_map;
case IFN_SCATTER_STORE:
- return arg1_arg3_map;
+ return arg2_arg4_map;
case IFN_MASK_SCATTER_STORE:
case IFN_MASK_LEN_SCATTER_STORE:
- return arg1_arg3_arg4_map;
+ return arg2_arg4_arg5_map;
case IFN_MASK_STORE:
return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 66e79891b09..d5c5fbe25f4 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
/* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
but we don't need to store that here. */
gs_info->base = NULL_TREE;
+ gs_info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
gs_info->element_type = TREE_TYPE (vectype);
gs_info->offset = fold_convert (offset_type, step);
gs_info->offset_dt = vect_constant_def;
@@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
separated by the stride, until we have a complete vector.
Fall back to scalar accesses if that isn't possible. */
*memory_access_type = VMAT_STRIDED_SLP;
- else
+ else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
@@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
allows us to use contiguous accesses. */
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
+ && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
&& single_element_p
&& SLP_TREE_LANES (slp_node) == 1
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ *memory_access_type = VMAT_GATHER_SCATTER;
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+ elsvals))
+ gcc_unreachable ();
+ /* When using internal functions, we rely on pattern recognition
+ to convert the type of the offset to the type that the target
+ requires, with the result being a call to an internal function.
+ If that failed for some reason (e.g. because another pattern
+ took priority), just handle cases in which the offset already
+ has the right type. */
+ else if (GATHER_SCATTER_IFN_P (*gs_info)
+ && !is_gimple_call (stmt_info->stmt)
+ && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+ TREE_TYPE (gs_info->offset_vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s offset requires a conversion\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (!vect_is_simple_use (gs_info->offset, vinfo,
+ &gs_info->offset_dt,
+ &gs_info->offset_vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s index use not simple.\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+ {
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
+ || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
+ (gs_info->offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ }
+ }
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
|| *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
*poffset = neg_ldst_offset;
- if (*memory_access_type == VMAT_GATHER_SCATTER
- || *memory_access_type == VMAT_ELEMENTWISE
+ if (*memory_access_type == VMAT_ELEMENTWISE
+ || (*memory_access_type == VMAT_GATHER_SCATTER
+ && GATHER_SCATTER_LEGACY_P (*gs_info))
|| *memory_access_type == VMAT_STRIDED_SLP
|| *memory_access_type == VMAT_INVARIANT)
{
@@ -2370,10 +2425,15 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
+ if (*memory_access_type == VMAT_GATHER_SCATTER
+ && !first_dr_info)
+ *misalignment = DR_MISALIGNMENT_UNKNOWN;
+ else
+ *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
*alignment_support_scheme
- = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
- *misalignment);
+ = vect_supportable_dr_alignment
+ (vinfo, first_dr_info, vectype, *misalignment,
+ *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
}
if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
@@ -2443,58 +2503,12 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
*misalignment = DR_MISALIGNMENT_UNKNOWN;
*poffset = 0;
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- *memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
- elsvals))
- gcc_unreachable ();
- /* When using internal functions, we rely on pattern recognition
- to convert the type of the offset to the type that the target
- requires, with the result being a call to an internal function.
- If that failed for some reason (e.g. because another pattern
- took priority), just handle cases in which the offset already
- has the right type. */
- else if (GATHER_SCATTER_IFN_P (*gs_info)
- && !is_gimple_call (stmt_info->stmt)
- && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
- TREE_TYPE (gs_info->offset_vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s offset requires a conversion\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
- gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node);
- gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node);
- if (gs_info->ifn == IFN_LAST && !gs_info->decl)
- {
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
- || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported vector types for emulated "
- "gather.\n");
- return false;
- }
- }
- /* Gather-scatter accesses perform only component accesses, alignment
- is irrelevant for them. */
- *alignment_support_scheme = dr_unaligned_supported;
- }
- else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
- masked_p,
- vls_type, memory_access_type, poffset,
- alignment_support_scheme,
- misalignment, gs_info, lanes_ifn,
- elsvals))
+ if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
+ masked_p,
+ vls_type, memory_access_type, poffset,
+ alignment_support_scheme,
+ misalignment, gs_info, lanes_ifn,
+ elsvals))
return false;
if ((*memory_access_type == VMAT_ELEMENTWISE
@@ -2528,17 +2542,18 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
"alignment. With non-contiguous memory vectorization"
" could read out of bounds at %G ",
STMT_VINFO_STMT (stmt_info));
- if (inbounds)
- LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
- else
- return false;
+ if (inbounds)
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ else
+ return false;
}
/* If this DR needs alignment for correctness, we must ensure the target
alignment is a constant power-of-two multiple of the amount read per
vector iteration or force masking. */
if (dr_safe_speculative_read_required (stmt_info)
- && *alignment_support_scheme == dr_aligned)
+ && (*alignment_support_scheme == dr_aligned
+ && *memory_access_type != VMAT_GATHER_SCATTER))
{
/* We can only peel for loops, of course. */
gcc_checking_assert (loop_vinfo);
@@ -8456,7 +8471,6 @@ vectorizable_store (vec_info *vinfo,
if (dump_enabled_p ()
&& memory_access_type != VMAT_ELEMENTWISE
- && memory_access_type != VMAT_GATHER_SCATTER
&& memory_access_type != VMAT_STRIDED_SLP
&& memory_access_type != VMAT_INVARIANT
&& alignment_support_scheme != dr_aligned)
@@ -9157,24 +9171,31 @@ vectorizable_store (vec_info *vinfo,
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (
- IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+ IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, vec_oprnd, final_mask, final_len,
bias);
else
/* Non-vector offset indicates that prefer to take
MASK_LEN_STRIDED_STORE instead of the
- IFN_MASK_SCATTER_STORE with direct stride arg. */
+ IFN_MASK_SCATTER_STORE with direct stride arg.
+ Similar to the gather case we have checked the
+ alignment for a scatter already and assume
+ that the strided store has the same requirements. */
call = gimple_build_call_internal (
IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
vec_offset, vec_oprnd, final_mask, final_len, bias);
}
else if (final_mask)
call = gimple_build_call_internal
- (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
+ (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, vec_oprnd, final_mask);
else
- call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
- dataref_ptr, vec_offset,
+ call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
+ dataref_ptr,
+ gs_info.alias_ptr,
+ vec_offset,
scale, vec_oprnd);
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
@@ -10641,7 +10662,6 @@ vectorizable_load (vec_info *vinfo,
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
- gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
? &LOOP_VINFO_MASKS (loop_vinfo)
@@ -10661,10 +10681,12 @@ vectorizable_load (vec_info *vinfo,
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
- dr_aligned or dr_unaligned_supported for masked operations. */
+ dr_aligned or dr_unaligned_supported for (non-length) masked
+ operations. */
gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
&& !mask
&& !loop_masks)
+ || memory_access_type == VMAT_GATHER_SCATTER
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -11009,8 +11031,6 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_GATHER_SCATTER)
{
- gcc_assert (alignment_support_scheme == dr_aligned
- || alignment_support_scheme == dr_unaligned_supported);
gcc_assert (!grouped_load && !slp_perm);
unsigned int inside_cost = 0, prologue_cost = 0;
@@ -11099,7 +11119,8 @@ vectorizable_load (vec_info *vinfo,
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
- 8, dataref_ptr,
+ 9, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale, zero,
final_mask, vec_els,
final_len, bias);
@@ -11114,13 +11135,15 @@ vectorizable_load (vec_info *vinfo,
}
else if (final_mask)
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
- 6, dataref_ptr,
+ 7, dataref_ptr,
+ gs_info.alias_ptr,
vec_offset, scale,
zero, final_mask, vec_els);
else
- call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
- dataref_ptr, vec_offset,
- scale, zero);
+ call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
+ dataref_ptr,
+ gs_info.alias_ptr,
+ vec_offset, scale, zero);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7b927491b1c..4d51ad61fa8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1557,6 +1557,10 @@ struct gather_scatter_info {
/* The loop-invariant base value. */
tree base;
+ /* The TBAA alias pointer the value of which determines the alignment
+ of the scalar accesses. */
+ tree alias_ptr;
+
/* The original scalar offset, which is a non-loop-invariant SSA_NAME. */
tree offset;
@@ -2542,7 +2546,8 @@ extern bool ref_within_array_bound (gimple *, tree);
/* In tree-vect-data-refs.cc. */
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
extern enum dr_alignment_support vect_supportable_dr_alignment
- (vec_info *, dr_vec_info *, tree, int);
+ (vec_info *, dr_vec_info *, tree, int,
+ gather_scatter_info * = nullptr);
extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
--
2.50.0