On Fri, 4 Aug 2023 at 20:36, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Full review this time, sorry for the skipping the tests earlier. Thanks for the detailed review! Please find my responses inline below. > > Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > > index 7e5494dfd39..680d0e54fd4 100644 > > --- a/gcc/fold-const.cc > > +++ b/gcc/fold-const.cc > > @@ -85,6 +85,10 @@ along with GCC; see the file COPYING3. If not see > > #include "vec-perm-indices.h" > > #include "asan.h" > > #include "gimple-range.h" > > +#include <algorithm> > > This should be included by defining INCLUDE_ALGORITHM instead. Done. Just curious, why do we use this macro instead of directly including <algorithm> ? > > > +#include "tree-pretty-print.h" > > +#include "gimple-pretty-print.h" > > +#include "print-tree.h" > > Are these still needed, or were they for debugging? Just for debugging, removed. > > > > > /* Nonzero if we are folding constants inside an initializer or a C++ > > manifestly-constant-evaluated context; zero otherwise. > > @@ -10494,15 +10498,9 @@ fold_mult_zconjz (location_t loc, tree type, tree > > expr) > > static bool > > vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) > > { > > - unsigned HOST_WIDE_INT i, nunits; > > + unsigned HOST_WIDE_INT i; > > > > - if (TREE_CODE (arg) == VECTOR_CST > > - && VECTOR_CST_NELTS (arg).is_constant (&nunits)) > > - { > > - for (i = 0; i < nunits; ++i) > > - elts[i] = VECTOR_CST_ELT (arg, i); > > - } > > - else if (TREE_CODE (arg) == CONSTRUCTOR) > > + if (TREE_CODE (arg) == CONSTRUCTOR) > > { > > constructor_elt *elt; > > > > @@ -10520,6 +10518,192 @@ vec_cst_ctor_to_array (tree arg, unsigned int > > nelts, tree *elts) > > return true; > > } > > > > +/* Helper routine for fold_vec_perm_cst to check if SEL is a suitable > > + mask for VLA vec_perm folding. > > + REASON if specified, will contain the reason why SEL is not suitable. > > + Used only for debugging and unit-testing. > > + VERBOSE if enabled is used for debugging output. */ > > + > > +static bool > > +valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1, > > + const vec_perm_indices &sel, > > + const char **reason = NULL, > > + ATTRIBUTE_UNUSED bool verbose = false) > > Since verbose is no longer needed (good!), I think we should just remove it. Done. > > > +{ > > + unsigned sel_npatterns = sel.encoding ().npatterns (); > > + unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern (); > > + > > + if (!(pow2p_hwi (sel_npatterns) > > + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg0)) > > + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg1)))) > > + { > > + if (reason) > > + *reason = "npatterns is not power of 2"; > > + return false; > > + } > > + > > + /* We want to avoid cases where sel.length is not a multiple of > > npatterns. > > + For eg: sel.length = 2 + 2x, and sel npatterns = 4. */ > > + poly_uint64 esel; > > + if (!multiple_p (sel.length (), sel_npatterns, &esel)) > > + { > > + if (reason) > > + *reason = "sel.length is not multiple of sel_npatterns"; > > + return false; > > + } > > + > > + if (sel_nelts_per_pattern < 3) > > + return true; > > + > > + for (unsigned pattern = 0; pattern < sel_npatterns; pattern++) > > + { > > + poly_uint64 a1 = sel[pattern + sel_npatterns]; > > + poly_uint64 a2 = sel[pattern + 2 * sel_npatterns]; > > + HOST_WIDE_INT S; > > Trailing whitespace. The convention is to use lowercase variable > names, so please call this "step". Fixed, thanks. > > > + if (!poly_int64 (a2 - a1).is_constant (&S)) > > + { > > + if (reason) > > + *reason = "step is not constant"; > > + return false; > > + } > > + // FIXME: Punt on S < 0 for now, revisit later. > > + if (S < 0) > > + return false; > > + if (S == 0) > > + continue; > > + > > + if (!pow2p_hwi (S)) > > + { > > + if (reason) > > + *reason = "step is not power of 2"; > > + return false; > > + } > > + > > + /* Ensure that stepped sequence of the pattern selects elements > > + only from the same input vector if it's VLA. */ > > s/ if it's VLA// Oops sorry, that was a relic of something else I was trying :) Fixed, thanks. > > > + uint64_t q1, qe; > > + poly_uint64 r1, re; > > + poly_uint64 ae = a1 + (esel - 2) * S; > > + poly_uint64 arg_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + if (!(can_div_trunc_p (a1, arg_len, &q1, &r1) > > + && can_div_trunc_p (ae, arg_len, &qe, &re) > > + && q1 == qe)) > > + { > > + if (reason) > > + *reason = "crossed input vectors"; > > + return false; > > + } > > + > > Probably worth a comment above the following code too: > > /* Ensure that the stepped sequence always selects from the same > input pattern. */ Done. > > > + unsigned arg_npatterns > > + = ((q1 & 0) == 0) ? VECTOR_CST_NPATTERNS (arg0) > > + : VECTOR_CST_NPATTERNS (arg1); > > + > > + if (!multiple_p (S, arg_npatterns)) > > + { > > + if (reason) > > + *reason = "S is not multiple of npatterns"; > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +/* Try to fold permutation of ARG0 and ARG1 with SEL selector when > > + the input vectors are VECTOR_CST. Return NULL_TREE otherwise. > > + REASON and VERBOSE have same purpose as described in > > + valid_mask_for_fold_vec_perm_cst_p. > > + > > + (1) If SEL is a suitable mask as determined by > > + valid_mask_for_fold_vec_perm_cst_p, then: > > + res_npatterns = max of npatterns between ARG0, ARG1, and SEL > > + res_nelts_per_pattern = max of nelts_per_pattern between > > + ARG0, ARG1 and SEL. > > + (2) If SEL is not a suitable mask, and ARG0, ARG1 are VLS, > > + then: > > + res_npatterns = nelts in input vector. > > s/input vector/result vector/ Fixed, thanks. > > > + res_nelts_per_pattern = 1. > > + This exception is made so that VLS ARG0, ARG1 and SEL work as > > before. */ > > Guess this is personal preference, but (1) and (2) seem more like > implementation details, so I think they belong... > > > + > > +static tree > > +fold_vec_perm_cst (tree type, tree arg0, tree arg1, const vec_perm_indices > > &sel, > > + const char **reason = NULL, bool verbose = false) > > +{ > > + unsigned res_npatterns, res_nelts_per_pattern; > > + unsigned HOST_WIDE_INT res_nelts; > > + > > ...here instead. Done. > > > + if (valid_mask_for_fold_vec_perm_cst_p (arg0, arg1, sel, reason, > > verbose)) > > + { > > + res_npatterns > > + = std::max (VECTOR_CST_NPATTERNS (arg0), > > + std::max (VECTOR_CST_NPATTERNS (arg1), > > + sel.encoding ().npatterns ())); > > + > > + res_nelts_per_pattern > > + = std::max (VECTOR_CST_NELTS_PER_PATTERN (arg0), > > + std::max (VECTOR_CST_NELTS_PER_PATTERN (arg1), > > + sel.encoding ().nelts_per_pattern ())); > > + > > + res_nelts = res_npatterns * res_nelts_per_pattern; > > + } > > + else if (TYPE_VECTOR_SUBPARTS (type).is_constant (&res_nelts)) > > + { > > + res_npatterns = res_nelts; > > + res_nelts_per_pattern = 1; > > + } > > + else > > + return NULL_TREE; > > + > > + tree_vector_builder out_elts (type, res_npatterns, > > res_nelts_per_pattern); > > + for (unsigned i = 0; i < res_nelts; i++) > > + { > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + uint64_t q; > > + poly_uint64 r; > > + unsigned HOST_WIDE_INT index; > > + > > + unsigned HOST_WIDE_INT arg_nelts; > > + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)).is_constant (&arg_nelts) > > + && known_ge (sel[i], poly_int64 (2 * arg_nelts))) > > + { > > + if (reason) > > + *reason = "out of bounds access"; > > + return NULL_TREE; > > + } > > I don't think this is needed. The selector indices wrap, and the code > below should handle the wrapping correctly. Removed, thanks. > > > + > > + /* Punt if sel[i] /trunc_div len cannot be determined, > > + because the input vector to be chosen will depend on > > + runtime vector length. > > + For example if len == 4 + 4x, and sel[i] == 4, > > + If len at runtime equals 4, we choose arg1[0]. > > + For any other value of len > 4 at runtime, we choose arg0[4]. > > + which makes the element choice dependent on runtime vector length. > > */ > > + if (!can_div_trunc_p (sel[i], len, &q, &r)) > > + { > > + if (reason) > > + *reason = "cannot divide selector element by arg len"; > > + return NULL_TREE; > > + } > > + > > + /* sel[i] % len will give the index of element in the chosen input > > + vector. For example if sel[i] == 5 + 4x and len == 4 + 4x, > > + we will choose arg1[1] since (5 + 4x) % (4 + 4x) == 1. */ > > + if (!r.is_constant (&index)) > > + { > > + if (reason) > > + *reason = "remainder is not constant"; > > + return NULL_TREE; > > + } > > + > > + tree arg = ((q & 1) == 0) ? arg0 : arg1; > > + tree elem = vector_cst_elt (arg, index); > > + out_elts.quick_push (elem); > > + } > > + > > + return out_elts.build (); > > +} > > + > > /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL > > selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, > > NULL_TREE otherwise. */ > > @@ -10529,43 +10713,40 @@ fold_vec_perm (tree type, tree arg0, tree arg1, > > const vec_perm_indices &sel) > > { > > unsigned int i; > > unsigned HOST_WIDE_INT nelts; > > - bool need_ctor = false; > > > > - if (!sel.length ().is_constant (&nelts)) > > - return NULL_TREE; > > - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts) > > - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts) > > - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts)); > > + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), sel.length ()) > > + && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), > > + TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)))); > > + > > if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) > > || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) > > return NULL_TREE; > > > > + if (TREE_CODE (arg0) == VECTOR_CST > > + && TREE_CODE (arg1) == VECTOR_CST) > > + return fold_vec_perm_cst (type, arg0, arg1, sel); > > + > > + /* For fall back case, we want to ensure we have VLS vectors > > + with equal length. */ > > + if (!sel.length ().is_constant (&nelts)) > > + return NULL_TREE; > > + > > + gcc_assert (known_eq (sel.length (), TYPE_VECTOR_SUBPARTS (TREE_TYPE > > (arg0)))); > > Nit: long line. Fixed, thanks. > > > tree *in_elts = XALLOCAVEC (tree, nelts * 2); > > if (!vec_cst_ctor_to_array (arg0, nelts, in_elts) > > || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts)) > > return NULL_TREE; > > > > - tree_vector_builder out_elts (type, nelts, 1); > > + vec<constructor_elt, va_gc> *v; > > + vec_alloc (v, nelts); > > for (i = 0; i < nelts; i++) > > { > > HOST_WIDE_INT index; > > if (!sel[i].is_constant (&index)) > > return NULL_TREE; > > - if (!CONSTANT_CLASS_P (in_elts[index])) > > - need_ctor = true; > > - out_elts.quick_push (unshare_expr (in_elts[index])); > > - } > > - > > - if (need_ctor) > > - { > > - vec<constructor_elt, va_gc> *v; > > - vec_alloc (v, nelts); > > - for (i = 0; i < nelts; i++) > > - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts[i]); > > - return build_constructor (type, v); > > + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, in_elts[index]); > > } > > - else > > - return out_elts.build (); > > + return build_constructor (type, v); > > } > > > > /* Try to fold a pointer difference of type TYPE two address expressions of > > @@ -16892,6 +17073,508 @@ test_arithmetic_folding () > > x); > > } > > > > +namespace test_fold_vec_perm_cst { > > + > > +static tree > > +get_preferred_vectype (tree inner_type) > > +{ > > + scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (inner_type); > > + machine_mode vmode = targetm.vectorize.preferred_simd_mode (int_mode); > > + poly_uint64 nunits = GET_MODE_NUNITS (vmode); > > + return build_vector_type (inner_type, nunits); > > +} > > + > > +static tree > > +build_vec_cst_rand (tree inner_type, unsigned npatterns, > > + unsigned nelts_per_pattern, int S = 0, > > Similar comment about lowercase variable names here. > > > + tree vectype = NULL_TREE) > > +{ > > + if (!vectype) > > + vectype = get_preferred_vectype (inner_type); > > I'm not sure how portable this is. It looks like the tests rely on > the integer_type_node vectors being 4 + 4x, but that isn't necessarily > true on all VLA targets. > > Perhaps instead the tests could be classified based on the vector > lengths that they assume. Then we can iterate through the vector > modes and call the appropriate function based on GET_MODE_NUNITS > and GET_MODE_INNER. I tried this approach in the attached patch. Does it look OK ? > > > + tree_vector_builder builder (vectype, npatterns, nelts_per_pattern); > > + > > + // Fill a0 for each pattern > > + for (unsigned i = 0; i < npatterns; i++) > > + builder.quick_push (build_int_cst (inner_type, rand () % 100)); > > + > > + if (nelts_per_pattern == 1) > > + return builder.build (); > > + > > + // Fill a1 for each pattern > > + for (unsigned i = 0; i < npatterns; i++) > > + builder.quick_push (build_int_cst (inner_type, rand () % 100)); > > + > > + if (nelts_per_pattern == 2) > > + return builder.build (); > > + > > + for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++) > > + { > > + tree prev_elem = builder[i - npatterns]; > > + int prev_elem_val = TREE_INT_CST_LOW (prev_elem); > > + int val = prev_elem_val + S; > > + builder.quick_push (build_int_cst (inner_type, val)); > > + } > > + > > + return builder.build (); > > +} > > + > > +static void > > +validate_res (unsigned npatterns, unsigned nelts_per_pattern, > > + tree res, tree *expected_res) > > +{ > > + ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) == npatterns); > > + ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) == nelts_per_pattern); > > I don't think this is safe when the inputs are randomised. E.g. we > could by chance end up with a vector of all zeros, which would have > a single pattern and a single element per pattern, regardless of the > shapes of the inputs. > > Given the way that vector_builder<T, Shape, Derived>::finalize > canonicalises the encoding, it should be safe to use: > > * VECTOR_CST_NPATTERNS (res) <= npatterns > * vector_cst_encoded_nelts (res) <= npatterns * nelts_per_pattern > > If we do that then... > > > + > > + for (unsigned i = 0; i < vector_cst_encoded_nelts (res); i++) > > ...this loop bound should be npatterns * nelts_per_pattern instead. Ah indeed. Fixed, thanks. > > > + ASSERT_TRUE (operand_equal_p (VECTOR_CST_ELT (res, i), > > expected_res[i], 0)); > > +} > > + > > +static void > > +validate_res_vls (tree res, tree *expected_res, unsigned expected_nelts) > > +{ > > + ASSERT_TRUE (known_eq (VECTOR_CST_NELTS (res), expected_nelts)); > > + for (unsigned i = 0; i < expected_nelts; i++) > > + ASSERT_TRUE (operand_equal_p (VECTOR_CST_ELT (res, i), > > expected_res[i], 0)); > > +} > > + > > +/* Verify VLA vec_perm folding. */ > > + > > +static void > > +test_stepped () > > +{ > > + /* Case 1: sel = {0, 1, 2, ...} > > + npatterns = 1, nelts_per_pattern = 3 > > + expected res: { arg0[0], arg0[1], arg0[2], ... } */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 1, 3, 2); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 1, 3, 2); > > + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (arg0_len, 1, 3); > > + builder.quick_push (0); > > + builder.quick_push (1); > > + builder.quick_push (2); > > + > > + vec_perm_indices sel (builder, 2, arg0_len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg0, 1), > > + vector_cst_elt (arg0, 2) }; > > + validate_res (1, 3, res, expected_res); > > + } > > + > > + /* Case 2: sel = {len, len + 1, len + 2, ... } > > + npatterns = 1, nelts_per_pattern = 3 > > + FIXME: This should return > > + expected res: { op1[0], op1[1], op1[2], ... } > > + however it returns NULL_TREE. */ > > Looks like the comment is out of date. Fixed, thanks. > > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 1, 3, 2); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 1, 3, 2); > > + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (arg0_len, 1, 3); > > + builder.quick_push (arg0_len); > > + builder.quick_push (arg0_len + 1); > > + builder.quick_push (arg0_len + 2); > > + > > + vec_perm_indices sel (builder, 2, arg0_len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, NULL, > > true); > > + tree expected_res[] = { vector_cst_elt (arg1, 0), vector_cst_elt > > (arg1, 1), > > + vector_cst_elt (arg1, 2) }; > > + validate_res (1, 3, res, expected_res); > > + } > > + > > + /* Case 3: Leading element of arg1, stepped sequence: pattern 0 of arg0. > > + sel = {len, 0, 0, 0, 2, 0, ...} > > + npatterns = 2, nelts_per_pattern = 3. > > + Use extra pattern {0, ...} to lower number of elements per pattern. > > */ > > + { > > + tree arg0 = build_vec_cst_rand (char_type_node, 1, 3, 2); > > + tree arg1 = build_vec_cst_rand (char_type_node, 1, 3, 2); > > + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (arg0_len, 2, 3); > > + builder.quick_push (arg0_len); > > + int mask_elems[] = { 0, 0, 0, 2, 0 }; > > + for (int i = 0; i < 5; i++) > > + builder.quick_push (mask_elems[i]); > > This leaves one of the elements unspecified. Sorry, I didn't understand. It first pushes len in: builder.quick_push (arg0_len) and then pushes the remaining indices in the loop: for (int i = 0; i < 5; i++) builder.quick_push (mask_elems[i]) So overall, builder will have 6 elements: {len, 0, 0, 0, 2, 0} > > > + > > + vec_perm_indices sel (builder, 2, arg0_len); > > + const char *reason; > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, > > &reason); > > + > > + tree expected_res[] = { vector_cst_elt (arg1, 0), vector_cst_elt > > (arg0, 0), > > + vector_cst_elt (arg0, 0), vector_cst_elt (arg0, > > 0), > > + vector_cst_elt (arg0, 2), vector_cst_elt (arg0, 0) > > + }; > > + validate_res (2, 3, res, expected_res); > > + } > > + > > + /* Case 4: > > + sel = { len, 0, 2, ... } npatterns = 1, nelts_per_pattern = 3. > > + This should return NULL because we cross the input vectors. > > + Because, > > + arg0_len = 16 + 16x > > + a1 = 0 > > + S = 2 > > + esel = arg0_len / npatterns_sel = 16+16x/1 = 16 + 16x > > + ae = 0 + (esel - 2) * S > > + = 0 + (16 + 16x - 2) * 2 > > + = 28 + 32x > > + a1 / arg0_len = 0 /trunc (16 + 16x) = 0 > > + ae / arg0_len = (28 + 32x) /trunc (16 + 16x), which is not defined, > > + since 28/16 != 32/16. > > + So return NULL_TREE. */ > > The division should succeed now, so as the test says, the reason should > instead be that ae is in the second input. Fixed, thanks. > > > + { > > + tree arg0 = build_vec_cst_rand (char_type_node, 1, 3, 2); > > + tree arg1 = build_vec_cst_rand (char_type_node, 1, 3, 2); > > + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (arg0_len, 1, 3); > > + builder.quick_push (arg0_len); > > + builder.quick_push (0); > > + builder.quick_push (2); > > + > > + vec_perm_indices sel (builder, 2, arg0_len); > > + const char *reason; > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, > > &reason, false); > > + gcc_assert (res == NULL_TREE); > > + gcc_assert (!strcmp (reason, "crossed input vectors")); > > The tests should use ASSERT_* macros rather than gcc_assert. Fixed, thanks. > > > + } > > + > > + /* Case 5: Select elements from different patterns. > > + Should return NULL. */ > > + { > > + tree op0 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + tree op1 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); > > + > > + vec_perm_builder builder (op0_len, 2, 3); > > + builder.quick_push (op0_len); > > + int mask_elems[] = { 0, 0, 0, 1, 0 }; > > + for (int i = 0; i < 5; i++) > > + builder.quick_push (mask_elems[i]); > > Should be 6 elements here too. > > > + > > + vec_perm_indices sel (builder, 2, op0_len); > > + const char *reason; > > + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel, &reason, > > false); > > + gcc_assert (res == NULL_TREE); > > + gcc_assert (!strcmp (reason, "S is not multiple of npatterns")); > > + } > > + > > + /* Case 6: Select pattern 0 of op0 and dup of op0[0] > > + op0, op1, sel: npatterns = 2, nelts_per_pattern = 3 > > + sel = { 0, 0, 2, 0, 4, 0, ... }. > > + > > + For pattern {0, 2, 4, ...}: > > + a1 = 2 > > + len = 16 + 16x > > + S = 2 > > + esel = len / npatterns_sel = (16 + 16x) / 2 = (8 + 8x) > > + ae = a1 + (esel - 2) * S > > + = 2 + (8 + 8x - 2) * 2 > > + = 14 + 16x > > + a1 / arg0_len = 2 / (16 + 16x) = 0 > > + ae / arg0_len = (14 + 16x) / (16 + 16x) = 0 > > + So a1/arg0_len = ae/arg0_len = 0 > > + Hence we select from first vector op0 > > + S = 2, npatterns = 2. > > + Since S is multiple of npatterns(op0), we are selecting from > > + same pattern of op0. > > + > > + For pattern {0, ...}, we are choosing { op0[0] ... } > > + So res will be combination of above patterns: > > + res: { op0[0], op0[0], op0[2], op0[0], op0[4], op0[0], ... } > > + with npatterns = 2, nelts_per_pattern = 3. */ > > + { > > + tree op0 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + tree op1 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); > > + > > + vec_perm_builder builder (op0_len, 2, 3); > > + int mask_elems[] = { 0, 0, 2, 0, 4, 0 }; > > + for (int i = 0; i < 6; i++) > > + builder.quick_push (mask_elems[i]); > > + > > + vec_perm_indices sel (builder, 2, op0_len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel); > > + tree expected_res[] = { vector_cst_elt (op0, 0), vector_cst_elt (op0, > > 0), > > + vector_cst_elt (op0, 2), vector_cst_elt (op0, 0), > > + vector_cst_elt (op0, 4), vector_cst_elt (op0, 0) > > }; > > + validate_res (2, 3, res, expected_res); > > + } > > + > > + /* Case 7: sel_npatterns > op_npatterns; > > + op0, op1: npatterns = 2, nelts_per_pattern = 3 > > + sel: { 0, 0, 1, len, 2, 0, 3, len, 4, 0, 5, len, ...}, > > + with npatterns = 4, nelts_per_pattern = 3. */ > > + { > > + tree op0 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + tree op1 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); > > + > > + vec_perm_builder builder(op0_len, 4, 3); > > + // -1 is used as place holder for poly_int_cst > > + int mask_elems[] = { 0, 0, 1, -1, 2, 0, 3, -1, 4, 0, 5, -1 }; > > + for (int i = 0; i < 12; i++) > > + builder.quick_push ((mask_elems[i] == -1) ? op0_len : mask_elems[i]); > > + > > + vec_perm_indices sel (builder, 2, op0_len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel); > > + tree expected_res[] = { vector_cst_elt (op0, 0), vector_cst_elt (op0, > > 0), > > + vector_cst_elt (op0, 1), vector_cst_elt (op1, 0), > > + vector_cst_elt (op0, 2), vector_cst_elt (op0, 0), > > + vector_cst_elt (op0, 3), vector_cst_elt (op1, 0), > > + vector_cst_elt (op0, 4), vector_cst_elt (op0, 0), > > + vector_cst_elt (op0, 5), vector_cst_elt (op1, 0) > > }; > > + validate_res (4, 3, res, expected_res); > > + } > > +} > > + > > +static void > > +test_dup () > > +{ > > + /* Case 1: mask = {0, ...} */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 1, 1); > > + builder.quick_push (0); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (res, 0) }; > > + validate_res (1, 1, res, expected_res); > > + } > > + > > + /* Case 2: mask = {len, ...} */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 1, 1); > > + builder.quick_push (len); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg1, 0) }; > > + validate_res (1, 1, res, expected_res); > > + } > > + > > + /* Case 3: mask = { 0, len, ... } */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 2, 1); > > + builder.quick_push (0); > > + builder.quick_push (len); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg1, 0) }; > > + validate_res (2, 1, res, expected_res); > > + } > > + > > + /* Case 4: mask = { 0, len, 1, len+1, ... } */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 2, 2); > > + builder.quick_push (0); > > + builder.quick_push (len); > > + builder.quick_push (1); > > + builder.quick_push (len + 1); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg1, 0), > > + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) > > + }; > > + validate_res (2, 2, res, expected_res); > > + } > > + > > + /* Case 5: mask = { 0, len, 1, len+1, .... } > > + npatterns = 4, nelts_per_pattern = 1 */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 4, 1); > > + builder.quick_push (0); > > + builder.quick_push (len); > > + builder.quick_push (1); > > + builder.quick_push (len + 1); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg1, 0), > > + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) > > + }; > > + validate_res (4, 1, res, expected_res); > > + } > > + > > + /* Case 6: mask = {0, 4, ...} > > + npatterns = 1, nelts_per_pattern = 2. > > + This should return NULL_TREE because the index 4 may choose > > + from either arg0 or arg1 depending on vector length. */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (len, 1, 2); > > + builder.quick_push (0); > > + builder.quick_push (4); > > + vec_perm_indices sel (builder, 2, len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + ASSERT_TRUE (res == NULL_TREE); > > + } > > + > > + /* Case 7: npatterns(arg0) = 4 > npatterns(sel) = 2 > > + mask = {0, len, 1, len + 1, ...} > > + sel_npatterns = 2, sel_nelts_per_pattern = 2. */ > > This is a good test to have, but it doesn't seem to match the > name of the containing function (test_dup). Well since the selector has nelts_per_pattern = 2, ie, dup of a1, I chose to put it in test_dup. Anyway, the functions are now re-classified based on vector length in the patch. > > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 1); > > + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > + > > + vec_perm_builder builder (arg0_len, 2, 2); > > + builder.quick_push (0); > > + builder.quick_push (arg0_len); > > + builder.quick_push (1); > > + builder.quick_push (arg0_len + 1); > > + vec_perm_indices sel (builder, 2, arg0_len); > > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg1, 0), > > + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) > > + }; > > + validate_res (2, 2, res, expected_res); > > + } > > +} > > + > > +static void > > +test_mixed () > > +{ > > + /* Case 1: op0, op1 -> VLS, sel -> VLA and selects from both input > > vectors. > > + In this case, we treat res_npatterns = nelts in input vector > > + and res_nelts_per_pattern = 1, and create a dup pattern. > > + sel = { 0, 4, 1, 5, ... } > > + res = { op0[0], op1[0], op0[1], op1[1], ...} // (4, 1) > > + res_npatterns = 4, res_nelts_per_pattern = 1. */ > > + { > > + tree arg_vectype = build_vector_type (integer_type_node, 4); > > + tree arg0 = build_vec_cst_rand (integer_type_node, 4, 1, 0, > > arg_vectype); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 4, 1, 0, > > arg_vectype); > > + > > + tree res_type = get_preferred_vectype (integer_type_node); > > + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); > > + vec_perm_builder builder (res_len, 4, 1); > > + builder.quick_push (0); > > + builder.quick_push (4); > > + builder.quick_push (1); > > + builder.quick_push (5); > > + > > + vec_perm_indices sel (builder, 2, res_len); > > + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel); > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg1, 0), > > + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) > > + }; > > + validate_res (4, 1, res, expected_res); > > + } > > + > > + /* Case 2: Same as Case 1, but sel contains an out of bounds index. > > + result should be NULL_TREE. */ > > + { > > + tree arg_vectype = build_vector_type (integer_type_node, 4); > > + tree arg0 = build_vec_cst_rand (integer_type_node, 4, 1, 0, > > arg_vectype); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 4, 1, 0, > > arg_vectype); > > + > > + tree res_type = get_preferred_vectype (integer_type_node); > > + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); > > + vec_perm_builder builder (res_len, 4, 1); > > + builder.quick_push (0); > > + builder.quick_push (8); > > + builder.quick_push (1); > > + builder.quick_push (5); > > + > > + vec_perm_indices sel (builder, 2, res_len); > > + const char *reason; > > + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel, &reason); > > + gcc_assert (res == NULL_TREE); > > + gcc_assert (!strcmp (reason, "out of bounds access")); > > + } > > + > > + /* Case 3: op0, op1 are VLA and sel is VLS. > > + op0, op1: VNx16QI with shape (2, 3) > > + sel = V4SI with values {0, 2, 4, 6} > > + res: V4SI with values { op0[0], op0[2], op0[4], op0[6] }. */ > > + { > > + tree arg0 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + tree arg1 = build_vec_cst_rand (char_type_node, 2, 3, 2); > > + > > + poly_uint64 res_len = 4; > > + tree res_type = build_vector_type (char_type_node, res_len); > > + vec_perm_builder builder (res_len, 4, 1); > > + builder.quick_push (0); > > + builder.quick_push (2); > > + builder.quick_push (4); > > + builder.quick_push (6); > > + > > + vec_perm_indices sel (builder, 2, res_len); > > + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel); > > + > > + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt > > (arg0, 2), > > + vector_cst_elt (arg0, 4), vector_cst_elt (arg0, 6) > > + }; > > + validate_res_vls (res, expected_res, 4); > > + } > > + > > + /* Case 4: Same as case 4, but op0, op1 are VNx4SI with shape (2, 3) and > > step = 2 > > Same as case 3? Oops sorry, fixed.
The attached patch passes bootstrap+test on aarch64-linux-gnu with and without SVE, and on x86_64-linux-gnu. Thanks, Prathamesh > > Thanks, > Richard > > > + sel = V4SI with values {0, 2, 4, 6} > > + In this case result should be NULL_TREE because we cross input vector > > + boundary at index 4. */ > > + { > > + tree arg0 = build_vec_cst_rand (integer_type_node, 2, 3, 2); > > + tree arg1 = build_vec_cst_rand (integer_type_node, 2, 3, 2); > > + > > + poly_uint64 res_len = 4; > > + tree res_type = build_vector_type (char_type_node, res_len); > > + vec_perm_builder builder (res_len, 4, 1); > > + builder.quick_push (0); > > + builder.quick_push (2); > > + builder.quick_push (4); > > + builder.quick_push (6); > > + > > + vec_perm_indices sel (builder, 2, res_len); > > + const char *reason; > > + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel, &reason); > > + gcc_assert (res == NULL_TREE); > > + gcc_assert (!strcmp (reason, "cannot divide selector element by arg > > len")); > > + } > > +} > > + > > +static void > > +test () > > +{ > > + tree vectype = get_preferred_vectype (integer_type_node); > > + if (TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) > > + return; > > + > > + test_dup (); > > + test_stepped (); > > + test_mixed (); > > +} > > +}; > > + > > /* Verify that various binary operations on vectors are folded > > correctly. */ > > > > @@ -16943,6 +17626,7 @@ fold_const_cc_tests () > > test_arithmetic_folding (); > > test_vector_folding (); > > test_vec_duplicate_folding (); > > + test_fold_vec_perm_cst::test (); > > } > > > > } // namespace selftest
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 7e5494dfd39..648ef5c647e 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see gimple code, we need to handle GIMPLE tuples as well as their corresponding tree equivalents. */ +#define INCLUDE_ALGORITHM #include "config.h" #include "system.h" #include "coretypes.h" @@ -10494,15 +10495,9 @@ fold_mult_zconjz (location_t loc, tree type, tree expr) static bool vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) { - unsigned HOST_WIDE_INT i, nunits; + unsigned HOST_WIDE_INT i; - if (TREE_CODE (arg) == VECTOR_CST - && VECTOR_CST_NELTS (arg).is_constant (&nunits)) - { - for (i = 0; i < nunits; ++i) - elts[i] = VECTOR_CST_ELT (arg, i); - } - else if (TREE_CODE (arg) == CONSTRUCTOR) + if (TREE_CODE (arg) == CONSTRUCTOR) { constructor_elt *elt; @@ -10520,6 +10515,182 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) return true; } +/* Helper routine for fold_vec_perm_cst to check if SEL is a suitable + mask for VLA vec_perm folding. + REASON if specified, will contain the reason why SEL is not suitable. + Used only for debugging and unit-testing. */ + +static bool +valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1, + const vec_perm_indices &sel, + const char **reason = NULL) +{ + unsigned sel_npatterns = sel.encoding ().npatterns (); + unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern (); + + if (!(pow2p_hwi (sel_npatterns) + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg0)) + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg1)))) + { + if (reason) + *reason = "npatterns is not power of 2"; + return false; + } + + /* We want to avoid cases where sel.length is not a multiple of npatterns. + For eg: sel.length = 2 + 2x, and sel npatterns = 4. */ + poly_uint64 esel; + if (!multiple_p (sel.length (), sel_npatterns, &esel)) + { + if (reason) + *reason = "sel.length is not multiple of sel_npatterns"; + return false; + } + + if (sel_nelts_per_pattern < 3) + return true; + + for (unsigned pattern = 0; pattern < sel_npatterns; pattern++) + { + poly_uint64 a1 = sel[pattern + sel_npatterns]; + poly_uint64 a2 = sel[pattern + 2 * sel_npatterns]; + HOST_WIDE_INT step; + if (!poly_int64 (a2 - a1).is_constant (&step)) + { + if (reason) + *reason = "step is not constant"; + return false; + } + // FIXME: Punt on step < 0 for now, revisit later. + if (step < 0) + return false; + if (step == 0) + continue; + + if (!pow2p_hwi (step)) + { + if (reason) + *reason = "step is not power of 2"; + return false; + } + + /* Ensure that stepped sequence of the pattern selects elements + only from the same input vector. */ + uint64_t q1, qe; + poly_uint64 r1, re; + poly_uint64 ae = a1 + (esel - 2) * step; + poly_uint64 arg_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + if (!(can_div_trunc_p (a1, arg_len, &q1, &r1) + && can_div_trunc_p (ae, arg_len, &qe, &re) + && q1 == qe)) + { + if (reason) + *reason = "crossed input vectors"; + return false; + } + + /* Ensure that the stepped sequence always selects from the same + input pattern. */ + unsigned arg_npatterns + = ((q1 & 0) == 0) ? VECTOR_CST_NPATTERNS (arg0) + : VECTOR_CST_NPATTERNS (arg1); + + if (!multiple_p (step, arg_npatterns)) + { + if (reason) + *reason = "step is not multiple of npatterns"; + return false; + } + } + + return true; +} + +/* Try to fold permutation of ARG0 and ARG1 with SEL selector when + the input vectors are VECTOR_CST. Return NULL_TREE otherwise. + REASON has same purpose as described in + valid_mask_for_fold_vec_perm_cst_p. */ + + +static tree +fold_vec_perm_cst (tree type, tree arg0, tree arg1, const vec_perm_indices &sel, + const char **reason = NULL) +{ + unsigned res_npatterns, res_nelts_per_pattern; + unsigned HOST_WIDE_INT res_nelts; + + /* (1) If SEL is a suitable mask as determined by + valid_mask_for_fold_vec_perm_cst_p, then: + res_npatterns = max of npatterns between ARG0, ARG1, and SEL + res_nelts_per_pattern = max of nelts_per_pattern between + ARG0, ARG1 and SEL. + (2) If SEL is not a suitable mask, and TYPE is VLS then: + res_npatterns = nelts in result vector. + res_nelts_per_pattern = 1. + This exception is made so that VLS ARG0, ARG1 and SEL work as before. */ + if (valid_mask_for_fold_vec_perm_cst_p (arg0, arg1, sel, reason)) + { + res_npatterns + = std::max (VECTOR_CST_NPATTERNS (arg0), + std::max (VECTOR_CST_NPATTERNS (arg1), + sel.encoding ().npatterns ())); + + res_nelts_per_pattern + = std::max (VECTOR_CST_NELTS_PER_PATTERN (arg0), + std::max (VECTOR_CST_NELTS_PER_PATTERN (arg1), + sel.encoding ().nelts_per_pattern ())); + + res_nelts = res_npatterns * res_nelts_per_pattern; + } + else if (TYPE_VECTOR_SUBPARTS (type).is_constant (&res_nelts)) + { + res_npatterns = res_nelts; + res_nelts_per_pattern = 1; + } + else + return NULL_TREE; + + tree_vector_builder out_elts (type, res_npatterns, res_nelts_per_pattern); + for (unsigned i = 0; i < res_nelts; i++) + { + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + uint64_t q; + poly_uint64 r; + unsigned HOST_WIDE_INT index; + + /* Punt if sel[i] /trunc_div len cannot be determined, + because the input vector to be chosen will depend on + runtime vector length. + For example if len == 4 + 4x, and sel[i] == 4, + If len at runtime equals 4, we choose arg1[0]. + For any other value of len > 4 at runtime, we choose arg0[4]. + which makes the element choice dependent on runtime vector length. */ + if (!can_div_trunc_p (sel[i], len, &q, &r)) + { + if (reason) + *reason = "cannot divide selector element by arg len"; + return NULL_TREE; + } + + /* sel[i] % len will give the index of element in the chosen input + vector. For example if sel[i] == 5 + 4x and len == 4 + 4x, + we will choose arg1[1] since (5 + 4x) % (4 + 4x) == 1. */ + if (!r.is_constant (&index)) + { + if (reason) + *reason = "remainder is not constant"; + return NULL_TREE; + } + + tree arg = ((q & 1) == 0) ? arg0 : arg1; + tree elem = vector_cst_elt (arg, index); + out_elts.quick_push (elem); + } + + return out_elts.build (); +} + /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, NULL_TREE otherwise. */ @@ -10529,43 +10700,41 @@ fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) { unsigned int i; unsigned HOST_WIDE_INT nelts; - bool need_ctor = false; - if (!sel.length ().is_constant (&nelts)) - return NULL_TREE; - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts)); + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), sel.length ()) + && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), + TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)))); + if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) return NULL_TREE; + if (TREE_CODE (arg0) == VECTOR_CST + && TREE_CODE (arg1) == VECTOR_CST) + return fold_vec_perm_cst (type, arg0, arg1, sel); + + /* For fall back case, we want to ensure we have VLS vectors + with equal length. */ + if (!sel.length ().is_constant (&nelts)) + return NULL_TREE; + + gcc_assert (known_eq (sel.length (), + TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)))); tree *in_elts = XALLOCAVEC (tree, nelts * 2); if (!vec_cst_ctor_to_array (arg0, nelts, in_elts) || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts)) return NULL_TREE; - tree_vector_builder out_elts (type, nelts, 1); + vec<constructor_elt, va_gc> *v; + vec_alloc (v, nelts); for (i = 0; i < nelts; i++) { HOST_WIDE_INT index; if (!sel[i].is_constant (&index)) return NULL_TREE; - if (!CONSTANT_CLASS_P (in_elts[index])) - need_ctor = true; - out_elts.quick_push (unshare_expr (in_elts[index])); - } - - if (need_ctor) - { - vec<constructor_elt, va_gc> *v; - vec_alloc (v, nelts); - for (i = 0; i < nelts; i++) - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, out_elts[i]); - return build_constructor (type, v); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, in_elts[index]); } - else - return out_elts.build (); + return build_constructor (type, v); } /* Try to fold a pointer difference of type TYPE two address expressions of @@ -16892,6 +17061,554 @@ test_arithmetic_folding () x); } +namespace test_fold_vec_perm_cst { + +/* Build a VECTOR_CST corresponding to VMODE, and has + encoding given by NPATTERNS, NELTS_PER_PATTERN and STEP. + Fill it with randomized elements, using rand() % THRESHOLD. */ + +static tree +build_vec_cst_rand (machine_mode vmode, unsigned npatterns, + unsigned nelts_per_pattern, + int step = 0, int threshold = 100) +{ + tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER (vmode), 1); + tree vectype = build_vector_type_for_mode (inner_type, vmode); + tree_vector_builder builder (vectype, npatterns, nelts_per_pattern); + + // Fill a0 for each pattern + for (unsigned i = 0; i < npatterns; i++) + builder.quick_push (build_int_cst (inner_type, rand () % threshold)); + + if (nelts_per_pattern == 1) + return builder.build (); + + // Fill a1 for each pattern + for (unsigned i = 0; i < npatterns; i++) + builder.quick_push (build_int_cst (inner_type, rand () % threshold)); + + if (nelts_per_pattern == 2) + return builder.build (); + + for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++) + { + tree prev_elem = builder[i - npatterns]; + int prev_elem_val = TREE_INT_CST_LOW (prev_elem); + int val = prev_elem_val + step; + builder.quick_push (build_int_cst (inner_type, val)); + } + + return builder.build (); +} + +/* Validate result of VEC_PERM_EXPR folding for the unit-tests below, + when result is VLA. */ + +static void +validate_res (unsigned npatterns, unsigned nelts_per_pattern, + tree res, tree *expected_res) +{ + /* Actual npatterns / nelts_per_pattern in res may be less than expected due + to canonicalization. */ + ASSERT_TRUE (VECTOR_CST_NPATTERNS (res) <= npatterns); + ASSERT_TRUE (VECTOR_CST_NELTS_PER_PATTERN (res) <= nelts_per_pattern); + + for (unsigned i = 0; i < npatterns * nelts_per_pattern; i++) + ASSERT_TRUE (operand_equal_p (VECTOR_CST_ELT (res, i), expected_res[i], 0)); +} + +/* Validate result of VEC_PERM_EXPR folding for the unit-tests below, + when the result is VLS. */ + +static void +validate_res_vls (tree res, tree *expected_res, unsigned expected_nelts) +{ + ASSERT_TRUE (known_eq (VECTOR_CST_NELTS (res), expected_nelts)); + for (unsigned i = 0; i < expected_nelts; i++) + ASSERT_TRUE (operand_equal_p (VECTOR_CST_ELT (res, i), expected_res[i], 0)); +} + +/* Test cases where result and input vectors are VNx4SI */ + +static void +test_vnx4si (machine_mode vmode) +{ + /* Case 1: mask = {0, ...} */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 1, 1); + builder.quick_push (0); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (res, 0) }; + validate_res (1, 1, res, expected_res); + } + + /* Case 2: mask = {len, ...} */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 1, 1); + builder.quick_push (len); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (arg1, 0) }; + validate_res (1, 1, res, expected_res); + } + + /* Case 3: mask = { 0, len, ... } */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 2, 1); + builder.quick_push (0); + builder.quick_push (len); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg1, 0) }; + validate_res (2, 1, res, expected_res); + } + + /* Case 4: mask = { 0, len, 1, len+1, ... } */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 2, 2); + builder.quick_push (0); + builder.quick_push (len); + builder.quick_push (1); + builder.quick_push (len + 1); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg1, 0), + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) + }; + validate_res (2, 2, res, expected_res); + } + + /* Case 5: mask = { 0, len, 1, len+1, .... } + npatterns = 4, nelts_per_pattern = 1 */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 4, 1); + builder.quick_push (0); + builder.quick_push (len); + builder.quick_push (1); + builder.quick_push (len + 1); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg1, 0), + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) + }; + validate_res (4, 1, res, expected_res); + } + + /* Case 6: mask = {0, 4, ...} + npatterns = 1, nelts_per_pattern = 2. + This should return NULL_TREE because the index 4 may choose + from either arg0 or arg1 depending on vector length. */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (len, 1, 2); + builder.quick_push (0); + builder.quick_push (4); + vec_perm_indices sel (builder, 2, len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + ASSERT_TRUE (res == NULL_TREE); + } + + /* Case 7: npatterns(arg0) = 4 > npatterns(sel) = 2 + mask = {0, len, 1, len + 1, ...} + sel_npatterns = 2, sel_nelts_per_pattern = 2. */ + { + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); + tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (arg0_len, 2, 2); + builder.quick_push (0); + builder.quick_push (arg0_len); + builder.quick_push (1); + builder.quick_push (arg0_len + 1); + vec_perm_indices sel (builder, 2, arg0_len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg1, 0), + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) + }; + validate_res (2, 2, res, expected_res); + } + + /* Case 8: sel = {0, 1, 2, ...} + npatterns = 1, nelts_per_pattern = 3 + expected res: { arg0[0], arg0[1], arg0[2], ... } */ + { + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 2); + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 2); + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (arg0_len, 1, 3); + builder.quick_push (0); + builder.quick_push (1); + builder.quick_push (2); + + vec_perm_indices sel (builder, 2, arg0_len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel); + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg0, 1), + vector_cst_elt (arg0, 2) }; + validate_res (1, 3, res, expected_res); + } + + /* Case 9: sel = {len, len + 1, len + 2, ... } + npatterns = 1, nelts_per_pattern = 3 + expected res: { op1[0], op1[1], op1[2], ... } */ + { + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 2); + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 2); + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (arg0_len, 1, 3); + builder.quick_push (arg0_len); + builder.quick_push (arg0_len + 1); + builder.quick_push (arg0_len + 2); + + vec_perm_indices sel (builder, 2, arg0_len); + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, NULL); + tree expected_res[] = { vector_cst_elt (arg1, 0), vector_cst_elt (arg1, 1), + vector_cst_elt (arg1, 2) }; + validate_res (1, 3, res, expected_res); + } +} + +/* Test cases where result and input vectors are VNx16QI */ + +static void +test_vnx16qi (machine_mode vmode) +{ + /* Case 1: Leading element of arg1, stepped sequence: pattern 0 of arg0. + sel = {len, 0, 0, 0, 2, 0, ...} + npatterns = 2, nelts_per_pattern = 3. + Use extra pattern {0, ...} to lower number of elements per pattern. */ + { + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 2); + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 2); + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (arg0_len, 2, 3); + builder.quick_push (arg0_len); + int mask_elems[] = { 0, 0, 0, 2, 0 }; + for (int i = 0; i < 5; i++) + builder.quick_push (mask_elems[i]); + + vec_perm_indices sel (builder, 2, arg0_len); + const char *reason; + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, &reason); + + tree expected_res[] = { vector_cst_elt (arg1, 0), vector_cst_elt (arg0, 0), + vector_cst_elt (arg0, 0), vector_cst_elt (arg0, 0), + vector_cst_elt (arg0, 2), vector_cst_elt (arg0, 0) + }; + validate_res (2, 3, res, expected_res); + } + + /* Case 2: + sel = { len, 0, 2, ... } npatterns = 1, nelts_per_pattern = 3. + This should return NULL because we cross the input vectors. + Because, + arg0_len = 16 + 16x + a1 = 0 + S = 2 + esel = arg0_len / npatterns_sel = 16+16x/1 = 16 + 16x + ae = 0 + (esel - 2) * S + = 0 + (16 + 16x - 2) * 2 + = 28 + 32x + Let q1 = a1 / arg0_len = 0 /trunc (16 + 16x) = 0 + Let qe = ae / arg0_len = (28 + 32x) /trunc (16 + 16x) = 1. + Since q1 != qe, we cross input vectors. + So return NULL_TREE. */ + { + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 2); + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 2); + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + + vec_perm_builder builder (arg0_len, 1, 3); + builder.quick_push (arg0_len); + builder.quick_push (0); + builder.quick_push (2); + + vec_perm_indices sel (builder, 2, arg0_len); + const char *reason; + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, &reason); + ASSERT_TRUE (res == NULL_TREE); + ASSERT_TRUE (!strcmp (reason, "crossed input vectors")); + } + + /* Case 3: Select elements from different patterns. + Should return NULL. */ + { + tree op0 = build_vec_cst_rand (vmode, 2, 3, 2); + tree op1 = build_vec_cst_rand (vmode, 2, 3, 2); + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); + + vec_perm_builder builder (op0_len, 2, 3); + builder.quick_push (op0_len); + int mask_elems[] = { 0, 0, 0, 1, 0 }; + for (int i = 0; i < 5; i++) + builder.quick_push (mask_elems[i]); + + vec_perm_indices sel (builder, 2, op0_len); + const char *reason; + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel, &reason); + ASSERT_TRUE (res == NULL_TREE); + ASSERT_TRUE (!strcmp (reason, "step is not multiple of npatterns")); + } + + /* Case 4: Select pattern 0 of op0 and dup of op0[0] + op0, op1, sel: npatterns = 2, nelts_per_pattern = 3 + sel = { 0, 0, 2, 0, 4, 0, ... }. + + For pattern {0, 2, 4, ...}: + a1 = 2 + len = 16 + 16x + S = 2 + esel = len / npatterns_sel = (16 + 16x) / 2 = (8 + 8x) + ae = a1 + (esel - 2) * S + = 2 + (8 + 8x - 2) * 2 + = 14 + 16x + a1 / arg0_len = 2 / (16 + 16x) = 0 + ae / arg0_len = (14 + 16x) / (16 + 16x) = 0 + So a1/arg0_len = ae/arg0_len = 0 + Hence we select from first vector op0 + S = 2, npatterns = 2. + Since S is multiple of npatterns(op0), we are selecting from + same pattern of op0. + + For pattern {0, ...}, we are choosing { op0[0] ... } + So res will be combination of above patterns: + res: { op0[0], op0[0], op0[2], op0[0], op0[4], op0[0], ... } + with npatterns = 2, nelts_per_pattern = 3. */ + { + tree op0 = build_vec_cst_rand (vmode, 2, 3, 2); + tree op1 = build_vec_cst_rand (vmode, 2, 3, 2); + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); + + vec_perm_builder builder (op0_len, 2, 3); + int mask_elems[] = { 0, 0, 2, 0, 4, 0 }; + for (int i = 0; i < 6; i++) + builder.quick_push (mask_elems[i]); + + vec_perm_indices sel (builder, 2, op0_len); + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel); + tree expected_res[] = { vector_cst_elt (op0, 0), vector_cst_elt (op0, 0), + vector_cst_elt (op0, 2), vector_cst_elt (op0, 0), + vector_cst_elt (op0, 4), vector_cst_elt (op0, 0) }; + validate_res (2, 3, res, expected_res); + } + + /* Case 7: sel_npatterns > op_npatterns; + op0, op1: npatterns = 2, nelts_per_pattern = 3 + sel: { 0, 0, 1, len, 2, 0, 3, len, 4, 0, 5, len, ...}, + with npatterns = 4, nelts_per_pattern = 3. */ + { + tree op0 = build_vec_cst_rand (vmode, 2, 3, 2); + tree op1 = build_vec_cst_rand (vmode, 2, 3, 2); + poly_uint64 op0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); + + vec_perm_builder builder(op0_len, 4, 3); + // -1 is used as place holder for poly_int_cst + int mask_elems[] = { 0, 0, 1, -1, 2, 0, 3, -1, 4, 0, 5, -1 }; + for (int i = 0; i < 12; i++) + builder.quick_push ((mask_elems[i] == -1) ? op0_len : mask_elems[i]); + + vec_perm_indices sel (builder, 2, op0_len); + tree res = fold_vec_perm_cst (TREE_TYPE (op0), op0, op1, sel); + tree expected_res[] = { vector_cst_elt (op0, 0), vector_cst_elt (op0, 0), + vector_cst_elt (op0, 1), vector_cst_elt (op1, 0), + vector_cst_elt (op0, 2), vector_cst_elt (op0, 0), + vector_cst_elt (op0, 3), vector_cst_elt (op1, 0), + vector_cst_elt (op0, 4), vector_cst_elt (op0, 0), + vector_cst_elt (op0, 5), vector_cst_elt (op1, 0) }; + validate_res (4, 3, res, expected_res); + } +} + +/* Test cases where result is VNx4SI and input vectors are V4SI. */ + +static void +test_vnx4si_v4si (machine_mode vnx4si_mode, machine_mode v4si_mode) +{ + /* Case 1: + sel = { 0, 4, 1, 5, ... } + res = { op0[0], op1[0], op0[1], op1[1], ...} // (4, 1) */ + { + tree arg0 = build_vec_cst_rand (v4si_mode, 4, 1, 0); + tree arg1 = build_vec_cst_rand (v4si_mode, 4, 1, 0); + + tree inner_type + = lang_hooks.types.type_for_mode (GET_MODE_INNER (vnx4si_mode), 1); + tree res_type = build_vector_type_for_mode (inner_type, vnx4si_mode); + + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); + vec_perm_builder builder (res_len, 4, 1); + builder.quick_push (0); + builder.quick_push (4); + builder.quick_push (1); + builder.quick_push (5); + + vec_perm_indices sel (builder, 2, res_len); + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel); + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg1, 0), + vector_cst_elt (arg0, 1), vector_cst_elt (arg1, 1) + }; + validate_res (4, 1, res, expected_res); + } + + /* Case 2: Same as case 1, but contains an out of bounds access which + should wrap around. + sel = {0, 8, 4, 12, ...} (4, 1) + res = { op0[0], op0[0], op1[0], op1[0], ... } (4, 1). */ + { + tree arg0 = build_vec_cst_rand (v4si_mode, 4, 1, 0); + tree arg1 = build_vec_cst_rand (v4si_mode, 4, 1, 0); + + tree inner_type + = lang_hooks.types.type_for_mode (GET_MODE_INNER (vnx4si_mode), 1); + tree res_type = build_vector_type_for_mode (inner_type, vnx4si_mode); + + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); + vec_perm_builder builder (res_len, 4, 1); + builder.quick_push (0); + builder.quick_push (8); + builder.quick_push (4); + builder.quick_push (12); + + vec_perm_indices sel (builder, 2, res_len); + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel); + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg0, 0), + vector_cst_elt (arg1, 0), vector_cst_elt (arg1, 0) + }; + validate_res (4, 1, res, expected_res); + } +} + +/* Test cases where result is V4SI and input vectors are VNx4SI. */ + +static void +test_v4si_vnx4si (machine_mode v4si_mode, machine_mode vnx4si_mode) +{ + /* Case 1: + sel = { 0, 1, 2, 3} + res = { op0[0], op0[1], op0[2], op0[3] }. */ + { + tree arg0 = build_vec_cst_rand (vnx4si_mode, 4, 1); + tree arg1 = build_vec_cst_rand (vnx4si_mode, 4, 1); + + tree inner_type + = lang_hooks.types.type_for_mode (GET_MODE_INNER (v4si_mode), 1); + tree res_type = build_vector_type_for_mode (inner_type, v4si_mode); + + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); + vec_perm_builder builder (res_len, 4, 1); + for (int i = 0; i < 4; i++) + builder.quick_push (i); + + vec_perm_indices sel (builder, 2, res_len); + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel); + tree expected_res[] = { vector_cst_elt (arg0, 0), vector_cst_elt (arg0, 1), + vector_cst_elt (arg0, 2), vector_cst_elt (arg0, 3) }; + validate_res_vls (res, expected_res, 4); + } + + /* Case 2: Same as Case 1, but crossing input vector. + sel = {0, 2, 4, 6} + In this case,the index 4 is ambiguous since len = 4 + 4x. + If x = 0 at runtime, we choose op1[0] + For x > 0 at runtime, we choose op0[4] + Since we cannot determine, which vector to choose from during compile time, + should return NULL_TREE. */ + { + tree arg0 = build_vec_cst_rand (vnx4si_mode, 4, 1); + tree arg1 = build_vec_cst_rand (vnx4si_mode, 4, 1); + + tree inner_type + = lang_hooks.types.type_for_mode (GET_MODE_INNER (v4si_mode), 1); + tree res_type = build_vector_type_for_mode (inner_type, v4si_mode); + + poly_uint64 res_len = TYPE_VECTOR_SUBPARTS (res_type); + vec_perm_builder builder (res_len, 4, 1); + for (int i = 0; i < 8; i += 2) + builder.quick_push (i); + + vec_perm_indices sel (builder, 2, res_len); + const char *reason; + tree res = fold_vec_perm_cst (res_type, arg0, arg1, sel, &reason); + ASSERT_TRUE (res == NULL_TREE); + ASSERT_TRUE (!strcmp (reason, "cannot divide selector element by arg len")); + } +} + +/* Helper function to get a vector mode that has element + mode as INNER_MODE and GET_MODE_NUNITS equal to len. */ + +static machine_mode +get_vmode (machine_mode inner_mode, poly_uint64 len) +{ + machine_mode vmode; + FOR_EACH_MODE_IN_CLASS (vmode, MODE_VECTOR_INT) + if (GET_MODE_INNER (vmode) == inner_mode + && known_eq (GET_MODE_NUNITS (vmode), len)) + return vmode; + return E_VOIDmode; +} + +/* Invoke tests for fold_vec_perm_cst. */ + +static void +test () +{ + /* Conditionally execute fold_vec_perm_cst tests, if target supports + VLA vectors. Use a compile time check so we avoid instantiating + poly_uint64 with N > 1 on targets that do not support VLA vectors. */ + if constexpr (poly_int_traits<poly_uint64>::num_coeffs > 1) + { + machine_mode vnx4si_mode = get_vmode (SImode, poly_uint64 (4, 4)); + if (vnx4si_mode == E_VOIDmode) + return; + machine_mode vnx16qi_mode = get_vmode (QImode, poly_uint64 (16, 16)); + if (vnx16qi_mode == E_VOIDmode) + return; + machine_mode v4si_mode = get_vmode (SImode, 4); + if (v4si_mode == E_VOIDmode) + return; + + test_vnx4si (vnx4si_mode); + test_vnx16qi (vnx16qi_mode); + test_vnx4si_v4si (vnx4si_mode, v4si_mode); + test_v4si_vnx4si (v4si_mode, vnx4si_mode); + } +} +}; // end of test_fold_vec_perm_cst namespace + /* Verify that various binary operations on vectors are folded correctly. */ @@ -16943,6 +17660,7 @@ fold_const_cc_tests () test_arithmetic_folding (); test_vector_folding (); test_vec_duplicate_folding (); + test_fold_vec_perm_cst::test (); } } // namespace selftest