Currently the vectorizer forces unrolling for grouped loads whose DR_STEP is not constant, forcing the elements to be loaded individually via strided-load support. The following patch enhances that machinery to handle groups used in SLP that have a non-constant DR_STEP, avoiding the excessive unrolling (and (un-)packing).
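As a concrete illustration (this mirrors the access pattern exercised by the new testcase gcc.dg/vect/slp-41.c below; the function name here is only for the sketch), a loop like the following forms an SLP group of four loads whose DR_STEP depends on the runtime 'stride':

  /* Sketch of the targeted pattern: a grouped load with a non-constant
     step, as in the new testcase gcc.dg/vect/slp-41.c.  */
  void
  foo (int *p, short *q, int stride, int n)
  {
    int i;
    for (i = 0; i < n; ++i)
      {
        /* The four loads from p form one interleaving group; the group
           start advances by the runtime 'stride' each iteration, so
           DR_STEP is not an INTEGER_CST.  */
        q[i*4+0] = p[i*stride+0];
        q[i*4+1] = p[i*stride+1];
        q[i*4+2] = p[i*stride+2];
        q[i*4+3] = p[i*stride+3];
      }
  }

Whether such a group ends up loaded as a whole vector or in vector pieces depends on group_size relative to nunits (see the vectorizable_load changes below).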
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-08  Richard Biener  <rguent...@suse.de>

	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Handle
	strided group loads.
	(vect_verify_datarefs_alignment): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.
	(vect_analyze_group_access): Likewise.
	(vect_analyze_data_ref_access): Likewise.
	(vect_analyze_data_ref_accesses): Likewise.
	* tree-vect-stmts.c (vect_model_load_cost): Likewise.
	(vectorizable_load): Likewise.

	* gcc.dg/vect/slp-41.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig	2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-data-refs.c	2015-05-08 13:26:23.839725349 +0200
*************** vect_compute_data_ref_alignment (struct
*** 671,677 ****
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
--- 671,677 ----
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign = NULL_TREE;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
*************** vect_compute_data_ref_alignment (struct
*** 687,696 ****
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      return true;
  
!   misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
--- 687,698 ----
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      return true;
  
!   if (tree_fits_shwi_p (DR_STEP (dr)))
!     misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
*************** vect_compute_data_ref_alignment (struct
*** 704,712 ****
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
--- 706,714 ----
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
!           && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
*************** vect_compute_data_ref_alignment (struct
*** 732,740 ****
    if (!loop)
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 734,742 ----
    if (!loop)
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
!           && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_verify_datarefs_alignment (loop_vec
*** 964,970 ****
        /* Strided loads perform only component accesses, alignment is
           irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
          continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 966,973 ----
        /* Strided loads perform only component accesses, alignment is
           irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
          continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1431,1437 ****
        /* Strided loads perform only component accesses, alignment is
           irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
          continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
--- 1434,1441 ----
        /* Strided loads perform only component accesses, alignment is
           irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
          continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1723,1729 ****
            /* Strided loads perform only component accesses, alignment is
               irrelevant for them.  */
!           if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
              continue;
  
            save_misalignment = DR_MISALIGNMENT (dr);
--- 1727,1734 ----
            /* Strided loads perform only component accesses, alignment is
               irrelevant for them.  */
!           if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!               && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
              continue;
  
            save_misalignment = DR_MISALIGNMENT (dr);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1841,1850 ****
                && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
              continue;
  
-           /* Strided loads perform only component accesses, alignment is
-              irrelevant for them.  */
            if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!             continue;
  
            supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 1846,1860 ----
                && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
              continue;
  
            if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!             {
!               /* Strided loads perform only component accesses, alignment is
!                  irrelevant for them.  */
!               if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
!                 continue;
!               do_versioning = false;
!               break;
!             }
  
            supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_analyze_group_access (struct data_r
*** 2057,2063 ****
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
--- 2067,2073 ----
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = -1;
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
*************** vect_analyze_group_access (struct data_r
*** 2067,2073 ****
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   groupsize = absu_hwi (dr_step) / type_size;
  
    /* Not consecutive access is possible only if it is a part of
       interleaving.  */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
--- 2077,2089 ----
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   if (tree_fits_shwi_p (step))
!     {
!       dr_step = tree_to_shwi (step);
!       groupsize = absu_hwi (dr_step) / type_size;
!     }
!   else
!     groupsize = 0;
  
    /* Not consecutive access is possible only if it is a part of
       interleaving.  */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
*************** vect_analyze_group_access (struct data_r
*** 2142,2148 ****
        tree prev_init = DR_INIT (data_ref);
        gimple prev = stmt;
        HOST_WIDE_INT diff, gaps = 0;
-       unsigned HOST_WIDE_INT count_in_bytes;
  
        while (next)
          {
--- 2158,2163 ----
*************** vect_analyze_group_access (struct data_r
*** 2211,2240 ****
            count++;
          }
  
!       /* COUNT is the number of accesses found, we multiply it by the size of
!          the type to get COUNT_IN_BYTES.  */
!       count_in_bytes = type_size * count;
! 
!       /* Check that the size of the interleaving (including gaps) is not
!          greater than STEP.  */
!       if (dr_step != 0
!           && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
!         {
!           if (dump_enabled_p ())
!             {
!               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                "interleaving size is greater than step for ");
!               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
!                                  DR_REF (dr));
!               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
!             }
!           return false;
!         }
! 
!       /* Check that the size of the interleaving is equal to STEP for stores,
           i.e., that there are no gaps.  */
!       if (dr_step != 0
!           && absu_hwi (dr_step) != count_in_bytes)
          {
            if (DR_IS_READ (dr))
              {
--- 2226,2237 ----
            count++;
          }
  
!       if (groupsize == 0)
!         groupsize = count + gaps;
! 
!       /* Check that the size of the interleaving is equal to count for stores,
           i.e., that there are no gaps.  */
!       if (groupsize != count)
          {
            if (DR_IS_READ (dr))
              {
*************** vect_analyze_group_access (struct data_r
*** 2253,2278 ****
              }
          }
  
-       /* Check that STEP is a multiple of type size.  */
-       if (dr_step != 0
-           && (dr_step % type_size) != 0)
-         {
-           if (dump_enabled_p ())
-             {
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "step is not a multiple of type size: step ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
-               dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                  TYPE_SIZE_UNIT (scalar_type));
-               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-             }
-           return false;
-         }
- 
-       if (groupsize == 0)
-         groupsize = count + gaps;
- 
        GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
--- 2250,2255 ----
*************** vect_analyze_data_ref_access (struct dat
*** 2392,2400 ****
        return false;
      }
  
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
--- 2369,2380 ----
        return false;
      }
  
+ 
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!             && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
!                 || vect_analyze_group_access (dr)));
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2596,2610 ****
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
  
!         /* Check that the data-refs have the same constant size and step.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
!             || !tree_int_cst_equal (sza, szb)
!             || !tree_fits_shwi_p (DR_STEP (dra))
!             || !tree_fits_shwi_p (DR_STEP (drb))
!             || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
            break;
  
          /* Do not place the same access in the interleaving chain twice.  */
--- 2576,2591 ----
              || !gimple_assign_single_p (DR_STMT (drb)))
            break;
  
!         /* Check that the data-refs have the same constant size.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!tree_fits_uhwi_p (sza)
              || !tree_fits_uhwi_p (szb)
!             || !tree_int_cst_equal (sza, szb))
!           break;
! 
!         /* Check that the data-refs have the same step.  */
!         if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
            break;
  
          /* Do not place the same access in the interleaving chain twice.  */
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2637,2647 ****
                  != type_size_a))
            break;
  
!         /* The step (if not zero) is greater than the difference between
!            data-refs' inits.  This splits groups into suitable sizes.  */
!         HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
!         if (step != 0 && step <= (init_b - init_a))
!           break;
  
          if (dump_enabled_p ())
            {
--- 2618,2632 ----
                  != type_size_a))
            break;
  
!         /* If the step (if not zero or non-constant) is greater than the
!            difference between data-refs' inits this splits groups into
!            suitable sizes.  */
!         if (tree_fits_shwi_p (DR_STEP (dra)))
!           {
!             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
!             if (step != 0 && step <= (init_b - init_a))
!               break;
!           }
  
          if (dump_enabled_p ())
            {
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig	2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-stmts.c	2015-05-08 13:28:00.920573458 +0200
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1112,1118 ****
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1)
      {
        /* Uses an even and odd extract operations or shuffle operations
           for each needed permute.  */
--- 1112,1119 ----
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1
!       && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* Uses an even and odd extract operations or shuffle operations
           for each needed permute.  */
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1127,1141 ****
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
                                         ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                         scalar_load, stmt_info, 0, vect_body);
-       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
-                                        stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
--- 1128,1141 ----
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
                                         ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                         scalar_load, stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1143,1148 ****
--- 1143,1151 ----
                          || group_size > 1 || slp_node),
                          &inside_cost, &prologue_cost,
                          prologue_cost_vec, body_cost_vec, true);
+   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+                                      stmt_info, 0, vect_body);
  
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
*************** vectorizable_load (gimple stmt, gimple_s
*** 5657,5663 ****
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
--- 5660,5666 ----
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size = -1, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 5790,5798 ****
            return false;
          }
  
!       if (!slp && !PURE_SLP_STMT (stmt_info))
          {
-           group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
            if (vect_load_lanes_supported (vectype, group_size))
              load_lanes_p = true;
            else if (!vect_grouped_load_supported (vectype, group_size))
--- 5793,5803 ----
            return false;
          }
  
!       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
!       if (!slp
!           && !PURE_SLP_STMT (stmt_info)
!           && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
          {
            if (vect_load_lanes_supported (vectype, group_size))
              load_lanes_p = true;
            else if (!vect_grouped_load_supported (vectype, group_size))
*************** vectorizable_load (gimple stmt, gimple_s
*** 5847,5853 ****
            }
        }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     ;
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
--- 5852,5873 ----
            }
        }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     {
!       if ((grouped_load
!            && (slp || PURE_SLP_STMT (stmt_info)))
!           && (group_size > nunits
!               || nunits % group_size != 0
!               /* ???  During analysis phase we are not called with the
!                  slp node/instance we are in so whether we'll end up
!                  with a permutation we don't know.  Still we don't
!                  support load permutations.  */
!               || slp_perm))
!         {
!           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                            "unhandled strided group load\n");
!           return false;
!         }
!     }
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 6136,6169 ****
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        for (j = 0; j < ncopies; j++)
          {
            tree vec_inv;
  
!           vec_alloc (v, nunits);
!           for (i = 0; i < nunits; i++)
              {
!               tree newref, newoff;
!               gimple incr;
!               newref = build2 (MEM_REF, TREE_TYPE (vectype),
!                                running_off, alias_off);
! 
!               newref = force_gimple_operand_gsi (gsi, newref, true,
!                                                  NULL_TREE, true,
!                                                  GSI_SAME_STMT);
!               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
!               newoff = copy_ssa_name (running_off);
!               incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                            running_off, stride_step);
                vect_finish_stmt_generation (stmt, incr, gsi);
  
                running_off = newoff;
              }
  
!           vec_inv = build_constructor (vectype, v);
!           new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
!           new_stmt = SSA_NAME_DEF_STMT (new_temp);
! 
            if (j == 0)
              STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
            else
--- 6156,6220 ----
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+       int nloads = nunits;
+       tree ltype = TREE_TYPE (vectype);
+       if (slp)
+         {
+           nloads = nunits / group_size;
+           if (group_size < nunits)
+             ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+           else
+             ltype = vectype;
+           ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+           gcc_assert (!slp_perm);
+         }
        for (j = 0; j < ncopies; j++)
          {
            tree vec_inv;
  
!           if (nloads > 1)
              {
!               vec_alloc (v, nloads);
!               for (i = 0; i < nloads; i++)
!                 {
!                   tree newref, newoff;
!                   gimple incr;
!                   newref = build2 (MEM_REF, ltype, running_off, alias_off);
! 
!                   newref = force_gimple_operand_gsi (gsi, newref, true,
!                                                      NULL_TREE, true,
!                                                      GSI_SAME_STMT);
!                   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
!                   newoff = copy_ssa_name (running_off);
!                   incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
!                                               running_off, stride_step);
!                   vect_finish_stmt_generation (stmt, incr, gsi);
! 
!                   running_off = newoff;
!                 }
! 
!               vec_inv = build_constructor (vectype, v);
!               new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
!               new_stmt = SSA_NAME_DEF_STMT (new_temp);
!             }
!           else
!             {
!               new_stmt = gimple_build_assign (make_ssa_name (ltype),
!                                               build2 (MEM_REF, ltype,
!                                                       running_off, alias_off));
!               vect_finish_stmt_generation (stmt, new_stmt, gsi);
! 
!               tree newoff = copy_ssa_name (running_off);
!               gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                            running_off, stride_step);
                vect_finish_stmt_generation (stmt, incr, gsi);
  
                running_off = newoff;
              }
  
!           if (slp)
!             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            if (j == 0)
              STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
            else
Index: gcc/testsuite/gcc.dg/vect/slp-41.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/slp-41.c	2015-05-08 13:26:23.916726022 +0200
***************
*** 0 ****
--- 1,69 ----
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-require-effective-target vect_pack_trunc } */
+ /* { dg-require-effective-target vect_unpack } */
+ /* { dg-require-effective-target vect_hw_misalign } */
+ 
+ #include "tree-vect.h"
+ 
+ void __attribute__((noinline,noclone))
+ testi (int *p, short *q, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ void __attribute__((noinline,noclone))
+ testi2 (int *q, short *p, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ int ia[256];
+ short sa[256];
+ 
+ extern void abort (void);
+ 
+ int main()
+ {
+   int i;
+ 
+   check_vect ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+       __asm__ volatile ("");
+     }
+   testi (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (sa[i] != ia[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+       __asm__ volatile ("");
+     }
+   testi2 (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (ia[i] != sa[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */