Hi! The following patch adds exclusive scan support for simd. It is similar to the inclusive scan; we just need to swap the input and scan phases and use a slightly different pattern at the start of the scan phase, so that it computes what we need.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-06-21 Jakub Jelinek <ja...@redhat.com> * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument, create another "omp scan inscan exclusive" array if !ctx->scan_inclusive. (lower_rec_input_clauses): Handle exclusive scan inscan reductions. (lower_omp_scan): Likewise. * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of 2-bit bitfield for simd_lane_access_p member. * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle aux == (void *)-4 as simd lane access. * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update comment with permutations to show the canonical permutation order. (vectorizable_scan_store): Handle exclusive scan. (vectorizable_store): Call vectorizable_scan_store even for STMT_VINFO_SIMD_LANE_ACCESS_P > 3. * gcc.dg/vect/vect-simd-12.c: New test. * gcc.dg/vect/vect-simd-13.c: New test. * gcc.dg/vect/vect-simd-14.c: New test. * gcc.dg/vect/vect-simd-15.c: New test. * gcc.target/i386/sse2-vect-simd-12.c: New test. * gcc.target/i386/sse2-vect-simd-13.c: New test. * gcc.target/i386/sse2-vect-simd-14.c: New test. * gcc.target/i386/sse2-vect-simd-15.c: New test. * gcc.target/i386/avx2-vect-simd-12.c: New test. * gcc.target/i386/avx2-vect-simd-13.c: New test. * gcc.target/i386/avx2-vect-simd-14.c: New test. * gcc.target/i386/avx2-vect-simd-15.c: New test. * gcc.target/i386/avx512f-vect-simd-12.c: New test. * gcc.target/i386/avx512f-vect-simd-13.c: New test. * gcc.target/i386/avx512f-vect-simd-14.c: New test. * gcc.target/i386/avx512bw-vect-simd-15.c: New test. * g++.dg/vect/simd-6.cc: New test. * g++.dg/vect/simd-7.cc: New test. * g++.dg/vect/simd-8.cc: New test. * g++.dg/vect/simd-9.cc: New test. * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics. 
--- gcc/omp-low.c.jj 2019-06-20 13:26:29.085150770 +0200 +++ gcc/omp-low.c 2019-06-20 15:46:25.964253058 +0200 @@ -3692,7 +3692,8 @@ struct omplow_simd_context { static bool lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, omplow_simd_context *sctx, tree &ivar, - tree &lvar, tree *rvar = NULL) + tree &lvar, tree *rvar = NULL, + tree *rvar2 = NULL) { if (known_eq (sctx->max_vf, 0U)) { @@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_v *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->lastlane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (*rvar) = 1; + + if (!ctx->scan_inclusive) + { + /* And for exclusive scan yet another one, which will + hold the value during the scan phase. */ + tree savar = create_tmp_var_raw (atype); + if (TREE_ADDRESSABLE (new_var)) + TREE_ADDRESSABLE (savar) = 1; + DECL_ATTRIBUTES (savar) + = tree_cons (get_identifier ("omp simd array"), NULL, + tree_cons (get_identifier ("omp simd inscan " + "exclusive"), NULL, + DECL_ATTRIBUTES (savar))); + gimple_add_tmp_var (savar); + ctx->cb.decl_map->put (iavar, savar); + *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar, + sctx->idx, NULL_TREE, NULL_TREE); + TREE_THIS_NOTRAP (*rvar2) = 1; + } } ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx, NULL_TREE, NULL_TREE); @@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, g new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } - tree rvar = NULL_TREE, *rvarp = NULL; + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE; if (is_simd && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_REDUCTION_INSCAN (c)) rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar, rvarp)) + ivar, lvar, rvarp, + &rvar2)) { if (new_vard == new_var) { @@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, g (c, ivar2, build_outer_var_ref (var, ctx)); gimplify_and_add (x, &llist[0]); + if (rvar2) + { + x = 
lang_hooks.decls.omp_clause_default_ctor + (c, unshare_expr (rvar2), + build_outer_var_ref (var, ctx)); + gimplify_and_add (x, &llist[0]); + } + /* For types that need construction, add another private var which will be default constructed and optionally initialized with @@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, g iteration. */ tree nv = create_tmp_var_raw (TREE_TYPE (ivar)); gimple_add_tmp_var (nv); - ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0), + ctx->cb.decl_map->put (TREE_OPERAND (rvar2 + ? rvar2 + : ivar, 0), nv); x = lang_hooks.decls.omp_clause_default_ctor (c, nv, build_outer_var_ref (var, ctx)); @@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, g gimplify_stmt (&dtor, &tseq); gimple_seq_add_seq (&llist[1], tseq); } + + if (rvar2) + { + x = lang_hooks.decls.omp_clause_dtor (c, rvar2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (&llist[1], tseq); + } + } break; } if (x) @@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, g gimple_seq_add_seq (ilist, tseq); } OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + if (!ctx->scan_inclusive) + { + tree nv2 + = create_tmp_var_raw (TREE_TYPE (new_var)); + gimple_add_tmp_var (nv2); + ctx->cb.decl_map->put (nv, nv2); + x = lang_hooks.decls.omp_clause_default_ctor + (c, nv2, build_outer_var_ref (var, ctx)); + gimplify_and_add (x, ilist); + x = lang_hooks.decls.omp_clause_dtor (c, nv2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } x = lang_hooks.decls.omp_clause_dtor (c, nv); if (x) { @@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, g gimple_seq_add_seq (dlist, tseq); } } + else if (!ctx->scan_inclusive + && TREE_ADDRESSABLE (TREE_TYPE (new_var))) + { + tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); + gimple_add_tmp_var (nv2); + ctx->cb.decl_map->put (new_vard, nv2); + x = lang_hooks.decls.omp_clause_dtor (c, nv2); + if (x) + { + tseq = NULL; + 
dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; goto do_dtor; } @@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, g new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } - tree rvar = NULL_TREE, *rvarp = NULL; + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE; if (is_simd && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_REDUCTION_INSCAN (c)) rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar, rvarp)) + ivar, lvar, rvarp, + &rvar2)) { if (new_vard != new_var) { @@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gs gimple_seq before = NULL; omp_context *octx = ctx->outer; gcc_assert (octx); + if (!octx->scan_inclusive && !has_clauses) + { + gimple_stmt_iterator gsi2 = *gsi_p; + gsi_next (&gsi2); + gimple *stmt2 = gsi_stmt (gsi2); + /* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses + with following GIMPLE_OMP_SCAN with clauses, so that input_phase, + the one with exclusive clause(s), comes first. 
*/ + if (stmt2 + && gimple_code (stmt2) == GIMPLE_OMP_SCAN + && gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL) + { + gsi_remove (gsi_p, false); + gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT); + ctx = maybe_lookup_ctx (stmt2); + gcc_assert (ctx); + lower_omp_scan (gsi_p, ctx); + return; + } + } + bool input_phase = has_clauses ^ octx->scan_inclusive; if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) - && !gimple_omp_for_combined_into_p (octx->stmt) - && octx->scan_inclusive) + && !gimple_omp_for_combined_into_p (octx->stmt)) { if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), OMP_CLAUSE__SIMDUID_)) { tree uid = OMP_CLAUSE__SIMDUID__DECL (c); lane = create_tmp_var (unsigned_type_node); - tree t = build_int_cst (integer_type_node, 1 + !input_phase); + tree t = build_int_cst (integer_type_node, + input_phase ? 1 + : octx->scan_inclusive ? 2 : 3); gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); gimple_call_set_lhs (g, lane); @@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gs tree val = new_var; tree var2 = NULL_TREE; tree var3 = NULL_TREE; + tree var4 = NULL_TREE; + tree lane0 = NULL_TREE; tree new_vard = new_var; if (omp_is_reference (var)) { @@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gs DECL_ATTRIBUTES (v))) { val = unshare_expr (val); + lane0 = TREE_OPERAND (val, 1); TREE_OPERAND (val, 1) = lane; var2 = lookup_decl (v, octx); + if (!octx->scan_inclusive) + var4 = lookup_decl (var2, octx); if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) - var3 = maybe_lookup_decl (var2, octx); + var3 = maybe_lookup_decl (var4 ? 
var4 : var2, octx); if (!input_phase) { var2 = build4 (ARRAY_REF, TREE_TYPE (val), var2, lane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (var2) = 1; + if (!octx->scan_inclusive) + { + var4 = build4 (ARRAY_REF, TREE_TYPE (val), + var4, lane, NULL_TREE, + NULL_TREE); + TREE_THIS_NOTRAP (var4) = 1; + } } else var2 = val; @@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gs else { var2 = build_outer_var_ref (var, octx); - if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { var3 = maybe_lookup_decl (new_vard, octx); - if (var3 == new_vard) + if (var3 == new_vard || var3 == NULL_TREE) var3 = NULL_TREE; + else if (!octx->scan_inclusive && !input_phase) + { + var4 = maybe_lookup_decl (var3, octx); + if (var4 == var3 || var4 == NULL_TREE) + { + if (TREE_ADDRESSABLE (TREE_TYPE (new_var))) + { + var4 = var3; + var3 = NULL_TREE; + } + else + var4 = NULL_TREE; + } + } } + if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE) + var4 = create_tmp_var (TREE_TYPE (val)); } if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { @@ -8689,9 +8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gs } else { + tree x; + if (!octx->scan_inclusive) + { + tree v4 = unshare_expr (var4); + tree v2 = unshare_expr (var2); + x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2); + gimplify_and_add (x, &before); + } gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); - tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) - ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); + x = (DECL_HAS_VALUE_EXPR_P (new_vard) + ? 
DECL_VALUE_EXPR (new_vard) : NULL_TREE); tree vexpr = val; if (x && omp_is_reference (var)) vexpr = build_fold_addr_expr_loc (clause_loc, val); @@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gs SET_DECL_VALUE_EXPR (new_vard, x); SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); DECL_HAS_VALUE_EXPR_P (placeholder) = 0; - x = lang_hooks.decls.omp_clause_assign_op (c, val, var2); - gimplify_and_add (x, &before); + if (octx->scan_inclusive) + { + x = lang_hooks.decls.omp_clause_assign_op (c, val, + var2); + gimplify_and_add (x, &before); + } + else if (lane0 == NULL_TREE) + { + x = lang_hooks.decls.omp_clause_assign_op (c, val, + var4); + gimplify_and_add (x, &before); + } } } else @@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gs tree x = build2 (code, TREE_TYPE (var2), unshare_expr (var2), unshare_expr (val)); - gimplify_assign (unshare_expr (var2), x, &before); - gimplify_assign (val, var2, &before); + if (octx->scan_inclusive) + { + gimplify_assign (unshare_expr (var2), x, &before); + gimplify_assign (val, var2, &before); + } + else + { + gimplify_assign (unshare_expr (var4), + unshare_expr (var2), &before); + gimplify_assign (var2, x, &before); + if (lane0 == NULL_TREE) + gimplify_assign (val, var4, &before); + } } } + if (!octx->scan_inclusive && !input_phase && lane0) + { + tree vexpr = unshare_expr (var4); + TREE_OPERAND (vexpr, 1) = lane0; + if (omp_is_reference (var)) + vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); + SET_DECL_VALUE_EXPR (new_vard, vexpr); + } } } else if (has_clauses) --- gcc/tree-vectorizer.h.jj 2019-06-20 13:26:29.078150879 +0200 +++ gcc/tree-vectorizer.h 2019-06-20 14:18:04.241075200 +0200 @@ -917,7 +917,7 @@ struct _stmt_vec_info { bool strided_p; /* For both loads and stores. */ - unsigned simd_lane_access_p : 2; + unsigned simd_lane_access_p : 3; /* Classifies how the load or store is going to be implemented for loop vectorization. 
*/ --- gcc/tree-vect-data-refs.c.jj 2019-06-20 13:55:35.421150589 +0200 +++ gcc/tree-vect-data-refs.c 2019-06-20 14:18:04.240075216 +0200 @@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, /* See if this was detected as SIMD lane access. */ if (dr->aux == (void *)-1 || dr->aux == (void *)-2 - || dr->aux == (void *)-3) + || dr->aux == (void *)-3 + || dr->aux == (void *)-4) { if (nested_in_vect_loop_p (loop, stmt_info)) return opt_result::failure_at (stmt_info->stmt, --- gcc/tree-vect-stmts.c.jj 2019-06-20 13:26:29.084150785 +0200 +++ gcc/tree-vect-stmts.c 2019-06-20 14:18:04.239075231 +0200 @@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_inf kinds are there in order to allow optimizing the initializer store and combiner sequence, e.g. if it is originally some C++ish user defined reduction, but allow the vectorizer to pattern recognize it - and turn into the appropriate vectorized scan. */ + and turn into the appropriate vectorized scan. + + For exclusive scan, this is slightly different: + #pragma omp simd reduction(inscan,+:r) + for (...) + { + use (r); + #pragma omp scan exclusive (r) + r += something (); + } + shall have body with: + // Initialization for input phase, store the reduction initializer: + _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); + _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); + D.2042[_21] = 0; + // Actual input phase: + ... + r.0_5 = D.2042[_20]; + _6 = _4 + r.0_5; + D.2042[_20] = _6; + // Initialization for scan phase: + _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); + _26 = D.2043[_25]; + D.2044[_25] = _26; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + // Actual scan phase: + ... + r.1_8 = D.2044[_20]; + ... 
*/ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) { @@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_inf if (TREE_CODE (rhs) != SSA_NAME) goto fail; - use_operand_p use_p; - imm_use_iterator iter; gimple *other_store_stmt = NULL; - FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + bool inscan_var_store + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; + + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) { - gimple *use_stmt = USE_STMT (use_p); - if (use_stmt == stmt || is_gimple_debug (use_stmt)) - continue; - if (gimple_bb (use_stmt) != gimple_bb (stmt) - || !gimple_store_p (use_stmt) - || other_store_stmt) - goto fail; - other_store_stmt = use_stmt; + if (!inscan_var_store) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + if (gimple_bb (use_stmt) != gimple_bb (stmt) + || !is_gimple_assign (use_stmt) + || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS + || other_store_stmt + || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) + goto fail; + other_store_stmt = use_stmt; + } + if (other_store_stmt == NULL) + goto fail; + rhs = gimple_assign_lhs (other_store_stmt); + if (!single_imm_use (rhs, &use_p, &other_store_stmt)) + goto fail; + } } - if (other_store_stmt == NULL) - goto fail; - stmt_vec_info other_store_stmt_info - = loop_vinfo->lookup_stmt (other_store_stmt); - if (other_store_stmt_info == NULL - || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3) + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + if (other_store_stmt) + goto fail; + other_store_stmt = use_stmt; + } + } + 
else goto fail; gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); @@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_inf tree rhs1 = gimple_assign_rhs1 (def_stmt); tree rhs2 = gimple_assign_rhs2 (def_stmt); - if (TREE_CODE (rhs1) != SSA_NAME - || TREE_CODE (rhs2) != SSA_NAME) + if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) goto fail; gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); @@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_inf stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); if (load1_stmt_info == NULL || load2_stmt_info == NULL - || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3 - || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3) + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) goto fail; - if (scan_operand_equal_p (gimple_assign_lhs (stmt), + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) + { + dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); + if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) + goto fail; + tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); + tree lrhs; + if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) + lrhs = rhs1; + else + lrhs = rhs2; + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == def_stmt || is_gimple_debug (use_stmt)) + continue; + if (other_store_stmt) + goto fail; + other_store_stmt = use_stmt; + } + } + + if (other_store_stmt == NULL) + goto fail; + if (gimple_bb (other_store_stmt) != gimple_bb (stmt) + || !gimple_store_p (other_store_stmt)) + goto fail; + + stmt_vec_info other_store_stmt_info + = loop_vinfo->lookup_stmt (other_store_stmt); + if 
(other_store_stmt_info == NULL + || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) + goto fail; + + gimple *stmt1 = stmt; + gimple *stmt2 = other_store_stmt; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + std::swap (stmt1, stmt2); + if (scan_operand_equal_p (gimple_assign_lhs (stmt1), gimple_assign_rhs1 (load2_stmt))) { std::swap (rhs1, rhs2); std::swap (load1_stmt, load2_stmt); std::swap (load1_stmt_info, load2_stmt_info); } - if (!scan_operand_equal_p (gimple_assign_lhs (stmt), - gimple_assign_rhs1 (load1_stmt)) - || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt), + if (!scan_operand_equal_p (gimple_assign_lhs (stmt1), + gimple_assign_rhs1 (load1_stmt))) + goto fail; + + tree var3 = NULL_TREE; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 + && !scan_operand_equal_p (gimple_assign_lhs (stmt2), gimple_assign_rhs1 (load2_stmt))) goto fail; + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); + if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) + goto fail; + var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); + if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3)) + || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3)) + || lookup_attribute ("omp simd inscan exclusive", + DECL_ATTRIBUTES (var3))) + goto fail; + } dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR @@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_inf if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) std::swap (var1, var2); + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + if (!lookup_attribute ("omp simd inscan exclusive", + DECL_ATTRIBUTES (var1))) + goto fail; + var1 = var3; + } + if 
(loop_vinfo->scan_map == NULL) goto fail; tree *init = loop_vinfo->scan_map->get (var1); @@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_inf goto fail; /* The IL is as expected, now check if we can actually vectorize it. + Inclusive scan: _26 = D.2043[_25]; _27 = D.2042[_25]; _28 = _26 + _27; @@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_inf from the D.2042[_21] = 0; store): _30 = MEM <vector(8) int> [(int *)&D.2043]; _31 = MEM <vector(8) int> [(int *)&D.2042]; - _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>; + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; _33 = _31 + _32; // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; - _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>; + _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; _35 = _33 + _34; // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _31[1]+.._31[4], ... _31[4]+.._31[7] }; - _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>; + _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; _37 = _35 + _36; // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _31[0]+.._31[4], ... _31[0]+.._31[7] }; _38 = _30 + _37; _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; MEM <vector(8) int> [(int *)&D.2043] = _39; - MEM <vector(8) int> [(int *)&D.2042] = _38; */ + MEM <vector(8) int> [(int *)&D.2042] = _38; + Exclusive scan: + _26 = D.2043[_25]; + D.2044[_25] = _26; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + should be vectorized as (where _40 is the vectorized rhs + from the D.2042[_21] = 0; store): + _30 = MEM <vector(8) int> [(int *)&D.2043]; + _31 = MEM <vector(8) int> [(int *)&D.2042]; + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; + _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; + _34 = _32 + _33; + // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], + // _31[3]+_31[4], ... 
_31[5]+.._31[6] }; + _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; + _36 = _34 + _35; + // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[1]+.._31[4], ... _31[3]+.._31[6] }; + _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>; + _38 = _36 + _37; + // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[0]+.._31[4], ... _31[0]+.._31[6] }; + _39 = _30 + _38; + _50 = _31 + _39; + _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>; + MEM <vector(8) int> [(int *)&D.2044] = _39; + MEM <vector(8) int> [(int *)&D.2042] = _51; */ enum machine_mode vec_mode = TYPE_MODE (vectype); optab optab = optab_for_tree_code (code, vectype, optab_default); if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) @@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info s tree rhs = gimple_assign_rhs1 (stmt); gcc_assert (TREE_CODE (rhs) == SSA_NAME); + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + bool inscan_var_store + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; + + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + rhs = gimple_assign_lhs (use_stmt); + break; + } + } + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); enum tree_code code = gimple_assign_rhs_code (def_stmt); if (code == POINTER_PLUS_EXPR) @@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info s { std::swap (rhs1, rhs2); std::swap (var1, var2); + std::swap (load1_dr_info, load2_dr_info); } tree *init = loop_vinfo->scan_map->get (var1); gcc_assert (init); - tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); - bool inscan_var_store - = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; - unsigned HOST_WIDE_INT nunits; if 
(!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) gcc_unreachable (); @@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info s tree vec_oprnd1 = NULL_TREE; tree vec_oprnd2 = NULL_TREE; tree vec_oprnd3 = NULL_TREE; - tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr)); + tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); tree dataref_offset = build_int_cst (ref_type, 0); tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS); + tree ldataref_ptr = NULL_TREE; tree orig = NULL_TREE; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); for (int j = 0; j < ncopies; j++) { stmt_vec_info new_stmt_info; if (j == 0) { vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); - vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); + if (ldataref_ptr == NULL) + vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); orig = vec_oprnd3; } else { vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); - vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); + if (ldataref_ptr == NULL) + vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); if (!inscan_var_store) dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); } + if (ldataref_ptr) + { + vec_oprnd2 = make_ssa_name (vectype); + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (ldataref_ptr), + dataref_offset); + vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); + gimple *g = gimple_build_assign (vec_oprnd2, data_ref); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + if (prev_stmt_info == NULL) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + tree v = vec_oprnd2; 
for (int i = 0; i < units_log2; ++i) { @@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info s new_temp = new_temp2; } + /* For exclusive scan, perform the perms[i] permutation once + more. */ + if (i == 0 + && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 + && v == vec_oprnd2) + { + v = new_temp; + --i; + continue; + } + tree new_temp2 = make_ssa_name (vectype); g = gimple_build_assign (new_temp2, code, v, new_temp); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); @@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info s STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; prev_stmt_info = new_stmt_info; + tree last_perm_arg = new_temp; + /* For exclusive scan, new_temp computed above is the exclusive scan + prefix sum. Turn it into inclusive prefix sum for the broadcast + of the last element into orig. */ + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + last_perm_arg = make_ssa_name (vectype); + g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + orig = make_ssa_name (vectype); - g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp, - perms[units_log2]); + g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, + last_perm_arg, perms[units_log2]); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; prev_stmt_info = new_stmt_info; if (!inscan_var_store) { - tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (dataref_ptr), dataref_offset); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); g = gimple_build_assign (data_ref, new_temp); @@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info s if (j != 0) dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); - 
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (dataref_ptr), dataref_offset); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); gimple *g = gimple_build_assign (data_ref, orig); @@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_i } return true; } - else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) --- gcc/testsuite/gcc.dg/vect/vect-simd-12.c.jj 2019-06-20 15:08:50.260400440 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-12.c 2019-06-20 15:08:24.332805239 +0200 @@ -0,0 +1,122 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; 
+#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} --- gcc/testsuite/gcc.dg/vect/vect-simd-13.c.jj 2019-06-20 15:47:23.580359715 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-13.c 2019-06-20 15:13:23.500134387 +0200 @@ -0,0 +1,124 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + 
r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} --- gcc/testsuite/gcc.dg/vect/vect-simd-14.c.jj 2019-06-20 15:48:30.536321539 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c 2019-06-20 15:54:39.291617792 +0200 @@ -0,0 +1,94 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +float r = 1.0f, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (float *a, float *b) +{ + #pragma omp simd reduction (inscan, *:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r *= a[i]; + } +} + +__attribute__((noipa)) float +bar (void) +{ + float s = -__builtin_inff (); + #pragma omp simd reduction (inscan, max:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s = s > 
a[i] ? s : a[i]; + } + return s; +} + +int +main () +{ + float s = 1.0f; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + if (i < 80) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 200) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else if (i < 280) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 380) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else + switch (i % 6) + { + case 0: a[i] = 0.25f; break; + case 1: a[i] = 2.0f; break; + case 2: a[i] = -1.0f; break; + case 3: a[i] = -4.0f; break; + case 4: a[i] = 0.5f; break; + case 5: a[i] = 1.0f; break; + default: a[i] = 0.0f; break; + } + b[i] = -19.0f; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r * 16384.0f != 0.125f) + abort (); + float m = -175.25f; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -231.75f; + s *= a[i]; + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); + m += 0.75f; + } + if (bar () != 592.0f) + abort (); + s = -__builtin_inff (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + if (s < a[i]) + s = a[i]; + } + return 0; +} --- gcc/testsuite/gcc.dg/vect/vect-simd-15.c.jj 2019-06-20 15:50:34.483399705 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-15.c 2019-06-20 15:52:09.976919050 +0200 @@ -0,0 +1,186 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; +unsigned short r2, b2[1024]; +unsigned char r3, b3[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp simd reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, 
r3) + { r += a[i]; r2 += a[i]; r3 += a[i]; } + } +} + +__attribute__((noipa)) int +bar (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp simd reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { + s += 2 * a[i]; + s2 += 2 * a[i]; + s3 += 2 * a[i]; + } + } + *s2p = s2; + *s3p = s3; + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, r3) + { + r += a[i]; + r2 += a[i]; + r3 += a[i]; + } + } +} + +__attribute__((noipa)) int +qux (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } + } + *s2p = s2; + *s3p = s3; + return s; +} + +int +main () +{ + int s = 0; + unsigned short s2; + unsigned char s3; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + if (bar (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] 
!= (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + } + s += 2 * i; + } + r = 0; + r2 = 0; + r3 = 0; + baz (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + s2 = 0; + s3 = 0; + if (qux (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + s += 2 * i; + } + return 0; +} --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c.jj 2019-06-20 15:58:35.276983324 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c 2019-06-20 15:58:35.274983355 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static void +sse2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c.jj 2019-06-20 15:58:35.283983216 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c 2019-06-20 15:58:35.281983247 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +sse2_test (void) +{ + 
do_main (); +} --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c.jj 2019-06-20 15:58:35.288983139 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c 2019-06-20 15:58:35.287983154 +0200 @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +sse2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c.jj 2019-06-20 15:58:35.293983061 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c 2019-06-20 15:58:35.292983077 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +sse2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c.jj 2019-06-20 15:58:35.299982969 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c 2019-06-20 15:58:35.297982999 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static void +avx2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c.jj 2019-06-20 15:58:35.305982876 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c 2019-06-20 15:58:35.303982907 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { 
dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +avx2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c.jj 2019-06-20 15:58:35.310982799 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c 2019-06-20 15:58:35.309982815 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +avx2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c.jj 2019-06-20 15:58:35.316982707 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c 2019-06-20 15:58:35.314982738 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +avx2_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c.jj 2019-06-20 15:58:35.323982599 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c 2019-06-20 15:58:35.321982630 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static 
void +avx512f_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c.jj 2019-06-20 15:58:35.328982522 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c 2019-06-20 15:58:35.326982553 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +avx512f_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c.jj 2019-06-20 15:58:35.333982445 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c 2019-06-20 15:58:35.332982461 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +avx512f_test (void) +{ + do_main (); +} --- gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c.jj 2019-06-20 15:58:35.347982230 +0200 +++ gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c 2019-06-20 15:58:35.346982245 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512bw-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +avx512bw_test (void) +{ + do_main (); +} --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj 2019-06-20 16:00:34.800142524 +0200 +++ 
gcc/testsuite/g++.dg/vect/simd-6.cc 2019-06-20 16:07:41.722559826 +0200 @@ -0,0 +1,161 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +template <typename T> +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + T s; +}; + +template <typename T> +S<T>::S () : s (0) +{ +} + +template <typename T> +S<T>::~S () +{ +} + +template <typename T> +S<T>::S (const S &x) +{ + s = x.s; +} + +template <typename T> +S<T> & +S<T>::operator= (const S &x) +{ + s = x.s; + return *this; +} + +template <typename T> +static inline void +ini (S<T> &x) +{ + x.s = 0; +} + +S<int> r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +template <typename T> +__attribute__((noipa)) void +foo (S<T> *a, S<T> *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +template <typename T> +__attribute__((noipa)) S<T> +bar (void) +{ + S<T> s; + #pragma omp simd reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S<T> (s); +} + +__attribute__((noipa)) void +baz (S<int> *a, S<int> *b) +{ + #pragma omp simd reduction (inscan, +:r) simdlen(1) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S<int> +qux (void) +{ + S<int> s; + #pragma omp simd if (0) reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S<int> (s); +} + 
+int +main () +{ + S<int> s; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar<int> ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + return 0; +} --- gcc/testsuite/g++.dg/vect/simd-7.cc.jj 2019-06-20 16:00:51.095891542 +0200 +++ gcc/testsuite/g++.dg/vect/simd-7.cc 2019-06-20 16:12:50.222747875 +0200 @@ -0,0 +1,124 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "../../gcc.dg/vect/tree-vect.h" + +int r, a[1024], b[1024], q; + +template <typename T, typename U> +__attribute__((noipa)) void +foo (T a, T b, U r) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +template <typename T> +__attribute__((noipa)) T +bar (void) +{ + T &s = q; + q = 0; + #pragma omp simd reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +template <typename T> +__attribute__((noipa)) void +baz (T *a, T *b, T &r) +{ + #pragma omp simd reduction (inscan, +:r) if (simd: 0) + for (T i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan 
exclusive(r) + r += a[i]; + } +} + +template <typename T> +__attribute__((noipa)) int +qux (void) +{ + T s = q; + q = 0; + #pragma omp simd reduction (inscan, +:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo<int *, int &> (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar<int> () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz<int> (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux<int &> () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} --- gcc/testsuite/g++.dg/vect/simd-8.cc.jj 2019-06-20 16:00:54.154844430 +0200 +++ gcc/testsuite/g++.dg/vect/simd-8.cc 2019-06-20 16:15:37.994133891 +0200 @@ -0,0 +1,122 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +int r, a[1024], b[1024], q; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b, int &r) +{ + #pragma omp simd reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int &s = q; + q = 0; + #pragma omp simd 
reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, int &r) +{ + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int &s = q; + q = 0; + #pragma omp simd reduction (inscan, foo:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj 2019-06-20 16:00:57.197797566 +0200 +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-06-20 16:17:27.484427949 +0200 @@ -0,0 +1,153 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S 
() +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b, S &r) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp simd reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +__attribute__((noipa)) void +baz (S *a, S *b, S &r) +{ + #pragma omp simd reduction (inscan, +:r) simdlen(1) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp simd if (0) reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +int +main () +{ + S s; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + foo (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + return 
0; +} --- gcc/testsuite/c-c++-common/gomp/scan-2.c.jj 2019-06-10 14:18:17.461525669 +0200 +++ gcc/testsuite/c-c++-common/gomp/scan-2.c 2019-06-20 23:54:03.615422149 +0200 @@ -8,7 +8,7 @@ f1 (int *c, int *d) for (i = 0; i < 64; i++) { d[i] = a; - #pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ + #pragma omp scan exclusive (a) a += c[i]; } } Jakub