On 01/10/2019 12:07, Joel wrote: > > SLP vectorization: vectorize vector constructors > > > Currently SLP vectorization can build SLP trees staring from > reductions or from group stores. This patch adds a third starting > point: vector constructors. > > > For the following test case (compiled with -O3 -fno-vect-cost-model): > > > char g_d[1024], g_s1[1024], g_s2[1024]; void test_loop(void) { char /d > = g_d, /s1 = g_s1, *s2 = g_s2; > > > for ( int y = 0; y < 128; y++ ) > { > for ( int x = 0; x < 16; x++ ) > d[x] = s1[x] + s2[x]; > d += 16; > } > > } > > > before patch: test_loop: .LFB0: .cfi_startproc adrp x0, g_s1 adrp x2, > g_s2 add x3, x0, :lo12:g_s1 add x4, x2, :lo12:g_s2 ldrb w7, [x2, > #:lo12:g_s2] ldrb w1, [x0, #:lo12:g_s1] adrp x0, g_d ldrb w6, [x4, 1] > add x0, x0, :lo12:g_d ldrb w5, [x3, 1] add w1, w1, w7 fmov s0, w1 ldrb > w7, [x4, 2] add w5, w5, w6 ldrb w1, [x3, 2] ldrb w6, [x4, 3] add x2, > x0, 2048 ins v0.b[1], w5 add w1, w1, w7 ldrb w7, [x3, 3] ldrb w5, [x4, > 4] add w7, w7, w6 ldrb w6, [x3, 4] ins v0.b[2], w1 ldrb w8, [x4, 5] > add w6, w6, w5 ldrb w5, [x3, 5] ldrb w9, [x4, 6] add w5, w5, w8 ldrb > w1, [x3, 6] ins v0.b[3], w7 ldrb w8, [x4, 7] add w1, w1, w9 ldrb w11, > [x3, 7] ldrb w7, [x4, 8] add w11, w11, w8 ldrb w10, [x3, 8] ins > v0.b[4], w6 ldrb w8, [x4, 9] add w10, w10, w7 ldrb w9, [x3, 9] ldrb > w7, [x4, 10] add w9, w9, w8 ldrb w8, [x3, 10] ins v0.b[5], w5 ldrb w6, > [x4, 11] add w8, w8, w7 ldrb w7, [x3, 11] ldrb w5, [x4, 12] add w7, > w7, w6 ldrb w6, [x3, 12] ins v0.b[6], w1 ldrb w12, [x4, 13] add w6, > w6, w5 ldrb w5, [x3, 13] ldrb w1, [x3, 14] add w5, w5, w12 ldrb w13, > [x4, 14] ins v0.b[7], w11 ldrb w12, [x4, 15] add w4, w1, w13 ldrb w1, > [x3, 15] add w1, w1, w12 ins v0.b[8], w10 ins v0.b[9], w9 ins > v0.b[10], w8 ins v0.b[11], w7 ins v0.b[12], w6 ins v0.b[13], w5 ins > v0.b[14], w4 ins v0.b[15], w1 .p2align 3,,7 .L2: str q0, [x0], 16 cmp > x2, x0 bne .L2 ret .cfi_endproc .LFE0: > > > After patch: > > > test_loop: .LFB0: .cfi_startproc adrp x3, g_s1 adrp x2, g_s2 add x3, > x3, :lo12:g_s1 add x2, x2, :lo12:g_s2 adrp x0, g_d add x0, x0, > :lo12:g_d add x1, x0, 2048 ldr q1, [x2] ldr q0, [x3] add v0.16b, > v0.16b, v1.16b .p2align 3,,7 .L2: str q0, [x0], 16 cmp x0, x1 bne .L2 > ret .cfi_endproc .LFE0: > > > > > bootstrapped and tested on aarch64-none-linux-gnu > Patch attached:
From 7b9e6d02017ffe6f7ab17cbdd48da41ccc5f6db0 Mon Sep 17 00:00:00 2001 From: Joel Hutton <joel.hut...@arm.com> Date: Fri, 27 Sep 2019 10:26:00 +0100 Subject: [PATCH] SLP vectorization: vectorize vector constructors
--- gcc/tree-vect-slp.c | 98 ++++++++++++++++++++++++++++++++++++------- gcc/tree-vect-stmts.c | 20 +++++++++ 2 files changed, 103 insertions(+), 15 deletions(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 9b86b67734ad3e3506e9cee6a532b68decf24ae6..4c715ebe34dbdb8072e15dc9053f53a1949a070d 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1923,6 +1923,8 @@ vect_analyze_slp_instance (vec_info *vinfo, struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); vec<stmt_vec_info> scalar_stmts; + bool constructor = false; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { scalar_type = TREE_TYPE (DR_REF (dr)); @@ -1935,6 +1937,17 @@ vect_analyze_slp_instance (vec_info *vinfo, vectype = STMT_VINFO_VECTYPE (stmt_info); group_size = REDUC_GROUP_SIZE (stmt_info); } + else if (is_gimple_assign (stmt_info->stmt) + && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))) + == VECTOR_TYPE + && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR) + { + vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)); + group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt)); + constructor = true; + if (TREE_CODE (vectype) != VECTOR_TYPE) + vectype = NULL; + } else { gcc_assert (is_a <loop_vec_info> (vinfo)); @@ -1981,6 +1994,32 @@ vect_analyze_slp_instance (vec_info *vinfo, STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); } + else if (constructor) + { + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); + tree val; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) + { + tree prev_val; + int j; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), j, prev_val) + { + if (val == prev_val && i!=j) + return false; + } + if (TREE_CODE (val) == SSA_NAME) + { + gimple* def = SSA_NAME_DEF_STMT (val); + stmt_vec_info def_info = vinfo->lookup_stmt (def); + /* Value is defined in another basic block. */ + if (!def_info) + return false; + scalar_stmts.safe_push (def_info); + } + else + return false; + } + } else { /* Collect reduction statements. */ @@ -2227,6 +2266,50 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) max_tree_size); } + /* Find SLP sequences starting from a vector constructor. */ + gimple_stmt_iterator gsi; + if (is_a <bb_vec_info> (vinfo)) + { + bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); + + for (gsi = bb_vinfo->region_begin; + gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + bool vectorizable = true; + if ( is_gimple_assign (stmt) + && gimple_assign_rhs_code (stmt) == CONSTRUCTOR + && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE) + { + tree rhs = gimple_assign_rhs1 (stmt); + tree val; + int i; + /* Check that the constructor elements are unique. */ + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) + { + tree prev_val; + int j; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), j, prev_val) + { + if (val == prev_val && i!=j) + { + vectorizable = false; + break; + } + } + } + + if (vectorizable) + { + first_element = bb_vinfo->lookup_stmt (stmt); + vect_analyze_slp_instance (vinfo, first_element, + max_tree_size); + } + } + } + } + + return opt_result::success (); } @@ -2908,21 +2991,6 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, return NULL; } - /* If there are no grouped stores in the region there is no need - to continue with pattern recog as vect_analyze_slp will fail - anyway. */ - if (bb_vinfo->grouped_stores.is_empty ()) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: no grouped stores in " - "basic block.\n"); - - delete bb_vinfo; - return NULL; - } - - /* While the rest of the analysis below depends on it in some way. */ fatal = false; vect_pattern_recog (bb_vinfo); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b1e97f85d96b352e9e52bbb7c265dda6e1d0f3ad..9e1a762f922d069da4e5bcc3cb05df2bc3d1acff 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6237,6 +6237,26 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, gimple_assign_set_lhs (new_stmt, new_temp); new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + + /* For vector constructors, the same SSA name must be used, so that + data flow into other basic blocks is maintained. */ + imm_use_iterator use_iter; + gimple *use_stmt; + tree lhs = gimple_get_lhs (stmt_info->stmt); + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) + { + if (is_gimple_assign (use_stmt) + && gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR + && use_stmt->bb == stmt_info->stmt->bb) + { + gassign *rstmt + = gimple_build_assign (gimple_get_lhs (use_stmt), + gimple_get_lhs (new_stmt)); + gimple_stmt_iterator rgsi = gsi_for_stmt (use_stmt); + gsi_replace (&rgsi, rstmt, true); + } + } + if (vec_cvt_dest) { new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); -- 2.17.1