On Tue, 23 Feb 2021, Tamar Christina wrote:
> Hi Richi,
>
> The attached testcase ICEs due to a couple of issues.
> In the testcase you have two SLP instances that share the majority of their
> definition with each other. One tree defines a COMPLEX_MUL sequence and the
> other tree a COMPLEX_FMA.
>
> The ice happens because:
>
> 1. the refcounts are wrong, in particular the FMA case doesn't correctly count
> the references for the COMPLEX_MUL that it consumes.
>
> 2. when the FMA is created it incorrectly assumes it can just tear apart the
> MUL
> node that it's consuming. This is wrong and should only be done when there is
> no more uses of the node, in which case the vector only pattern is no longer
> relevant.
>
> To fix the last part the SLP only pattern reset code was moved into
> vect_free_slp_tree which results in cleaner code. I also think it does belong
> there since that function knows when there are no more uses of the node and so
> the pattern should be unmarked, so when the the vectorizer is inspecting the
> BB
> it doesn't find the now invalid vector only patterns.
>
> The patch also clears the SLP_TREE_REPRESENTATIVE when stores are removed such
> that we don't hit an error later trying to free the stmt_vec_info again.
>
> Lastly it also tweaks the results of whether a pattern has been detected or
> not
> to return true when another SLP instance has created a pattern that is only
> used
> by a different instance (due to the trees being unshared).
>
> Instead of ICEing this code now produces
>
> adrp x1, .LANCHOR0
> add x2, x1, :lo12:.LANCHOR0
> movi v1.2s, 0
> mov w0, 0
> ldr x4, [x1, #:lo12:.LANCHOR0]
> ldrsw x3, [x2, 16]
> ldr x1, [x2, 8]
> ldrsw x2, [x2, 20]
> ldr d0, [x4]
> ldr d2, [x1, x3, lsl 3]
> fcmla v2.2s, v0.2s, v0.2s, #0
> fcmla v2.2s, v0.2s, v0.2s, #90
> str d2, [x1, x3, lsl 3]
> fcmla v1.2s, v0.2s, v0.2s, #0
> fcmla v1.2s, v0.2s, v0.2s, #90
> str d1, [x1, x2, lsl 3]
> ret
>
> PS. This testcase actually shows that the codegen we get in these cases is not
> optimal. It should generate a MUL + ADD instead MUL + FMA.
>
> But that's for GCC 12.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
OK.
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/99149
> * tree-vect-slp-patterns.c (vect_detect_pair_op): Don't recreate the
> buffer.
> (vect_slp_reset_pattern): Remove.
> (complex_fma_pattern::matches): Remove call to vect_slp_reset_pattern.
> (complex_mul_pattern::build, complex_fma_pattern::build,
> complex_fms_pattern::build): Fix ref counts.
> * tree-vect-slp.c (vect_free_slp_tree): Undo SLP only pattern relevancy
> when node is being deleted.
> (vect_match_slp_patterns_2): Correct result of cache hit on patterns.
> (vect_schedule_slp): Invalidate SLP_TREE_REPRESENTATIVE of removed
> stores.
> * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize value.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/99149
> * g++.dg/vect/pr99149.cc: New test.
>
> --- inline copy of patch --
> diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc
> b/gcc/testsuite/g++.dg/vect/pr99149.cc
> new file mode 100755
> index
> 0000000000000000000000000000000000000000..902a26f576fcc79d2802bec093668674cca1c84f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" {
> target { aarch64*-*-* } } } */
> +
> +class a {
> + float b;
> + float c;
> +
> +public:
> + a(float d, float e) : b(d), c(e) {}
> + a operator+(a d) { return a(b + d.b, c + d.c); }
> + a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> +};
> +int f, g;
> +class {
> + a *h;
> + a *i;
> +
> +public:
> + void j() {
> + a k = h[0], l = i[g], m = k * i[f];
> + i[g] = l + m;
> + i[f] = m;
> + }
> +} n;
> +main() { n.j(); }
> +
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "slp2" } } */
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_FMA" 1 "slp2" } } */
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index
> f0817da9f622d22e3df2e30410d1cf610b4ffa1d..1e2769662a54229ab8e24390f97dfe206f17ab57
> 100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -407,9 +407,8 @@ vect_detect_pair_op (slp_tree node1, slp_tree node2,
> lane_permutation_t &lanes,
>
> if (result != CMPLX_NONE && ops != NULL)
> {
> - ops->create (2);
> - ops->quick_push (node1);
> - ops->quick_push (node2);
> + ops->safe_push (node1);
> + ops->safe_push (node2);
> }
> return result;
> }
> @@ -1090,15 +1089,17 @@ complex_mul_pattern::build (vec_info *vinfo)
> {
> slp_tree node;
> unsigned i;
> + slp_tree newnode
> + = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
> *this->m_node);
> + SLP_TREE_REF_COUNT (this->m_ops[2])++;
> +
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
> vect_free_slp_tree (node);
>
> /* First re-arrange the children. */
> SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
> SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
> - SLP_TREE_CHILDREN (*this->m_node)[1] =
> - vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
> - SLP_TREE_REF_COUNT (this->m_ops[2])++;
> + SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
>
> /* And then rewrite the node itself. */
> complex_pattern::build (vinfo);
> @@ -1133,18 +1134,6 @@ class complex_fma_pattern : public complex_pattern
> }
> };
>
> -/* Helper function to "reset" a previously matched node and undo the changes
> - made enough so that the node is treated as an irrelevant node. */
> -
> -static inline void
> -vect_slp_reset_pattern (slp_tree node)
> -{
> - stmt_vec_info stmt_info = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
> - STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> - STMT_SLP_TYPE (stmt_info) = pure_slp;
> - SLP_TREE_REPRESENTATIVE (node) = stmt_info;
> -}
> -
> /* Pattern matcher for trying to match complex multiply and accumulate
> and multiply and subtract patterns in SLP tree.
> If the operation matches then IFN is set to the operation it matched and
> @@ -1208,15 +1197,6 @@ complex_fma_pattern::matches (complex_operation_t op,
> if (!vect_pattern_validate_optab (ifn, vnode))
> return IFN_LAST;
>
> - /* FMA matched ADD + CMUL. During the matching of CMUL the
> - stmt that starts the pattern is marked as being in a pattern,
> - namely the CMUL. When replacing this with a CFMA we have to
> - unmark this statement as being in a pattern. This is because
> - vect_mark_pattern_stmts will only mark the current stmt as being
> - in a pattern. Later on when the scalar stmts are examined the
> - old statement which is supposed to be irrelevant will point to
> - CMUL unless we undo the pattern relationship here. */
> - vect_slp_reset_pattern (node);
> ops->truncate (0);
> ops->create (3);
>
> @@ -1259,10 +1239,17 @@ complex_fma_pattern::recognize
> (slp_tree_to_load_perm_map_t *perm_cache,
> void
> complex_fma_pattern::build (vec_info *vinfo)
> {
> + slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
> +
> SLP_TREE_CHILDREN (*this->m_node).release ();
> SLP_TREE_CHILDREN (*this->m_node).create (3);
> SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
>
> + SLP_TREE_REF_COUNT (this->m_ops[1])++;
> + SLP_TREE_REF_COUNT (this->m_ops[2])++;
> +
> + vect_free_slp_tree (node);
> +
> complex_pattern::build (vinfo);
> }
>
> @@ -1427,6 +1414,11 @@ complex_fms_pattern::build (vec_info *vinfo)
> {
> slp_tree node;
> unsigned i;
> + slp_tree newnode =
> + vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
> + SLP_TREE_REF_COUNT (this->m_ops[0])++;
> + SLP_TREE_REF_COUNT (this->m_ops[1])++;
> +
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
> vect_free_slp_tree (node);
>
> @@ -1436,10 +1428,7 @@ complex_fms_pattern::build (vec_info *vinfo)
> /* First re-arrange the children. */
> SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
> SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
> - SLP_TREE_CHILDREN (*this->m_node).quick_push (
> - vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node));
> - SLP_TREE_REF_COUNT (this->m_ops[0])++;
> - SLP_TREE_REF_COUNT (this->m_ops[1])++;
> + SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
>
> /* And then rewrite the node itself. */
> complex_pattern::build (vinfo);
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index
> ea8a97b01c6371791ac66de3e1dabfedee69cb67..605873714a5cafaaf822f61f1f769f96b3876694
> 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -146,6 +146,16 @@ vect_free_slp_tree (slp_tree node)
> if (child)
> vect_free_slp_tree (child);
>
> + /* If the node defines any SLP only patterns then those patterns are no
> + longer valid and should be removed. */
> + stmt_vec_info rep_stmt_info = SLP_TREE_REPRESENTATIVE (node);
> + if (rep_stmt_info && STMT_VINFO_SLP_VECT_ONLY_PATTERN (rep_stmt_info))
> + {
> + stmt_vec_info stmt_info = vect_orig_stmt (rep_stmt_info);
> + STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> + STMT_SLP_TYPE (stmt_info) = STMT_SLP_TYPE (rep_stmt_info);
> + }
> +
> delete node;
> }
>
> @@ -2395,7 +2405,9 @@ vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info
> *vinfo,
> slp_tree node = *ref_node;
> bool found_p = false;
> if (!node || visited->add (node))
> - return false;
> + return node
> + && SLP_TREE_REPRESENTATIVE (node)
> + && STMT_VINFO_SLP_VECT_ONLY_PATTERN (SLP_TREE_REPRESENTATIVE (node));
>
> slp_tree child;
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> @@ -6451,6 +6463,11 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance>
> slp_instances)
> store_info = vect_orig_stmt (store_info);
> /* Free the attached stmt_vec_info and remove the stmt. */
> vinfo->remove_stmt (store_info);
> +
> + /* Invalidate SLP_TREE_REPRESENTATIVE in case we released it
> + to not crash in vect_free_slp_tree later. */
> + if (SLP_TREE_REPRESENTATIVE (root) == store_info)
> + SLP_TREE_REPRESENTATIVE (root) = NULL;
> }
> }
> }
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index
> 5b45df3a4e00266b7530eb4da6985f0d940cb05b..63ba594f2276850a00fc372072d98326891f19e6
> 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -695,6 +695,7 @@ vec_info::new_stmt_vec_info (gimple *stmt)
> STMT_VINFO_REDUC_FN (res) = IFN_LAST;
> STMT_VINFO_REDUC_IDX (res) = -1;
> STMT_VINFO_SLP_VECT_ONLY (res) = false;
> + STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
> STMT_VINFO_VEC_STMTS (res) = vNULL;
>
> if (is_a <loop_vec_info> (this)
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)