On Fri, 19 Mar 2021, Tamar Christina wrote: > Hi Richi, > > The attach testcase ICEs because as you showed on the PR we have one child > which is an internal with a PERM of EVENEVEN and one with TOP. > > The problem is while we can conceptually merge the permute itself into > EVENEVEN, > merging the lanes don't really make sense. > > That said, we no longer even require the merged lanes as we create the > permutes > based on the KIND directly. > > This patch just removes all of that code. > > Unfortunately it still won't vectorize with the cost model enabled due to the > blend that's created combining the load and the external > > note: node 0x51f2ce8 (max_nunits=1, refcnt=1) > note: op: VEC_PERM_EXPR > note: { } > note: lane permutation { 0[0] 1[1] } > note: children 0x51f23e0 0x51f2578 > note: node 0x51f23e0 (max_nunits=2, refcnt=1) > note: op template: _16 = REALPART_EXPR <*t1_9(D)>; > note: stmt 0 _16 = REALPART_EXPR <*t1_9(D)>; > note: stmt 1 _16 = REALPART_EXPR <*t1_9(D)>; > note: load permutation { 0 0 } > note: node (external) 0x51f2578 (max_nunits=1, refcnt=1) > note: { _18, _18 } > > which costs the cost for the load-and-split and the cost of the external > splat, > and the one for blending them while in reality it's just a scalar load and > insert. > > The compiler (with the cost model disabled) generates > > ldr q1, [x19] > dup v1.2d, v1.d[0] > ldr d0, [x19, 8] > fneg d0, d0 > ins v1.d[1], v0.d[0] > > while really it should be > > ldp d1, d0, [x19] > fneg d0, d0 > ins v1.d[1], v0.d[0] > > but that's for another time. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master?
OK. Thanks, Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/99656 > * tree-vect-slp-patterns.c (linear_loads_p, > complex_add_pattern::matches, is_eq_or_top, > vect_validate_multiplication, complex_mul_pattern::matches, > complex_fms_pattern::matches): Remove complex_perm_kinds_t. > * tree-vectorizer.h: (complex_load_perm_t): Removed. > (slp_tree_to_load_perm_map_t): Use complex_perm_kinds_t instead of > complex_load_perm_t. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/99656 > * gfortran.dg/vect/pr99656.f90: New test. > > --- inline copy of patch -- > diff --git a/gcc/testsuite/gfortran.dg/vect/pr99656.f90 > b/gcc/testsuite/gfortran.dg/vect/pr99656.f90 > new file mode 100644 > index > 0000000000000000000000000000000000000000..59a28ee19e8f352d534e51558a7fce5c4d78100e > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/vect/pr99656.f90 > @@ -0,0 +1,24 @@ > +! { dg-do compile { target { aarch64*-*-* } } } > +! { dg-require-effective-target le } > +! { dg-additional-options "-march=armv8.3-a -O1 -ftree-slp-vectorize" } > + > +SUBROUTINE ZLAHQR2(H, LDH, H22, T1) > + > + INTEGER LDH > + COMPLEX*16 H(LDH, *) > + > + INTEGER NR > + COMPLEX*16 H22, SUM, T1, V2 > + > + COMPLEX*16 V( 3 ) > + > + EXTERNAL ZLARFG > + INTRINSIC DCONJG > + > + V2 = H22 > + CALL ZLARFG(T1) > + SUM = DCONJG(T1) * H(1, 1) > + H(1, 1) = SUM * V2 > + > + RETURN > +END > diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c > index > 1e2769662a54229ab8e24390f97dfe206f17ab57..85f2d03754d3ed87e4e34befdca417f2dd4ea21d > 100644 > --- a/gcc/tree-vect-slp-patterns.c > +++ b/gcc/tree-vect-slp-patterns.c > @@ -203,68 +203,49 @@ vect_merge_perms (complex_perm_kinds_t a, > complex_perm_kinds_t b) > /* Check to see if all loads rooted in ROOT are linear. Linearity is > defined as having no gaps between values loaded. */ > > -static complex_load_perm_t > +static complex_perm_kinds_t > linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) > { > if (!root) > - return std::make_pair (PERM_UNKNOWN, vNULL); > + return PERM_UNKNOWN; > > unsigned i; > - complex_load_perm_t *tmp; > + complex_perm_kinds_t *tmp; > > if ((tmp = perm_cache->get (root)) != NULL) > return *tmp; > > - complex_load_perm_t retval = std::make_pair (PERM_UNKNOWN, vNULL); > + complex_perm_kinds_t retval = PERM_UNKNOWN; > perm_cache->put (root, retval); > > /* If it's a load node, then just read the load permute. */ > if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) > { > - retval.first = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); > - retval.second = SLP_TREE_LOAD_PERMUTATION (root); > + retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); > perm_cache->put (root, retval); > return retval; > } > else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def) > { > - retval.first = PERM_TOP; > + retval = PERM_TOP; > perm_cache->put (root, retval); > return retval; > } > > - auto_vec<load_permutation_t> all_loads; > complex_perm_kinds_t kind = PERM_TOP; > > slp_tree child; > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child) > { > - complex_load_perm_t res = linear_loads_p (perm_cache, child); > - kind = vect_merge_perms (kind, res.first); > + complex_perm_kinds_t res = linear_loads_p (perm_cache, child); > + kind = vect_merge_perms (kind, res); > /* Unknown and Top are not valid on blends as they produce no permute. > */ > - retval.first = kind; > + retval = kind; > if (kind == PERM_UNKNOWN || kind == PERM_TOP) > return retval; > - all_loads.safe_push (res.second); > } > > - if (SLP_TREE_LANE_PERMUTATION (root).exists ()) > - { > - lane_permutation_t perm = SLP_TREE_LANE_PERMUTATION (root); > - load_permutation_t nloads; > - nloads.create (SLP_TREE_LANES (root)); > - nloads.quick_grow (SLP_TREE_LANES (root)); > - for (i = 0; i < SLP_TREE_LANES (root); i++) > - nloads[i] = all_loads[perm[i].first][perm[i].second]; > - > - retval.first = kind; > - retval.second = nloads; > - } > - else > - { > - retval.first = kind; > - retval.second = all_loads[0]; > - } > + retval = kind; > > perm_cache->put (root, retval); > return retval; > @@ -704,11 +685,11 @@ complex_add_pattern::matches (complex_operation_t op, > vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]); > > /* First node must be unpermuted. */ > - if (linear_loads_p (perm_cache, children[0]).first != PERM_EVENODD) > + if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD) > return IFN_LAST; > > /* Second node must be permuted. */ > - if (linear_loads_p (perm_cache, children[1]).first != PERM_ODDEVEN) > + if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN) > return IFN_LAST; > > if (!vect_pattern_validate_optab (ifn, *node)) > @@ -795,9 +776,9 @@ vect_normalize_conj_loc (vec<slp_tree> args, bool > *neg_first_p = NULL) > /* Helper function to check if PERM is KIND or PERM_TOP. */ > > static inline bool > -is_eq_or_top (complex_load_perm_t perm, complex_perm_kinds_t kind) > +is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind) > { > - return perm.first == kind || perm.first == PERM_TOP; > + return perm == kind || perm == PERM_TOP; > } > > /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both > MULT_EXPR > @@ -828,7 +809,7 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t > *perm_cache, > /* Canonicalization for fms is not consistent. So have to test both > variants to be sure. This needs to be fixed in the mid-end so > this part can be simpler. */ > - kind = linear_loads_p (perm_cache, right_op[0]).first; > + kind = linear_loads_p (perm_cache, right_op[0]); > if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), > PERM_ODDODD) > && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]), > PERM_ODDEVEN)) > @@ -839,7 +820,7 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t > *perm_cache, > } > else > { > - if (linear_loads_p (perm_cache, right_op[1]).first != PERM_ODDODD > + if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD > && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), > PERM_ODDEVEN)) > return false; > @@ -852,15 +833,15 @@ vect_validate_multiplication > (slp_tree_to_load_perm_map_t *perm_cache, > /* Check if the conjugate is on the second first or second operand. The > order of the node with the conjugate value determines this, and the dup > node must be one of lane 0 of the same DR as the neg node. */ > - kind = linear_loads_p (perm_cache, left_op[index1]).first; > + kind = linear_loads_p (perm_cache, left_op[index1]); > if (kind == PERM_TOP) > { > - if (linear_loads_p (perm_cache, left_op[index2]).first == PERM_EVENODD) > + if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD) > return true; > } > else if (kind == PERM_EVENODD) > { > - if ((kind = linear_loads_p (perm_cache, left_op[index2]).first) == > PERM_EVENODD) > + if ((kind = linear_loads_p (perm_cache, left_op[index2])) == > PERM_EVENODD) > return false; > return true; > } > @@ -1003,7 +984,7 @@ complex_mul_pattern::matches (complex_operation_t op, > left_op.safe_splice (SLP_TREE_CHILDREN (muls[0])); > right_op.safe_splice (SLP_TREE_CHILDREN (muls[1])); > > - if (linear_loads_p (perm_cache, left_op[1]).first == PERM_ODDEVEN) > + if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN) > return IFN_LAST; > > bool neg_first = false; > @@ -1035,7 +1016,7 @@ complex_mul_pattern::matches (complex_operation_t op, > ops->truncate (0); > ops->create (3); > > - complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]).first; > + complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]); > if (kind == PERM_EVENODD) > { > ops->quick_push (left_op[1]); > @@ -1356,7 +1337,7 @@ complex_fms_pattern::matches (complex_operation_t op, > ops->truncate (0); > ops->create (4); > > - complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]).first; > + complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]); > if (kind == PERM_EVENODD) > { > ops->quick_push (child); > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index > b861c97ab3aef179ba9b2900701cf09e75a847a5..9861d9e88102138c0e2de8dfc34422ff65a0e9e0 > 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2059,13 +2059,8 @@ typedef enum _complex_perm_kinds { > PERM_TOP > } complex_perm_kinds_t; > > -/* A pair with a load permute and a corresponding complex_perm_kind which > gives > - information about the load it represents. */ > -typedef std::pair<complex_perm_kinds_t, load_permutation_t> > - complex_load_perm_t; > - > /* Cache from nodes to the load permutation they represent. */ > -typedef hash_map <slp_tree, complex_load_perm_t> > +typedef hash_map <slp_tree, complex_perm_kinds_t> > slp_tree_to_load_perm_map_t; > > /* Vector pattern matcher base class. All SLP pattern matchers must inherit > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)