On November 22, 2019 3:25:05 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>The undistribute_bitref_for_vector function assumes that BIT_FIELD_REFs
>with a vector first argument must always have the type of the vector
>element, but that is not the case: BIT_FIELD_REF can extract any type
>with the corresponding size from any other vector.
>
>So, without this patch, when it saw e.g. an addition in unsigned long
>long type, even when the vector was 2x long long, it computed the
>addition in a 2x long long vector (which in theory could run into
>undefined overflows) and failed IL checking, because the conversion
>from long long to unsigned long long is not useless.  Even worse, for
>double addition with a 2x long long vector, it would again perform the
>addition in a 2x long long vector type, which is completely wrong.
>
>The following patch determines the right vector type and adds VCE
>around the operands when needed.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
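
A minimal sketch of the kind of source this is about (a cut-down variant in the spirit of the new execute test further down; the function name is purely illustrative):

typedef long long v2di __attribute__ ((__may_alias__, __vector_size__ (2 * sizeof (long long))));

unsigned long long b[4];

unsigned long long
sum_ull (v2di c, v2di d)
{
  *(v2di *) &b[0] = c;
  *(v2di *) &b[2] = d;
  /* Earlier passes turn these loads into BIT_FIELD_REFs of c and d,
     so the reassociated unsigned long long sum is fed from a vector
     whose element type is (signed) long long.  */
  return b[0] + b[1] + b[2] + b[3];
}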
Ok.

Thanks,
Richard.

>2019-11-22  Jakub Jelinek  <ja...@redhat.com>
>
>	PR tree-optimization/92618
>	* tree-ssa-reassoc.c (v_info): Change from auto_vec to a struct
>	containing the auto_vec and a tree.
>	(undistribute_bitref_for_vector): Handle the case when element type
>	of vec is not the same as type of the BIT_FIELD_REF.  Formatting
>	fixes.
>
>	* gcc.c-torture/compile/pr92618.c: New test.
>	* gcc.c-torture/execute/pr92618.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj	2019-11-13 10:54:56.038884580 +0100
>+++ gcc/tree-ssa-reassoc.c	2019-11-22 10:57:24.956931307 +0100
>@@ -1775,7 +1775,10 @@ undistribute_ops_list (enum tree_code op
>    first: element index for each relevant BIT_FIELD_REF.
>    second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
> typedef std::pair<unsigned, unsigned> v_info_elem;
>-typedef auto_vec<v_info_elem, 32> v_info;
>+struct v_info {
>+  tree vec_type;
>+  auto_vec<v_info_elem, 32> vec;
>+};
> typedef v_info *v_info_ptr;
> 
> /* Comparison function for qsort on VECTOR SSA_NAME trees by machine
>    mode.  */
>@@ -1840,8 +1843,11 @@ undistribute_bitref_for_vector (enum tre
>   if (ops->length () <= 1)
>     return false;
> 
>-  if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
>-      && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
>+  if (opcode != PLUS_EXPR
>+      && opcode != MULT_EXPR
>+      && opcode != BIT_XOR_EXPR
>+      && opcode != BIT_IOR_EXPR
>+      && opcode != BIT_AND_EXPR)
>     return false;
> 
>   hash_map<tree, v_info_ptr> v_info_map;
>@@ -1879,9 +1885,45 @@ undistribute_bitref_for_vector (enum tre
>       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
> 	continue;
> 
>+      if (VECTOR_TYPE_P (TREE_TYPE (rhs))
>+	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
>+	continue;
>+
>+      /* The type of BIT_FIELD_REF might not be equal to the element type of
>+	 the vector.  We want to use a vector type with element type the
>+	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
>+      if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
>+	{
>+	  machine_mode simd_mode;
>+	  unsigned HOST_WIDE_INT size, nunits;
>+	  unsigned HOST_WIDE_INT elem_size
>+	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
>+	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
>+	    continue;
>+	  if (size <= elem_size || (size % elem_size) != 0)
>+	    continue;
>+	  nunits = size / elem_size;
>+	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
>+				nunits).exists (&simd_mode))
>+	    continue;
>+	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
>+
>+	  /* Ignore it if target machine can't support this VECTOR type.  */
>+	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
>+	    continue;
>+
>+	  /* Check const vector type, constrain BIT_FIELD_REF offset and
>+	     size.  */
>+	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
>+	    continue;
>+
>+	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
>+			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
>+	    continue;
>+	}
>+
>       tree elem_type = TREE_TYPE (vec_type);
>-      unsigned HOST_WIDE_INT elem_size
>-	= TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>+      unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
>       if (maybe_ne (bit_field_size (rhs), elem_size))
> 	continue;
> 
>@@ -1898,8 +1940,13 @@ undistribute_bitref_for_vector (enum tre
>       bool existed;
>       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
>       if (!existed)
>-	info = new v_info;
>-      info->safe_push (std::make_pair (idx, i));
>+	{
>+	  info = new v_info;
>+	  info->vec_type = vec_type;
>+	}
>+      else if (!types_compatible_p (vec_type, info->vec_type))
>+	continue;
>+      info->vec.safe_push (std::make_pair (idx, i));
>     }
> 
>   /* At least two VECTOR to combine.  */
>@@ -1919,14 +1966,15 @@ undistribute_bitref_for_vector (enum tre
>     {
>       tree cand_vec = (*it).first;
>       v_info_ptr cand_info = (*it).second;
>-      unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
>-      if (cand_info->length () != num_elems)
>+      unsigned int num_elems
>+	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
>+      if (cand_info->vec.length () != num_elems)
> 	continue;
>       sbitmap holes = sbitmap_alloc (num_elems);
>       bitmap_ones (holes);
>       bool valid = true;
>       v_info_elem *curr;
>-      FOR_EACH_VEC_ELT (*cand_info, i, curr)
>+      FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
> 	{
> 	  if (!bitmap_bit_p (holes, curr->first))
> 	    {
>@@ -1962,25 +2010,53 @@ undistribute_bitref_for_vector (enum tre
> 
>       unsigned int idx, j;
>       gimple *sum = NULL;
>-      v_info_ptr info_ptr;
>       tree sum_vec = tvec;
>+      v_info_ptr info_ptr = *(v_info_map.get (tvec));
>       v_info_elem *elem;
>+      tree vec_type = info_ptr->vec_type;
> 
>       /* Build the sum for all candidates with same mode.  */
>       do
> 	{
>-	  sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
>+	  sum = build_and_add_sum (vec_type, sum_vec,
> 				   valid_vecs[i + 1], opcode);
>+	  if (!useless_type_conversion_p (vec_type,
>+					  TREE_TYPE (valid_vecs[i + 1])))
>+	    {
>+	      /* Update the operands only after build_and_add_sum,
>+		 so that we don't have to repeat the placement algorithm
>+		 of build_and_add_sum.  */
>+	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
>+	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
>+				 valid_vecs[i + 1]);
>+	      tree lhs = make_ssa_name (vec_type);
>+	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+	      gimple_set_uid (g, gimple_uid (sum));
>+	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+	      gimple_assign_set_rhs2 (sum, lhs);
>+	      if (sum_vec == tvec)
>+		{
>+		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
>+		  lhs = make_ssa_name (vec_type);
>+		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+		  gimple_set_uid (g, gimple_uid (sum));
>+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+		  gimple_assign_set_rhs1 (sum, lhs);
>+		}
>+	      update_stmt (sum);
>+	    }
> 	  sum_vec = gimple_get_lhs (sum);
> 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
>+	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
> 	  /* Update those related ops of current candidate VECTOR.  */
>-	  FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	    {
> 	      idx = elem->second;
> 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	      /* Set this then op definition will get DCEd later.  */
> 	      gimple_set_visited (def, true);
>-	      if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
>+	      if (opcode == PLUS_EXPR
>+		  || opcode == BIT_XOR_EXPR
> 		  || opcode == BIT_IOR_EXPR)
> 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
> 	      else if (opcode == MULT_EXPR)
>@@ -2007,16 +2083,16 @@ undistribute_bitref_for_vector (enum tre
> 	 BIT_FIELD_REF statements accordingly.  */
>       info_ptr = *(v_info_map.get (tvec));
>       gcc_assert (sum);
>-      tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
>-      FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+      tree elem_type = TREE_TYPE (vec_type);
>+      FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	{
> 	  idx = elem->second;
> 	  tree dst = make_ssa_name (elem_type);
>-	  gimple *gs = gimple_build_assign (
>-	      dst, BIT_FIELD_REF,
>-	      build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
>-		      bitsize_int (elem->first
>-				   * tree_to_uhwi (TYPE_SIZE (elem_type)))));
>+	  tree pos = bitsize_int (elem->first
>+				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
>+	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
>+			     TYPE_SIZE (elem_type), pos);
>+	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
> 	  insert_stmt_after (gs, sum);
> 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	  /* Set this then op definition will get DCEd later.  */
>--- gcc/testsuite/gcc.c-torture/compile/pr92618.c.jj	2019-11-22 11:11:52.971470532 +0100
>+++ gcc/testsuite/gcc.c-torture/compile/pr92618.c	2019-11-22 11:11:39.434680213 +0100
>@@ -0,0 +1,67 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
>+typedef long long __m256i __attribute__((__may_alias__, __vector_size__(4 * sizeof (long long))));
>+typedef long long __m512i __attribute__((__may_alias__, __vector_size__(8 * sizeof (long long))));
>+
>+double a[32];
>+unsigned long long b[32];
>+__m128i bar (void);
>+__m256i qux (void);
>+__m512i corge (void);
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *(__m256i *) &b[4] = e;
>+  *(__m256i *) &b[8] = f;
>+  *(__m256i *) &b[12] = g;
>+  *(__m512i *) &b[16] = h;
>+  *(__m512i *) &b[24] = i;
>+  *x = b[0] + b[1] + b[2] + b[3]
>+       + b[4] + b[5] + b[6] + b[7]
>+       + b[8] + b[9] + b[10] + b[11]
>+       + b[12] + b[13] + b[14] + b[15]
>+       + b[16] + b[17] + b[18] + b[19]
>+       + b[20] + b[21] + b[22] + b[23]
>+       + b[24] + b[25] + b[26] + b[27]
>+       + b[28] + b[29] + b[30] + b[31];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *(__m256i *) &a[4] = e;
>+  *(__m256i *) &a[8] = f;
>+  *(__m256i *) &a[12] = g;
>+  *(__m512i *) &a[16] = h;
>+  *(__m512i *) &a[24] = i;
>+  *x = a[0] + a[1] + a[2] + a[3]
>+       + a[4] + a[5] + a[6] + a[7]
>+       + a[8] + a[9] + a[10] + a[11]
>+       + a[12] + a[13] + a[14] + a[15]
>+       + a[16] + a[17] + a[18] + a[19]
>+       + a[20] + a[21] + a[22] + a[23]
>+       + a[24] + a[25] + a[26] + a[27]
>+       + a[28] + a[29] + a[30] + a[31];
>+#endif
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr92618.c.jj	2019-11-22 11:11:10.543127733 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr92618.c	2019-11-22 11:10:56.930338594 +0100
>@@ -0,0 +1,63 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
>+
>+double a[4];
>+unsigned long long b[4];
>+
>+__attribute__((noipa)) __m128i
>+bar (void)
>+{
>+  static int cnt;
>+  cnt += 2;
>+  return (__m128i) { cnt, cnt + 1 };
>+}
>+
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+typedef double __m128d __attribute__((__may_alias__, __vector_size__(2 * sizeof (double))));
>+
>+__attribute__((noipa)) __m128i
>+qux (void)
>+{
>+  static double cnt;
>+  cnt += 2.0;
>+  return (__m128i) (__m128d) { cnt, cnt + 1.0 };
>+}
>+#endif
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *x = b[0] + b[1] + b[2] + b[3];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = qux ();
>+  __m128i d = qux ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *x = a[0] + a[1] + a[2] + a[3];
>+#endif
>+}
>+
>+int
>+main ()
>+{
>+  unsigned long long c = 0;
>+  foo (&c);
>+  if (c != 2 + 3 + 4 + 5)
>+    __builtin_abort ();
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  double d = 0.0;
>+  baz (&d);
>+  if (d != 2.0 + 3.0 + 4.0 + 5.0)
>+    __builtin_abort ();
>+#endif
>+}
>
>	Jakub
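
What the fixed pass builds for the double case can be modelled in plain C roughly as follows (illustrative only: the pass emits GIMPLE using VIEW_CONVERT_EXPR and BIT_FIELD_REF rather than memcpy and vector subscripts, and the function name is made up):

typedef long long v2di __attribute__ ((__vector_size__ (2 * sizeof (long long))));
typedef double v2df __attribute__ ((__vector_size__ (2 * sizeof (double))));

double
model (v2di c, v2di d)
{
  v2df cv, dv;
  __builtin_memcpy (&cv, &c, sizeof cv);  /* VIEW_CONVERT_EXPR to vector double  */
  __builtin_memcpy (&dv, &d, sizeof dv);  /* VIEW_CONVERT_EXPR to vector double  */
  v2df s = cv + dv;                       /* single addition in the right vector type  */
  return s[0] + s[1];                     /* BIT_FIELD_REF lane extracts  */
}

The important property is that the vector addition happens in a vector type built from the BIT_FIELD_REF's own type (here vector of double), not in the 2x long long type of the stored vectors.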