From: Pan Li <pan2...@intel.com> This patch fixes a bug when expanding a const vector for the interleave case. For example, we have:
base1 = 151 step = 121 For vec_series, we generate a vector of the form v[i] = base + i * step. The vec_series then produces the result below for HImode, and we can see that the result overflows into the high 8 bits of each HImode element. v1.b = {151, 255, 7, 0, 119, 0, 231, 0, 87, 1, 199, 1, 55, 2, 167, 2} That is, we expect v1.b to be: v1.b = {151, 0, 7, 0, 119, 0, 231, 0, 87, 0, 199, 0, 55, 0, 167, 0} After that, it is IORed with v2, which holds base2 (i.e. the other series). v2.b = {0, 17, 0, 33, 0, 49, 0, 65, 0, 81, 0, 97, 0, 113, 0, 129} Unfortunately, base1 + i * step1 in HImode may overflow into the high 8 bits, and those high 8 bits then pollute v2 and result in incorrect values in the const_vector. This patch checks for overflow of smode before the IOR with the base2 series, and cleans the highest bits if the const_vector overflows smode. If there is no overflow, or in the VLA case, nothing is done here. The test suites below passed for this patch. * The rv64gcv full regression test. PR target/118931 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Add overflow to smode check and clean up highest bits if overflow. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr118931-run-1.c: New test. Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/riscv-v.cc | 36 +++++++++++++++---- .../riscv/rvv/base/pr118931-run-1.c | 19 ++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 7cc15f3d53c..d55f8333fb3 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1489,22 +1489,44 @@ expand_const_vector (rtx target, rtx src) EEW = 32, { 2, 4, ... }. - This only works as long as the larger type does not overflow - as we can't guarantee a zero value for each second element - of the sequence with smaller EEW. - ??? 
For now we assume that no overflow happens with positive - steps and forbid negative steps altogether. */ + Both the series1 and series2 may overflow before taking the IOR + to generate the final result. However, only series1 matters + because the series2 will shift before IOR, thus the overflow + bits will never pollute the final result. + + For now we forbid the negative steps and overflow, and they + will fall back to the default merge way to generate the + const_vector. */ + unsigned int new_smode_bitsize = builder.inner_bits_size () * 2; scalar_int_mode new_smode; machine_mode new_mode; poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder.mode ()), 2); + + poly_int64 base1_poly = rtx_to_poly_int64 (base1); + bool overflow_smode_p = false; + + if (!step1.is_constant ()) + overflow_smode_p = true; + else + { + int elem_count = XVECLEN (src, 0); + uint64_t step1_val = (uint64_t)step1.to_constant (); + uint64_t base1_val = (uint64_t)base1_poly.to_constant (); + uint64_t elem_val = base1_val + (elem_count - 1) * step1_val; + + if ((elem_val >> builder.inner_bits_size ()) != 0) + overflow_smode_p = true; + } + if (known_ge (step1, 0) && known_ge (step2, 0) && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) - && get_vector_mode (new_smode, new_nunits).exists (&new_mode)) + && get_vector_mode (new_smode, new_nunits).exists (&new_mode) + && !overflow_smode_p) { rtx tmp1 = gen_reg_rtx (new_mode); - base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode); + base1 = gen_int_mode (base1_poly, new_smode); expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c new file mode 100644 index 00000000000..ef866a72039 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c @@ -0,0 +1,19 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { 
dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */ + +long long m; +char f = 151; +char h = 103; +unsigned char a = 109; + +int main() { + for (char l = 0; l < 255 - 241; l += h - 102) + a *= f; + + m = a; + + if (m != 29) + __builtin_abort (); + + return 0; +} -- 2.43.0