From: Pan Li <pan2...@intel.com> This patch fixes a bug in expanding a const vector for the interleave case. For example, we have:
base1 = 151 step1 = 121 For vec_series, we generate a vector of the form v[i] = base + i * step. The vec_series then produces the result below for HImode, where the values overflow into the high 8 bits of each HImode element. v1.b = {151, 255, 7, 0, 119, 0, 231, 0, 87, 1, 199, 1, 55, 2, 167, 2} That is, we expect v1.b to be: v1.b = {151, 0, 7, 0, 119, 0, 231, 0, 87, 0, 199, 0, 55, 0, 167, 0} After that, it is IORed with v2, the series for base2 (i.e. the other series). v2.b = {0, 17, 0, 33, 0, 49, 0, 65, 0, 81, 0, 97, 0, 113, 0, 129} Unfortunately, base1 + i * step1 in HImode may overflow into the high 8 bits, and those high 8 bits then pollute v2 and result in incorrect values in the const_vector. This patch checks whether the base1 series overflows smode before the IOR with the base2 series, and clears the high bits if it does. If there is no overflow, nothing extra is emitted; for VLA, where the step is not a compile-time constant, the high bits are always cleared. The test suites below passed for this patch. * The full rv64gcv regression test. PR target/118931 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Add overflow to smode check and clean up highest bits if overflow. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr118931-run-1.c: New test. 
Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/riscv-v.cc | 39 ++++++++++++++++++- .../riscv/rvv/base/pr118931-run-1.c | 19 +++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 7cc15f3d53c..a308a2364c7 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1504,9 +1504,46 @@ expand_const_vector (rtx target, rtx src) && get_vector_mode (new_smode, new_nunits).exists (&new_mode)) { rtx tmp1 = gen_reg_rtx (new_mode); - base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode); + poly_int64 base1_poly = rtx_to_poly_int64 (base1); + base1 = gen_int_mode (base1_poly, new_smode); expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); + bool overflow_smode_p = false; + + if (!step1.is_constant ()) + overflow_smode_p = true; // Fall back to less optimized for VLA. + else + { + int elem_count = XVECLEN (src, 0); + uint64_t elem_val = 0; + uint64_t step1_val = (uint64_t)step1.to_constant (); + uint64_t base1_val = (uint64_t)base1_poly.to_constant (); + + if (elem_count > 0) + elem_val = base1_val + (elem_count - 1) * step1_val; + + if (elem_count > 1) + elem_val |= base1_val + (elem_count - 2) * step1_val; + + if ((elem_val >> builder.inner_bits_size ()) != 0) + overflow_smode_p = true; + } + + if (overflow_smode_p) + { + /* The vec_series base1 may overflow bits to base2 series. 
*/ + rtx vec_mask = gen_vec_duplicate (new_mode, + CONSTM1_RTX (new_smode)); + rtx lshift_vec_mask = gen_reg_rtx (new_mode); + rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode); + rtx lshift_ops[] = {lshift_vec_mask, vec_mask, shift}; + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, new_mode), + BINARY_OP, lshift_ops); + rtx and_ops[] = {tmp1, tmp1, lshift_vec_mask}; + emit_vlmax_insn (code_for_pred (AND, new_mode), BINARY_OP, + and_ops); + } + if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) /* { 1, 0, 2, 0, ... }. */ emit_move_insn (result, gen_lowpart (mode, tmp1)); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c new file mode 100644 index 00000000000..ef866a72039 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c @@ -0,0 +1,19 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */ + +long long m; +char f = 151; +char h = 103; +unsigned char a = 109; + +int main() { + for (char l = 0; l < 255 - 241; l += h - 102) + a *= f; + + m = a; + + if (m != 29) + __builtin_abort (); + + return 0; +} -- 2.43.0