From: Pan Li <pan2...@intel.com>

This patch fixes a bug when expanding a const vector for the interleave
case.  For example, we have:

base1 = 151
step = 112

For vec_series, we generate a vector of the form v[i] = base + i * step.
The vec_series then produces the result below in HImode, where some
elements overflow into the high 8 bits of the HImode element.

v1.b = {151, 255, 7,  0, 119,  0, 231,  0, 87,  1, 199,  1, 55,   2, 167,   2}

That is, we expect v1.b to be:

v1.b = {151, 0, 7,  0, 119,  0, 231,  0, 87,  0, 199,  0, 55,   0, 167,   0}

After that, v1 is IORed with v2, the series built from base2 (the other
interleaved series).

v2.b =  {0,  17, 0, 33,   0, 49,   0, 65,  0, 81,   0, 97,  0, 113,   0, 129}

Unfortunately, base1 + i * step1 computed in HImode may overflow into the
high 8 bits, and those high 8 bits will pollute v2 and result in an
incorrect value in the const_vector.

This patch checks whether the base1 series overflows smode before IORing
in the base2 series, and clears the high bits of each element when it
does.  If no overflow can occur, nothing extra is emitted; for VLA (where
step1 is not a compile-time constant) the high bits are always cleared.

The following test suite passes with this patch:
* The full rv64gcv regression test.

        PR target/118931

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_const_vector): Add overflow to
        smode check and clean up highest bits if overflow.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/pr118931-run-1.c: New test.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 39 ++++++++++++++++++-
 .../riscv/rvv/base/pr118931-run-1.c           | 19 +++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7cc15f3d53c..a308a2364c7 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1504,9 +1504,46 @@ expand_const_vector (rtx target, rtx src)
              && get_vector_mode (new_smode, new_nunits).exists (&new_mode))
            {
              rtx tmp1 = gen_reg_rtx (new_mode);
-             base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
+             poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+             base1 = gen_int_mode (base1_poly, new_smode);
              expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
 
+             bool overflow_smode_p = false;
+
+             if (!step1.is_constant ())
+               overflow_smode_p = true; // Fall back to less optimized for VLA.
+             else
+               {
+                 int elem_count = XVECLEN (src, 0);
+                 uint64_t elem_val = 0;
+                 uint64_t step1_val = (uint64_t)step1.to_constant ();
+                 uint64_t base1_val = (uint64_t)base1_poly.to_constant ();
+
+                 if (elem_count > 0)
+                   elem_val = base1_val + (elem_count - 1) * step1_val;
+
+                 if (elem_count > 1)
+                   elem_val |= base1_val + (elem_count - 2) * step1_val;
+
+                 if ((elem_val >> builder.inner_bits_size ()) != 0)
+                   overflow_smode_p = true;
+               }
+
+             if (overflow_smode_p)
+               {
+                 /* The vec_series base1 may overflow bits to base2 series.  */
+                 rtx vec_mask = gen_vec_duplicate (new_mode,
+                                                   CONSTM1_RTX (new_smode));
+                 rtx lshift_vec_mask = gen_reg_rtx (new_mode);
+                 rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
+                 rtx lshift_ops[] = {lshift_vec_mask, vec_mask, shift};
+                 emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, new_mode),
+                                  BINARY_OP, lshift_ops);
+                 rtx and_ops[] = {tmp1, tmp1, lshift_vec_mask};
+                 emit_vlmax_insn (code_for_pred (AND, new_mode), BINARY_OP,
+                                 and_ops);
+               }
+
              if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
                /* { 1, 0, 2, 0, ... }.  */
                emit_move_insn (result, gen_lowpart (mode, tmp1));
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
new file mode 100644
index 00000000000..ef866a72039
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */
+
+long long m;
+char f = 151; /* Multiplier; only its low 8 bits affect the wrapped product below.  */
+char h = 103; /* Loop stride below is h - 102 == 1.  */
+unsigned char a = 109; /* Accumulator; arithmetic wraps modulo 256.  */
+
+int main() {
+  for (char l = 0; l < 255 - 241; l += h - 102) /* 14 iterations.  */
+    a *= f;
+
+  m = a;
+
+  if (m != 29) /* 109 * 151^14 mod 256 == 29.  */
+    __builtin_abort ();
+
+  return 0;
+}
-- 
2.43.0

Reply via email to