Hi,

in PR118832 we have another instance of the problem already noticed in
PR117878.  We sometimes use e.g. expand_simple_binop for vector
operations like shift or and.  While this is usually OK, it causes
problems when doing it late, e.g. during LRA.

In particular, we might rematerialize a const_vector during LRA, which
then leaves an insn laying around that cannot be split any more if it
requires a pseudo.  Therefore we should only use the split variants
in expand_const_vector.

This patch fixed the issue in the PR and also pre-emptively rewrites two
other spots that might be prone to the same issue.

Regtested on rv64gcv_zvl512b.  As the two other cases don't have a test
(so might not even trigger) I unconditionally enabled them for my testsuite
run.

Regards
 Robin

        PR target/118832

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_const_vector):  Expand as
        vlmax insn during lra.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr118832.c: New test.
---
 gcc/config/riscv/riscv-v.cc                   | 46 +++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/pr118832.c   | 13 ++++++
 2 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9847439ca77..3e86b12bb40 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1265,7 +1265,16 @@ expand_const_vector (rtx target, rtx src)
               element. Use element width = 64 and broadcast a vector with
               all element equal to 0x0706050403020100.  */
          rtx ele = builder.get_merged_repeating_sequence ();
-         rtx dup = expand_vector_broadcast (builder.new_mode (), ele);
+         rtx dup;
+         if (lra_in_progress)
+           {
+             dup = gen_reg_rtx (builder.new_mode ());
+             rtx ops[] = {dup, ele};
+             emit_vlmax_insn (code_for_pred_broadcast
+                              (builder.new_mode ()), UNARY_OP, ops);
+           }
+         else
+           dup = expand_vector_broadcast (builder.new_mode (), ele);
          emit_move_insn (result, gen_lowpart (mode, dup));
        }
       else
@@ -1523,10 +1532,20 @@ expand_const_vector (rtx target, rtx src)
                  base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
                  expand_vec_series (tmp2, base2,
                                     gen_int_mode (step2, new_smode));
-                 rtx shifted_tmp2 = expand_simple_binop (
-                   new_mode, ASHIFT, tmp2,
-                   gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX,
-                   false, OPTAB_DIRECT);
+                 rtx shifted_tmp2;
+                 rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
+                 if (lra_in_progress)
+                   {
+                     shifted_tmp2 = gen_reg_rtx (new_mode);
+                     rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+                     emit_vlmax_insn (code_for_pred_scalar
+                                      (ASHIFT, new_mode), BINARY_OP,
+                                      shift_ops);
+                   }
+                 else
+                   shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
+                                                       shift, NULL_RTX, false,
+                                                       OPTAB_DIRECT);
                  rtx tmp3 = gen_reg_rtx (new_mode);
                  rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
                  emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
@@ -1539,9 +1558,20 @@ expand_const_vector (rtx target, rtx src)
              rtx vid = gen_reg_rtx (mode);
              expand_vec_series (vid, const0_rtx, const1_rtx);
              /* Transform into { 0, 0, 1, 1, 2, 2, ... }.  */
-             rtx shifted_vid
-               = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
-                                      NULL_RTX, false, OPTAB_DIRECT);
+             rtx shifted_vid;
+             if (lra_in_progress)
+               {
+                 shifted_vid = gen_reg_rtx (mode);
+                 rtx shift = gen_int_mode (1, Xmode);
+                 rtx shift_ops[] = {shifted_vid, vid, shift};
+                 emit_vlmax_insn (code_for_pred_scalar
+                                  (ASHIFT, mode), BINARY_OP,
+                                  shift_ops);
+               }
+             else
+               shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
+                                                  const1_rtx, NULL_RTX,
+                                                  false, OPTAB_DIRECT);
              rtx tmp1 = gen_reg_rtx (mode);
              rtx tmp2 = gen_reg_rtx (mode);
              expand_vec_series (tmp1, base1,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c
new file mode 100644
index 00000000000..db0b12bee5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zbb -mabi=lp64d -O3" } */
+
+int *a;
+void b(int *);
+void c(int *g, short h) {
+    int d[8], e[8];
+    for (int f = 0; f < h; f++)
+      d[f] = g[f] << 24 | (g[f] & 4278190080) >> 24;
+    b(d);
+    for (int f = 0; f < h; f++)
+      a[f] = e[f] << 24 | (e[f] & 4278190080) >> 24;
+}
-- 
2.48.1

Reply via email to