LGTM


juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2025-02-12 22:03
To: gcc-patches
CC: pal...@dabbelt.com; kito.ch...@gmail.com; juzhe.zh...@rivai.ai; 
jeffreya...@gmail.com; pan2...@intel.com; rdapp....@gmail.com
Subject: [PATCH] RISC-V: Avoid more unsplit insns in const expander [PR118832].
Hi,
 
in PR118832 we have another instance of the problem already noticed in
PR117878.  We sometimes use e.g. expand_simple_binop for vector
operations like shift or and.  While this is usually OK, it causes
problems when doing it late, e.g. during LRA.
 
In particular, we might rematerialize a const_vector during LRA, which
then leaves an insn laying around that cannot be split any more if it
requires a pseudo.  Therefore we should only use the split variants
in expand_const_vector.
 
This patch fixed the issue in the PR and also pre-emptively rewrites two
other spots that might be prone to the same issue.
 
Regtested on rv64gcv_zvl512b.  As the two other cases don't have a test
(so might not even trigger) I unconditionally enabled them for my testsuite
run.
 
Regards
Robin
 
PR target/118832
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_const_vector):  Expand as
vlmax insn during lra.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/pr118832.c: New test.
---
gcc/config/riscv/riscv-v.cc                   | 46 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/pr118832.c   | 13 ++++++
2 files changed, 51 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9847439ca77..3e86b12bb40 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1265,7 +1265,16 @@ expand_const_vector (rtx target, rtx src)
       element. Use element width = 64 and broadcast a vector with
       all element equal to 0x0706050403020100.  */
  rtx ele = builder.get_merged_repeating_sequence ();
-   rtx dup = expand_vector_broadcast (builder.new_mode (), ele);
+   rtx dup;
+   if (lra_in_progress)
+     {
+       dup = gen_reg_rtx (builder.new_mode ());
+       rtx ops[] = {dup, ele};
+       emit_vlmax_insn (code_for_pred_broadcast
+        (builder.new_mode ()), UNARY_OP, ops);
+     }
+   else
+     dup = expand_vector_broadcast (builder.new_mode (), ele);
  emit_move_insn (result, gen_lowpart (mode, dup));
}
       else
@@ -1523,10 +1532,20 @@ expand_const_vector (rtx target, rtx src)
  base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
  expand_vec_series (tmp2, base2,
     gen_int_mode (step2, new_smode));
-   rtx shifted_tmp2 = expand_simple_binop (
-     new_mode, ASHIFT, tmp2,
-     gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX,
-     false, OPTAB_DIRECT);
+   rtx shifted_tmp2;
+   rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
+   if (lra_in_progress)
+     {
+       shifted_tmp2 = gen_reg_rtx (new_mode);
+       rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+       emit_vlmax_insn (code_for_pred_scalar
+        (ASHIFT, new_mode), BINARY_OP,
+        shift_ops);
+     }
+   else
+     shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
+ shift, NULL_RTX, false,
+ OPTAB_DIRECT);
  rtx tmp3 = gen_reg_rtx (new_mode);
  rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
  emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
@@ -1539,9 +1558,20 @@ expand_const_vector (rtx target, rtx src)
      rtx vid = gen_reg_rtx (mode);
      expand_vec_series (vid, const0_rtx, const1_rtx);
      /* Transform into { 0, 0, 1, 1, 2, 2, ... }.  */
-       rtx shifted_vid
- = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
-        NULL_RTX, false, OPTAB_DIRECT);
+       rtx shifted_vid;
+       if (lra_in_progress)
+ {
+   shifted_vid = gen_reg_rtx (mode);
+   rtx shift = gen_int_mode (1, Xmode);
+   rtx shift_ops[] = {shifted_vid, vid, shift};
+   emit_vlmax_insn (code_for_pred_scalar
+    (ASHIFT, mode), BINARY_OP,
+    shift_ops);
+ }
+       else
+ shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
+    const1_rtx, NULL_RTX,
+    false, OPTAB_DIRECT);
      rtx tmp1 = gen_reg_rtx (mode);
      rtx tmp2 = gen_reg_rtx (mode);
      expand_vec_series (tmp1, base1,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c
new file mode 100644
index 00000000000..db0b12bee5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118832.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zbb -mabi=lp64d -O3" } */
+
+int *a;
+void b(int *);
+void c(int *g, short h) {
+    int d[8], e[8];
+    for (int f = 0; f < h; f++)
+      d[f] = g[f] << 24 | (g[f] & 4278190080) >> 24;
+    b(d);
+    for (int f = 0; f < h; f++)
+      a[f] = e[f] << 24 | (e[f] & 4278190080) >> 24;
+}
-- 
2.48.1
 
 

Reply via email to