https://gcc.gnu.org/g:810e3cbbb1f4c61450d175eb8208d9260c17ff70

commit 810e3cbbb1f4c61450d175eb8208d9260c17ff70
Author: Pan Li <pan2...@intel.com>
Date:   Fri Sep 12 16:43:00 2025 +0800

    RISC-V: Combine vec_duplicate + vwaddu.vv to vwaddu.vx on GR2VR cost
    
    This patch combines vec_duplicate + vwaddu.vv into vwaddu.vx, as
    shown in the example code below.  The related pattern depends on the
    cost of vec_duplicate from GR2VR: late-combine will perform the
    combination if the GR2VR cost is zero, and reject it if the GR2VR
    cost is greater than zero.
    
    Assume we have example code like the following, with a GR2VR cost of 0.
    
    Before this patch:
      11       beq a3,zero,.L8
      12       vsetvli a5,zero,e32,m1,ta,ma
      13       vmv.v.x v2,a2
      ...
      16   .L3:
      17       vsetvli a5,a3,e32,m1,ta,ma
      ...
      22       vwaddu.vv v1,v2,v3
      ...
      25       bne a3,zero,.L3
    
    After this patch:
      11       beq a3,zero,.L8
      ...
      14    .L3:
      15       vsetvli a5,a3,e32,m1,ta,ma
      ...
      20       vwaddu.vx v1,a2,v3
      ...
      23       bne a3,zero,.L3
    
    The pattern in this patch only works for DImode, i.e. the pattern below.
    v1:RVVM1DImode = (zero_extend:RVVM1DImode v2:RVVM1SImode)
      + (vec_dup:RVVM1DImode (zero_extend:DImode x2:SImode));
    
    Unfortunately, for uint16_t to uint32_t or uint8_t to uint16_t, we lose
    this extend op after expand.
    
    For uint16_t => uint32_t we have:
    (set (reg:SI 149) (subreg/s/v:SI (reg/v:DI 146 [ rs1 ]) 0))
    
    For uint32_t => uint64_t we have:
    (set (reg:DI 148 [ _6 ])
         (zero_extend:DI (subreg/s/u:SI (reg/v:DI 146 [ rs1 ]) 0)))
    
    We can see there is no zero_extend for uint16_t to uint32_t, so we
    cannot hit the pattern above.  Combine will therefore try the pattern
    below for uint16_t to uint32_t.
    
    v1:RVVM1SImode = (zero_extend:RVVM1SImode v2:RVVM1HImode)
      + (vec_dup:RVVM1SImode (subreg:SIMode (:DImode x2:SImode)))
    
    But it cannot match the vwaddu semantics, thus we need separate handling
    of vwaddu.vv for uint16_t to uint32_t, as well as for uint8_t to
    uint16_t.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md
            (*widen_first_<any_extend:su>_vx_<mode>):
            Add helper bridge pattern for vwaddu.vx combine.
            (*widen_<any_widen_binop:optab>_<any_extend:su>_vx_<mode>): Add
            new pattern to match vwaddu.vx combine.
            * config/riscv/iterators.md: Add code attr to get extend CODE.
            * config/riscv/vector-iterators.md: Add Dmode iterator for
            widen.
    
    Signed-off-by: Pan Li <pan2...@intel.com>
    (cherry picked from commit 638320686c462e7fcdd4e7b3d49d090ac480bd18)

Diff:
---
 gcc/config/riscv/autovec-opt.md      | 42 ++++++++++++++++++++++++++++++++++++
 gcc/config/riscv/iterators.md        |  3 +++
 gcc/config/riscv/vector-iterators.md | 16 ++++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 315cd1d2ad50..a5eb49cc81ed 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1868,6 +1868,48 @@
   }
   [(set_attr "type" "vimuladd")])
 
+(define_insn_and_split "*widen_first_<any_extend:su>_vx_<mode>"
+ [(set (match_operand:VWEXTI_D     0 "register_operand")
+       (vec_duplicate:VWEXTI_D
+        (any_extend:<VEL>
+          (match_operand:<VSUBEL> 1 "register_operand"))))]
+  "TARGET_VECTOR && TARGET_64BIT && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx extend_scalar = gen_reg_rtx (<VEL>mode);
+    emit_insn (gen_<any_extend:extend_name><vsubel><vel>2 (extend_scalar,
+                                                          operands[1]));
+
+    insn_code icode = code_for_pred_broadcast (<MODE>mode);
+    rtx vec_dup_ops[] = {operands[0], extend_scalar};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, vec_dup_ops);
+
+    DONE;
+  })
+
+(define_insn_and_split 
"*widen_<any_widen_binop:optab>_<any_extend:su>_vx_<mode>"
+ [(set (match_operand:VWEXTI_D             0 "register_operand")
+       (any_widen_binop:VWEXTI_D
+        (any_extend:VWEXTI_D
+          (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+        (vec_duplicate:VWEXTI_D
+          (any_extend:<VEL>
+            (match_operand:<VSUBEL>       2 "register_operand")))))]
+  "TARGET_VECTOR && TARGET_64BIT && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_dual_widen_scalar (<any_widen_binop:CODE>,
+                                                      <any_extend:CODE>,
+                                                      <MODE>mode);
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+
+    DONE;
+  }
+  [(set_attr "type" "viwalu")])
 
 ;; 
=============================================================================
 ;; Combine vec_duplicate + op.vv to op.vf
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index cd8fd7a2b252..35de17f76cd9 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -207,6 +207,9 @@
 ;; This code iterator allows signed and unsigned widening multiplications
 ;; to use the same template.
 (define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_attr extend_name [
+  (sign_extend "extend") (zero_extend "zero_extend")
+])
 
 ;; These code iterators allow unsigned and signed extraction to be generated
 ;; from the same template.
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index ed7e9c3a9516..45af65642cd9 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -943,6 +943,22 @@
   (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64")
 ])
 
+(define_mode_iterator VWEXTI_D [
+  (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
+  (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
+
+  (V1DI "riscv_vector::vls_mode_valid_p (V1DImode) && TARGET_VECTOR_ELEN_64")
+  (V2DI "riscv_vector::vls_mode_valid_p (V2DImode) && TARGET_VECTOR_ELEN_64")
+  (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+  (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && 
TARGET_MIN_VLEN >= 64")
+  (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 
&& TARGET_MIN_VLEN >= 128")
+  (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 
&& TARGET_MIN_VLEN >= 256")
+  (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 
&& TARGET_MIN_VLEN >= 512")
+  (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && 
TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
+  (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && 
TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
+  (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && 
TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
+])
+
 (define_mode_iterator VWEXTI [
   RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64")

Reply via email to