From: Pan Li <pan2...@intel.com>

This patch combines vec_duplicate + vadd.vv into vadd.vx, as shown in
the example code below.  The related pattern depends on the GR2VR cost
of vec_duplicate:

* The pattern matching will be inactive if GR2VR cost is zero.
* The cost of GR2VR will be added to the total cost of the pattern, and
  late-combine will decide whether or not to perform the replacement
  based on the cost value.

Assume we have the example code below; the GR2VR cost is 2 by default.

  #define DEF_VX_BINARY(T, OP)                                        \
  void                                                                \
  test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
  {                                                                   \
    for (unsigned i = 0; i < n; i++)                                  \
      out[i] = in[i] OP x;                                            \
  }

  DEF_VX_BINARY(int32_t, +)

Before this patch:
  10   │ test_binary_vx_add:
  11   │     beq a3,zero,.L8
  12   │     vsetvli a5,zero,e32,m1,ta,ma // eliminated if GR2VR cost non-zero
  13   │     vmv.v.x v2,a2                // Ditto.
  14   │     slli    a3,a3,32
  15   │     srli    a3,a3,32
  16   │ .L3:
  17   │     vsetvli a5,a3,e32,m1,ta,ma
  18   │     vle32.v v1,0(a1)
  19   │     slli    a4,a5,2
  20   │     sub a3,a3,a5
  21   │     add a1,a1,a4
  22   │     vadd.vv v1,v2,v1
  23   │     vse32.v v1,0(a0)
  24   │     add a0,a0,a4
  25   │     bne a3,zero,.L3

After this patch:
  10   │ test_binary_vx_add:
  11   │     beq a3,zero,.L8
  12   │     slli    a3,a3,32
  13   │     srli    a3,a3,32
  14   │ .L3:
  15   │     vsetvli a5,a3,e32,m1,ta,ma
  16   │     vle32.v v1,0(a1)
  17   │     slli    a4,a5,2
  18   │     sub a3,a3,a5
  19   │     add a1,a1,a4
  20   │     vadd.vx v1,v1,a2
  21   │     vse32.v v1,0(a0)
  22   │     add a0,a0,a4
  23   │     bne a3,zero,.L3

The following test suites pass with this patch.
* The full rv64gcv regression test.

gcc/ChangeLog:

        * config/riscv/autovec-opt.md (*<optab>_vx_<mode>): Add new
        combine to convert vec_duplicate + vadd.vv to vadd.vx based on
        GR2VR cost.
        * config/riscv/riscv.cc (riscv_rtx_costs): Extract vector
        cost into a separate function.
        (riscv_vector_rtx_costs): Add new func to take care of the
        cost of vector rtx, default to 1 and append GR2VR cost to
        vec_duplicate rtx.
        * config/riscv/vector-iterators.md: Add new iterator for vx.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/config/riscv/autovec-opt.md      | 22 ++++++++++++++++++++++
 gcc/config/riscv/riscv.cc            | 26 ++++++++++++++++++++------
 gcc/config/riscv/vector-iterators.md |  4 ++++
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc7e05..1bc3985f1a3 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,25 @@ (define_insn_and_split "*vandn_<mode>"
     DONE;
   }
   [(set_attr "type" "vandn")])
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; =============================================================================
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI    0 "register_operand")
+       (any_int_binop_no_shift_vx:V_VLSI
+        (vec_duplicate:V_VLSI
+          (match_operand:<VEL> 1 "register_operand"))
+        (match_operand:V_VLSI  2 "<binop_rhs2_predicate>")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx ops[] = {operands[0], operands[2], operands[1]};
+    riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
+                                  riscv_vector::BINARY_OP, ops);
+  }
+  [(set_attr "type" "vialu")])
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d3656a7a430..31e9b06568a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3856,16 +3856,30 @@ riscv_extend_cost (rtx op, bool unsigned_p)
 #define SINGLE_SHIFT_COST 1
 
 static bool
-riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno 
ATTRIBUTE_UNUSED,
-                int *total, bool speed)
+riscv_vector_rtx_costs (rtx x, machine_mode mode, int *total)
 {
+  gcc_assert (riscv_v_ext_mode_p (mode));
+
   /* TODO: We set RVV instruction cost as 1 by default.
      Cost Model need to be well analyzed and supported in the future. */
+  int cost_val = 1;
+  enum rtx_code rcode = GET_CODE (x);
+
+  /* Aka (vec_duplicate:RVVM1DI (reg/v:DI 143 [ x ]))  */
+  if (rcode == VEC_DUPLICATE && SCALAR_INT_MODE_P (GET_MODE (XEXP (x, 0))))
+    cost_val += get_vector_costs ()->regmove->GR2VR;
+
+  *total = COSTS_N_INSNS (cost_val);
+
+  return true;
+}
+
+static bool
+riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno 
ATTRIBUTE_UNUSED,
+                int *total, bool speed)
+{
   if (riscv_v_ext_mode_p (mode))
-    {
-      *total = COSTS_N_INSNS (1);
-      return true;
-    }
+    return riscv_vector_rtx_costs (x, mode, total);
 
   bool float_mode_p = FLOAT_MODE_P (mode);
   int cost;
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index f8da71b1d65..b5fc833f1d5 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4052,6 +4052,10 @@ (define_code_iterator any_int_binop_no_shift
  [plus minus and ior xor smax umax smin umin mult div udiv mod umod
 ])
 
+(define_code_iterator any_int_binop_no_shift_vx
+ [plus
+])
+
 (define_code_iterator any_sat_int_binop [ss_plus ss_minus us_plus us_minus])
 (define_code_iterator sat_int_plus_binop [ss_plus us_plus])
 (define_code_iterator sat_int_minus_binop [ss_minus us_minus])
-- 
2.43.0

Reply via email to