Changes since v1:
 - Fix snafu in test.

This improves codegen for x264 sum of absolute difference routines.
The insn count is the same, but we avoid double widening ops and the
ensuing whole register moves.

Also, for more general applicability, we chose to implement abs diff
rather than the sum of abs diff variant.

Suggested-by: Robin Dapp <rd...@ventanamicro.com>
Co-developed-by: Pan Li <pan2...@intel.com>
Signed-off-by: Vineet Gupta <vine...@rivosinc.com>

        PR target/117722

gcc/ChangeLog:
        * config/riscv/autovec.md: Add uabd expander.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr117722.c: New test.

Signed-off-by: Vineet Gupta <vine...@rivosinc.com>
---
 gcc/config/riscv/autovec.md                   | 26 +++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr117722.c   | 23 ++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 2529dc77f221..4678906fb918 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2928,3 +2928,29 @@
     riscv_vector::expand_strided_store (<MODE>mode, operands);
     DONE;
   })
+
+; ========
+; == Absolute difference (not including sum): op0 = umax - umin of op1, op2
+; ========
+(define_expand "uabd<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx max = gen_reg_rtx (<MODE>mode);  /* Fresh reg for umax (op1, op2).  */
+    insn_code icode = code_for_pred (UMAX, <MODE>mode);
+    rtx ops1[] = {max, operands[1], operands[2]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
+
+    rtx min = gen_reg_rtx (<MODE>mode);  /* Fresh reg for umin (op1, op2).  */
+    icode = code_for_pred (UMIN, <MODE>mode);
+    rtx ops2[] = {min, operands[1], operands[2]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
+
+    icode = code_for_pred (MINUS, <MODE>mode);  /* op0 = max - min.  */
+    rtx ops3[] = {operands[0], max, min};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
+
+    DONE;
+  });
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c
new file mode 100644
index 000000000000..b675930818e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O2" } */
+
+/* Generate the absolute difference in a sad loop as sub (max, min).
+   This helps with x264 sad routines.  */
+
+inline int abs(int i)
+{
+  return (i < 0 ? -i : i);  /* Negate negative inputs; others pass through.  */
+}
+
+int pixel_sad_n(unsigned char *pix1, unsigned char *pix2, int n)
+{
+  int sum = 0;  /* Running total of |pix1[i] - pix2[i]| over i in [0, n).  */
+  for( int i = 0; i < n; i++ )
+       sum += abs(pix1[i] - pix2[i]);  /* Difference is int; may be < 0.  */
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler {vminu\.v} } } */
+/* { dg-final { scan-assembler {vmaxu\.v} } } */
+/* { dg-final { scan-assembler {vsub\.v} } } */
-- 
2.43.0

Reply via email to