This patch adds the vector rotate shift patterns for auto-vectorization.
With this patch, a scalar rotate shift can be automatically
vectorized into a vector rotate shift.

gcc/ChangeLog:

        * config/riscv/autovec.md (v<bitmanip_optab><mode>3):
        Add new define_expand pattern for vector rotate shift.
gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: New test.
        * gcc.target/riscv/rvv/autovec/binop/vrolr-run.c: New test.
        * gcc.target/riscv/rvv/autovec/binop/vrolr-template.h: New test.

Signed-off-by: Feng Wang <wangf...@eswincomputing.com>
---
 gcc/config/riscv/autovec.md                   | 16 ++++
 .../riscv/rvv/autovec/binop/vrolr-1.c         |  9 ++
 .../riscv/rvv/autovec/binop/vrolr-run.c       | 88 +++++++++++++++++++
 .../riscv/rvv/autovec/binop/vrolr-template.h  | 29 ++++++
 4 files changed, 142 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 0423d7bee13..e6649bf3f75 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2764,3 +2764,19 @@
     operands[2] = const0_rtx;
   }
 )
+
+;; -------------------------------------------------------------------------
+;; - vrol.vv vror.vv (rotate amount in a vector register; Zvbb or Zvkb only)
+;; -------------------------------------------------------------------------
+(define_expand "v<bitmanip_optab><mode>3"
+  [(set (match_operand:VI 0 "register_operand")
+        (bitmanip_rotate:VI
+         (match_operand:VI 1 "register_operand")
+         (match_operand:VI 2 "register_operand")))]
+  "TARGET_ZVBB || TARGET_ZVKB"
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_v (<CODE>, <MODE>mode),
+                                  riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
new file mode 100644
index 00000000000..55dac27697c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vrolr-template.h"
+
+/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
new file mode 100644
index 00000000000..b659a0804f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+#include <assert.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+
+#define ARRAY_SIZE 512
+
+#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+    for (int i = 0; i < size; i++) { /* arr[i] = arr[i] rotated left by shifts[i] */ \
+        (arr)[i] = (((arr)[i] << (shifts)[i]) | ((arr)[i] >> (bit_size - (shifts)[i]))); \
+    }
+
+#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+    for (int i = 0; i < size; i++) { /* arr[i] = arr[i] rotated right by shifts[i] */ \
+        (arr)[i] = (((arr)[i] >> (shifts)[i]) | ((arr)[i] << (bit_size - (shifts)[i]))); \
+    }
+
+/* Assert result_left/right[i] is (uint8_t) i rotated left/right by shift_values[i].  */
+void __attribute__((optimize("no-tree-vectorize"))) compare_results8(
+    uint8_t *result_left, uint8_t *result_right, int bit_size, uint8_t *shift_values) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        uint8_t v = i, n = shift_values[i], m = (bit_size - n) & (bit_size - 1); /* m < bit_size */
+        assert(result_left[i] == (uint8_t)((v << n) | (v >> m)));
+        assert(result_right[i] == (uint8_t)((v >> n) | (v << m)));
+    }
+}
+
+/* Assert result_left/right[i] is (uint16_t) i rotated left/right by shift_values[i].  */
+void __attribute__((optimize("no-tree-vectorize"))) compare_results16(
+    uint16_t *result_left, uint16_t *result_right, int bit_size, uint16_t *shift_values) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        uint16_t v = i, n = shift_values[i], m = (bit_size - n) & (bit_size - 1); /* m < bit_size */
+        assert(result_left[i] == (uint16_t)((v << n) | (v >> m)));
+        assert(result_right[i] == (uint16_t)((v >> n) | (v << m)));
+    }
+}
+
+/* Assert result_left/right[i] is (uint32_t) i rotated left/right by shift_values[i].  */
+void __attribute__((optimize("no-tree-vectorize"))) compare_results32(
+    uint32_t *result_left, uint32_t *result_right, int bit_size, uint32_t *shift_values) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        uint32_t v = i, n = shift_values[i], m = (bit_size - n) & (bit_size - 1); /* m < bit_size */
+        assert(result_left[i] == (uint32_t)((v << n) | (v >> m)));
+        assert(result_right[i] == (uint32_t)((v >> n) | (v << m)));
+    }
+}
+
+/* Assert result_left/right[i] is (uint64_t) i rotated left/right by shift_values[i].  */
+void __attribute__((optimize("no-tree-vectorize"))) compare_results64(
+    uint64_t *result_left, uint64_t *result_right, int bit_size, uint64_t *shift_values) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        uint64_t v = i, n = shift_values[i], m = (bit_size - n) & (bit_size - 1); /* m < bit_size */
+        assert(result_left[i] == (uint64_t)((v << n) | (v >> m)));
+        assert(result_right[i] == (uint64_t)((v >> n) | (v << m)));
+    }
+}
+
+#define TEST_SHIFT_OPERATIONS(TYPE, bit_size) /* fill, rotate both ways, then verify */ \
+    TYPE shift_val##bit_size[ARRAY_SIZE]; \
+    TYPE result_left##bit_size[ARRAY_SIZE]; \
+    TYPE result_right##bit_size[ARRAY_SIZE]; \
+    do { \
+        for (int i = 0; i < ARRAY_SIZE; i++) { \
+            result_left##bit_size[i] = i; \
+            result_right##bit_size[i] = i; \
+            shift_val##bit_size[i] = i % bit_size; \
+        } \
+        CIRCULAR_LEFT_SHIFT_ARRAY(result_left##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE) \
+        CIRCULAR_RIGHT_SHIFT_ARRAY(result_right##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE) \
+        compare_results##bit_size(result_left##bit_size, result_right##bit_size, bit_size, shift_val##bit_size); \
+    } while(0)
+
+
+int main() { /* Run the rotate check once per element width: 8, 16, 32 and 64 bits.  */
+    TEST_SHIFT_OPERATIONS(uint8_t, 8);
+    TEST_SHIFT_OPERATIONS(uint16_t, 16);
+    TEST_SHIFT_OPERATIONS(uint32_t, 32);
+    TEST_SHIFT_OPERATIONS(uint64_t, 64);
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
new file mode 100644
index 00000000000..3db0d8643a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
@@ -0,0 +1,29 @@
+#include <stdint-gcc.h>
+
+#define VROL_VV(SEW, S, T) /* defines autovect_vrol_vv_<S><SEW>: rotate-left loop over n elements */ \
+__attribute__ ((noipa))\
+void autovect_vrol_vv_##S##SEW (T *out,  T *op1, T *op2, int n){\
+    for(int i=0; i<n; i++){ \
+        op2[i] = op2[i] & (SEW-1); /* count is masked to 0..SEW-1 and written back to op2 */ \
+        out[i]= (op1[i] << op2[i]) | (op1[i] >> (SEW - op2[i])); /* NOTE(review): second count is SEW when op2[i] == 0 */ \
+    }\
+}
+
+#define VROR_VV(SEW, S, T) /* defines autovect_vror_vv_<S><SEW>: rotate-right loop over n elements */ \
+__attribute__ ((noipa))\
+void autovect_vror_vv_##S##SEW (T *out,  T *op1, T *op2, int n){\
+    for(int i=0; i<n; i++){ \
+        op2[i] = op2[i] & (SEW-1); /* count is masked to 0..SEW-1 and written back to op2 */ \
+        out[i]= (op1[i] >> op2[i]) | (op1[i] << (SEW - op2[i])); /* NOTE(review): second count is SEW when op2[i] == 0 */ \
+    }\
+}
+
+VROL_VV(8,  u, uint8_t)
+VROL_VV(16, u, uint16_t)
+VROL_VV(32, u, uint32_t)
+VROL_VV(64, u, uint64_t)
+
+VROR_VV(8,  u, uint8_t)
+VROR_VV(16, u, uint16_t)
+VROR_VV(32, u, uint32_t)
+VROR_VV(64, u, uint64_t)
-- 
2.17.1

Reply via email to