This patch adds the vector rotate shift pattern for auto-vectorization. With this patch, scalar rotate shifts can be automatically vectorized into vector rotate shifts.
gcc/ChangeLog: * config/riscv/autovec.md (v<bitmanip_optab><mode>3): Add new define_expand pattern for vector rotate shift. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vrolr-run.c: New test. * gcc.target/riscv/rvv/autovec/binop/vrolr-template.h: New test. Signed-off-by: Feng Wang <wangf...@eswincomputing.com> --- gcc/config/riscv/autovec.md | 16 ++++ .../riscv/rvv/autovec/binop/vrolr-1.c | 9 ++ .../riscv/rvv/autovec/binop/vrolr-run.c | 88 +++++++++++++++++++ .../riscv/rvv/autovec/binop/vrolr-template.h | 29 ++++++ 4 files changed, 142 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 0423d7bee13..e6649bf3f75 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2764,3 +2764,19 @@ operands[2] = const0_rtx; } ) + +;; ------------------------------------------------------------------------- +;; - vrol.vv vror.vv +;; ------------------------------------------------------------------------- +(define_expand "v<bitmanip_optab><mode>3" + [(set (match_operand:VI 0 "register_operand") + (bitmanip_rotate:VI + (match_operand:VI 1 "register_operand") + (match_operand:VI 2 "register_operand")))] + "TARGET_ZVBB || TARGET_ZVKB" + { + riscv_vector::emit_vlmax_insn (code_for_pred_v (<CODE>, <MODE>mode), + riscv_vector::BINARY_OP, operands); + DONE; + } +) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c new file mode 100644 index 00000000000..55dac27697c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-add-options "riscv_v" } */ +/* { 
dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */ + +#include "vrolr-template.h" + +/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */ +/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c new file mode 100644 index 00000000000..b659a0804f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c @@ -0,0 +1,88 @@ +/* { dg-do run } */ +/* { dg-require-effective-target "riscv_zvbb_ok" } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */ + +#include <stdint-gcc.h> +#include <assert.h> + +#include <stdio.h> +#include <stdint.h> +#include <assert.h> + +#define ARRAY_SIZE 512 + +#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) \ + for (int i = 0; i < size; i++) { \ + (arr)[i] = (((arr)[i] << (shifts)[i]) | ((arr)[i] >> (bit_size - (shifts)[i]))); \ + } + +#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) \ + for (int i = 0; i < size; i++) { \ + (arr)[i] = (((arr)[i] >> (shifts)[i]) | ((arr)[i] << (bit_size - (shifts)[i]))); \ + } + +void __attribute__((optimize("no-tree-vectorize"))) compare_results8( + uint8_t *result_left, uint8_t *result_right, + int bit_size, uint8_t *shift_values) +{ + for (int i = 0; i < ARRAY_SIZE; i++) { + assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i]))); + assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - shift_values[i]))); + } +} + +void __attribute__((optimize("no-tree-vectorize"))) compare_results16( + uint16_t *result_left, uint16_t *result_right, + int bit_size, uint16_t *shift_values) +{ + for (int i = 0; i < ARRAY_SIZE; i++) { + assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i]))); + assert(result_right[i] == (i 
>> shift_values[i]) | (i << (bit_size - shift_values[i]))); + } +} + +void __attribute__((optimize("no-tree-vectorize"))) compare_results32( + uint32_t *result_left, uint32_t *result_right, + int bit_size, uint32_t *shift_values) +{ + for (int i = 0; i < ARRAY_SIZE; i++) { + assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i]))); + assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - shift_values[i]))); + } +} + +void __attribute__((optimize("no-tree-vectorize"))) compare_results64( + uint64_t *result_left, uint64_t *result_right, + int bit_size, uint64_t *shift_values) +{ + for (int i = 0; i < ARRAY_SIZE; i++) { + assert(result_left[i] == ((uint64_t)i << shift_values[i]) | ((uint64_t)i >> (bit_size - shift_values[i]))); + assert(result_right[i] == ((uint64_t)i >> shift_values[i]) | ((uint64_t)i << (bit_size - shift_values[i]))); + } +} + +#define TEST_SHIFT_OPERATIONS(TYPE, bit_size) \ + TYPE shift_val##bit_size[ARRAY_SIZE];\ + TYPE result_left##bit_size[ARRAY_SIZE];\ + TYPE result_right##bit_size[ARRAY_SIZE];\ + do { \ + for (int i = 0; i < ARRAY_SIZE; i++) { \ + result_left##bit_size[i] = i;\ + result_right##bit_size[i] = i;\ + shift_val##bit_size[i] = i % bit_size; \ + } \ + CIRCULAR_LEFT_SHIFT_ARRAY(result_left##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\ + CIRCULAR_RIGHT_SHIFT_ARRAY(result_right##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\ + compare_results##bit_size(result_left##bit_size, result_right##bit_size, bit_size, shift_val##bit_size); \ + } while(0) + + +int main() { + TEST_SHIFT_OPERATIONS(uint8_t, 8); + TEST_SHIFT_OPERATIONS(uint16_t, 16); + TEST_SHIFT_OPERATIONS(uint32_t, 32); + TEST_SHIFT_OPERATIONS(uint64_t, 64); + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h new file mode 100644 index 00000000000..3db0d8643a8 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h @@ -0,0 +1,29 @@ +#include <stdint-gcc.h> + +#define VROL_VV(SEW, S, T) \ +__attribute__ ((noipa))\ +void autovect_vrol_vv_##S##SEW (T *out, T *op1, T *op2, int n){\ + for(int i=0; i<n; i++){ \ + op2[i] = op2[i] & (SEW-1);\ + out[i]= (op1[i] << op2[i]) | (op1[i] >> (SEW - op2[i]));\ + }\ +} + +#define VROR_VV(SEW, S, T) \ +__attribute__ ((noipa))\ +void autovect_vror_vv_##S##SEW (T *out, T *op1, T *op2, int n){\ + for(int i=0; i<n; i++){ \ + op2[i] = op2[i] & (SEW-1);\ + out[i]= (op1[i] >> op2[i]) | (op1[i] << (SEW - op2[i]));\ + }\ +} + +VROL_VV(8, u, uint8_t) +VROL_VV(16, u, uint16_t) +VROL_VV(32, u, uint32_t) +VROL_VV(64, u, uint64_t) + +VROR_VV(8, u, uint8_t) +VROR_VV(16, u, uint16_t) +VROR_VV(32, u, uint32_t) +VROR_VV(64, u, uint64_t) -- 2.17.1