Hi Segher, Sorry for the late reply. I've addressed your comments in the attached patch. Some points: 1) Removed the explicit AND part. 2) Renamed the predicate to vint_reg_or_vint_const. 3) Split the test cases into altivec and power8.
As to the predicate name and usage, I checked the current vector shifts: they don't need to check for const_vector specially (e.g. for a right-to-left conversion), except for "vec_shr_<mode>", and that one checks for a scalar const int.

Btw, I've changed the

+  rtx imm_vec =
+    simplify_const_unary_operation

back to

+  rtx imm_vec
+    = simplify_const_unary_operation

Otherwise check_GNU_style will report a "Trailing operator" error. :(

Bootstrapped and regtested on powerpc64le-unknown-linux-gnu.

Thanks,
Kewen

------------

gcc/ChangeLog

2019-08-02  Kewen Lin  <li...@gcc.gnu.org>

	* config/rs6000/predicates.md (vint_reg_or_vint_const): New predicate.
	* config/rs6000/vector.md (vrotr<mode>3): New define_expand.

gcc/testsuite/ChangeLog

2019-08-02  Kewen Lin  <li...@gcc.gnu.org>

	* gcc.target/powerpc/vec_rotate-1.c: New test.
	* gcc.target/powerpc/vec_rotate-2.c: New test.
	* gcc.target/powerpc/vec_rotate-3.c: New test.
	* gcc.target/powerpc/vec_rotate-4.c: New test.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 8ca98299950..faf057425a8 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -163,6 +163,17 @@ return VINT_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a vector register that operates on integer vectors +;; or if op is a const vector with integer vector modes. +(define_predicate "vint_reg_or_vint_const" + (match_code "reg,subreg,const_vector") +{ + if (GET_CODE (op) == CONST_VECTOR && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + return 1; + + return vint_operand (op, mode); +}) + ;; Return 1 if op is a vector register to do logical operations on (and, or, ;; xor, etc.) (define_predicate "vlogical_operand" diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 70bcfe02e22..3111ca9029f 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -1260,6 +1260,33 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; Expanders for rotatert to make use of vrotl +(define_expand "vrotr<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand") + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_reg_or_vint_const")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rtx rot_count = gen_reg_rtx (<MODE>mode); + if (GET_CODE (operands[2]) == CONST_VECTOR) + { + machine_mode inner_mode = GET_MODE_INNER (<MODE>mode); + unsigned int bits = GET_MODE_PRECISION (inner_mode); + rtx mask_vec = gen_const_vec_duplicate (<MODE>mode, GEN_INT (bits - 1)); + rtx imm_vec + = simplify_const_unary_operation (NEG, <MODE>mode, operands[2], + GET_MODE (operands[2])); + imm_vec + = simplify_const_binary_operation (AND, <MODE>mode, imm_vec, mask_vec); + rot_count = force_reg (<MODE>mode, imm_vec); + } + else + emit_insn (gen_neg<mode>2 (rot_count, operands[2])); + + emit_insn (gen_vrotl<mode>3 (operands[0], operands[1], rot_count)); + DONE; +}) + ;; Expanders for arithmetic shift left on each vector element 
(define_expand "vashl<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c new file mode 100644 index 00000000000..f035a578292 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c @@ -0,0 +1,39 @@ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ + +/* Check vectorizer can exploit vector rotation instructions on Power, mainly + for the case rotation count is const number. + + Check for instructions vrlb/vrlh/vrlw only available if altivec supported. */ + +#define N 256 +unsigned int suw[N], ruw[N]; +unsigned short suh[N], ruh[N]; +unsigned char sub[N], rub[N]; + +void +testUW () +{ + for (int i = 0; i < 256; ++i) + ruw[i] = (suw[i] >> 8) | (suw[i] << (sizeof (suw[0]) * 8 - 8)); +} + +void +testUH () +{ + for (int i = 0; i < 256; ++i) + ruh[i] = (unsigned short) (suh[i] >> 9) + | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - 9)); +} + +void +testUB () +{ + for (int i = 0; i < 256; ++i) + rub[i] = (unsigned char) (sub[i] >> 5) + | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - 5)); +} + +/* { dg-final { scan-assembler {\mvrlw\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ +/* { dg-final { scan-assembler {\mvrlb\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c new file mode 100644 index 00000000000..0a2a965ddcb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c @@ -0,0 +1,19 @@ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O3 -mdejagnu-cpu=power8" } */ + +/* Check vectorizer can exploit vector rotation instructions on Power8, mainly + for the case rotation count is const number. + + Check for vrld which is available on Power8 and above. 
*/ + +#define N 256 +unsigned long long sud[N], rud[N]; + +void +testULL () +{ + for (int i = 0; i < 256; ++i) + rud[i] = (sud[i] >> 8) | (sud[i] << (sizeof (sud[0]) * 8 - 8)); +} + +/* { dg-final { scan-assembler {\mvrld\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-3.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-3.c new file mode 100644 index 00000000000..5e90ae6fd63 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-3.c @@ -0,0 +1,40 @@ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ + +/* Check vectorizer can exploit vector rotation instructions on Power, mainly + for the case rotation count isn't const number. + + Check for instructions vrlb/vrlh/vrlw only available if altivec supported. */ + +#define N 256 +unsigned int suw[N], ruw[N]; +unsigned short suh[N], ruh[N]; +unsigned char sub[N], rub[N]; +extern unsigned char rot_cnt; + +void +testUW () +{ + for (int i = 0; i < 256; ++i) + ruw[i] = (suw[i] >> rot_cnt) | (suw[i] << (sizeof (suw[0]) * 8 - rot_cnt)); +} + +void +testUH () +{ + for (int i = 0; i < 256; ++i) + ruh[i] = (unsigned short) (suh[i] >> rot_cnt) + | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - rot_cnt)); +} + +void +testUB () +{ + for (int i = 0; i < 256; ++i) + rub[i] = (unsigned char) (sub[i] >> rot_cnt) + | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - rot_cnt)); +} + +/* { dg-final { scan-assembler {\mvrlw\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ +/* { dg-final { scan-assembler {\mvrlb\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-4.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-4.c new file mode 100644 index 00000000000..0d3e8378ed6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-4.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O3 -mdejagnu-cpu=power8" } */ + +/* Check vectorizer can exploit vector rotation instructions on Power8, mainly + for the case 
rotation count isn't const number. + + Check for vrld which is available on Power8 and above. */ + +#define N 256 +unsigned long long sud[N], rud[N]; +extern unsigned char rot_cnt; + +void +testULL () +{ + for (int i = 0; i < 256; ++i) + rud[i] = (sud[i] >> rot_cnt) | (sud[i] << (sizeof (sud[0]) * 8 - rot_cnt)); +} + +/* { dg-final { scan-assembler {\mvrld\M} } } */