Hi Segher, Thanks for the clarification!
Compared with the previous one, this add vrl<mode>_and define_insn(s) for explicit AND (truncation) as Jakub's suggestion. Bootstrapped and regtested on powerpc64le-unknown-linux-gnu, is it ok for trunk? Thanks, Kewen ---------------- gcc/ChangeLog 2019-07-23 Kewen Lin <li...@gcc.gnu.org> * config/rs6000/predicates.md (vint_reg_or_const_vector): New predicate. * config/rs6000/vector.md (vrotr<mode>3): New define_expand. * config/rs6000/altivec.md (vrl<mode>_and): New define_insns. gcc/testsuite/ChangeLog 2019-07-23 Kewen Lin <li...@gcc.gnu.org> * gcc.target/powerpc/vec_rotate-1.c: New test. * gcc.target/powerpc/vec_rotate-2.c: New test. on 2019/7/19 下午11:06, Segher Boessenkool wrote: > Hi! > > On Fri, Jul 19, 2019 at 10:21:06AM +0800, Kewen.Lin wrote: >> on 2019/7/19 上午3:48, Segher Boessenkool wrote: >>> On Thu, Jul 18, 2019 at 01:44:36PM +0800, Kewen.Lin wrote: >>>> on 2019/7/17 下午9:40, Segher Boessenkool wrote: >>>>> On Wed, Jul 17, 2019 at 04:32:15PM +0800, Kewen.Lin wrote: >>>>>> Regression testing just launched, is it OK for trunk if it's bootstrapped >>>>>> and regresstested on powerpc64le-unknown-linux-gnu? >>>>> >>>>>> +;; Expanders for rotatert to make use of vrotl >>>>>> +(define_expand "vrotr<mode>3" >>>>>> + [(set (match_operand:VEC_I 0 "vint_operand") >>>>>> + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand") >>>>>> + (match_operand:VEC_I 2 >>>>>> "vint_reg_or_const_vector")))] >>>>> >>>>> Having any rotatert in a define_expand or define_insn will regress. > > This is wrong. I don't know why I thought this for a while. > > There shouldn't be any rotatert in anything that goes through recog, but > that is everything *except* define_expand. So define_insn, define_split, > define_peephole, define_peephole2 (and define_insn_and_split, which is > just syntactic sugar). > >> Thanks for further explanation! Sorry that, but I didn't find this >> HAVE_rotatert definition. I guess it's due to the preparation is always >> "DONE"? Then it doesn't really generate rotatert. > > You only had one in a define_expand. That is fine, that pattern is never > recognised against. HAVE_rotatert means that something somewhere will > recognise rotatert RTL insns; if it isn't set, it doesn't make sense to > ever create them, because they will never match. > >> although I can see rotatert in insn like below, it seems fine in note? > > Sure, many things are allowed in notes that can never show up in RTL > proper. > > So, this approach will work fine, and not be too bad. Could you do a > new patch with it? It's simple to do, and even if the generic thing > will happen eventually, this is a nice stepping stone for that. > > Thanks, and sorry for the confusion, > > > Segher >
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index b6a22d9010c..2b6a957d4a6 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1666,6 +1666,60 @@ "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +;; Here these vrl<VI2>_and are for vrotr<mode>3 expansion. +;; since SHIFT_COUNT_TRUNCATED is set as zero, to append one explicit +;; AND to indicate truncation but emit vrl<VI_char> insn. +(define_insn "vrlv2di_and" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (and:V2DI + (rotate:V2DI (match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")) + (const_vector:V2DI [(const_int 63) (const_int 63)])))] + "VECTOR_UNIT_P8_VECTOR_P (V2DImode)" + "vrld %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vrlv4si_and" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (and:V4SI + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (const_vector:V4SI [(const_int 31) (const_int 31) + (const_int 31) (const_int 31)])))] + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" + "vrlw %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vrlv8hi_and" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (and:V8HI + (rotate:V8HI (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (const_vector:V8HI [(const_int 15) (const_int 15) + (const_int 15) (const_int 15) + (const_int 15) (const_int 15) + (const_int 15) (const_int 15)])))] + "VECTOR_UNIT_ALTIVEC_P (V8HImode)" + "vrlh %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vrlv16qi_and" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (and:V16QI + (rotate:V16QI (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (const_vector:V16QI [(const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7) + (const_int 7) (const_int 7)])))] + "VECTOR_UNIT_ALTIVEC_P (V16QImode)" + "vrlb %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl<VI_char>mi" [(set (match_operand:VIlong 0 "register_operand" "=v") (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0") diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 8ca98299950..c4c74630d26 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -163,6 +163,17 @@ return VINT_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a vector register that operates on integer vectors +;; or if op is a const vector with integer vector modes. +(define_predicate "vint_reg_or_const_vector" + (match_code "reg,subreg,const_vector") +{ + if (GET_CODE (op) == CONST_VECTOR && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + return 1; + + return vint_operand (op, mode); +}) + ;; Return 1 if op is a vector register to do logical operations on (and, or, ;; xor, etc.) (define_predicate "vlogical_operand" diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 70bcfe02e22..b53f9717eb2 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -1260,6 +1260,35 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; Expanders for rotatert to make use of vrotl +(define_expand "vrotr<mode>3" + [(set (match_operand:VEC_I 0 "vint_operand") + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_reg_or_const_vector")))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + rtx rot_count = gen_reg_rtx (<MODE>mode); + if (GET_CODE (operands[2]) == CONST_VECTOR) + { + machine_mode inner_mode = GET_MODE_INNER (<MODE>mode); + unsigned int bits = GET_MODE_PRECISION (inner_mode); + rtx mask_vec = gen_const_vec_duplicate (<MODE>mode, GEN_INT (bits - 1)); + rtx imm_vec + = simplify_const_unary_operation (NEG, <MODE>mode, operands[2], + GET_MODE (operands[2])); + imm_vec + = simplify_const_binary_operation (AND, <MODE>mode, imm_vec, mask_vec); + rot_count = force_reg (<MODE>mode, imm_vec); + emit_insn (gen_vrotl<mode>3 (operands[0], operands[1], rot_count)); + } + else + { + emit_insn (gen_neg<mode>2 (rot_count, operands[2])); + emit_insn (gen_vrl<mode>_and (operands[0], operands[1], rot_count)); + } + DONE; +}) + ;; Expanders for arithmetic shift left on each vector element (define_expand "vashl<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c new file mode 100644 index 00000000000..80aca1a94a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c @@ -0,0 +1,46 @@ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +/* Check vectorizer can exploit vector rotation instructions on Power, mainly + for the case rotation count is const number. */ + +#define N 256 +unsigned long long sud[N], rud[N]; +unsigned int suw[N], ruw[N]; +unsigned short suh[N], ruh[N]; +unsigned char sub[N], rub[N]; + +void +testULL () +{ + for (int i = 0; i < 256; ++i) + rud[i] = (sud[i] >> 8) | (sud[i] << (sizeof (sud[0]) * 8 - 8)); +} + +void +testUW () +{ + for (int i = 0; i < 256; ++i) + ruw[i] = (suw[i] >> 8) | (suw[i] << (sizeof (suw[0]) * 8 - 8)); +} + +void +testUH () +{ + for (int i = 0; i < 256; ++i) + ruh[i] = (unsigned short) (suh[i] >> 9) + | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - 9)); +} + +void +testUB () +{ + for (int i = 0; i < 256; ++i) + rub[i] = (unsigned char) (sub[i] >> 5) + | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - 5)); +} + +/* { dg-final { scan-assembler {\mvrld\M} } } */ +/* { dg-final { scan-assembler {\mvrlw\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ +/* { dg-final { scan-assembler {\mvrlb\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c new file mode 100644 index 00000000000..affda6c023b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-2.c @@ -0,0 +1,47 @@ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +/* Check vectorizer can exploit vector rotation instructions on Power, mainly + for the case rotation count isn't const number. */ + +#define N 256 +unsigned long long sud[N], rud[N]; +unsigned int suw[N], ruw[N]; +unsigned short suh[N], ruh[N]; +unsigned char sub[N], rub[N]; +extern unsigned char rot_cnt; + +void +testULL () +{ + for (int i = 0; i < 256; ++i) + rud[i] = (sud[i] >> rot_cnt) | (sud[i] << (sizeof (sud[0]) * 8 - rot_cnt)); +} + +void +testUW () +{ + for (int i = 0; i < 256; ++i) + ruw[i] = (suw[i] >> rot_cnt) | (suw[i] << (sizeof (suw[0]) * 8 - rot_cnt)); +} + +void +testUH () +{ + for (int i = 0; i < 256; ++i) + ruh[i] = (unsigned short) (suh[i] >> rot_cnt) + | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - rot_cnt)); +} + +void +testUB () +{ + for (int i = 0; i < 256; ++i) + rub[i] = (unsigned char) (sub[i] >> rot_cnt) + | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - rot_cnt)); +} + +/* { dg-final { scan-assembler {\mvrld\M} } } */ +/* { dg-final { scan-assembler {\mvrlw\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ +/* { dg-final { scan-assembler {\mvrlb\M} } } */