Hi Kewen, On Tue, Jul 23, 2019 at 02:28:28PM +0800, Kewen.Lin wrote: > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -1666,6 +1666,60 @@ > "vrl<VI_char> %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > +;; Here these vrl<VI2>_and are for vrotr<mode>3 expansion. > +;; since SHIFT_COUNT_TRUNCATED is set as zero, to append one explicit > +;; AND to indicate truncation but emit vrl<VI_char> insn. > +(define_insn "vrlv2di_and" > + [(set (match_operand:V2DI 0 "register_operand" "=v") > + (and:V2DI > + (rotate:V2DI (match_operand:V2DI 1 "register_operand" "v") > + (match_operand:V2DI 2 "register_operand" "v")) > + (const_vector:V2DI [(const_int 63) (const_int 63)])))] > + "VECTOR_UNIT_P8_VECTOR_P (V2DImode)" > + "vrld %0,%1,%2" > + [(set_attr "type" "vecsimple")])
"vrlv2di_and" is an a bit unhappy name, we have a "vrlv" intruction. Just something like "rotatev2di_something", maybe? Do we have something similar for non-rotate vector shifts, already? We probably should, so please keep that in mind for naming things. "vrlv2di_and" sounds like you first do the rotate, and then on what that results in you do the and. And that is what the pattern does, too. But this is wrong: it should mask off all but the lower bits of operand 2, instead. > +(define_insn "vrlv4si_and" > + [(set (match_operand:V4SI 0 "register_operand" "=v") > + (and:V4SI > + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v") > + (match_operand:V4SI 2 "register_operand" "v")) > + (const_vector:V4SI [(const_int 31) (const_int 31) > + (const_int 31) (const_int 31)])))] > + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" > + "vrlw %0,%1,%2" > + [(set_attr "type" "vecsimple")]) > + > +(define_insn "vrlv8hi_and" > + [(set (match_operand:V8HI 0 "register_operand" "=v") > + (and:V8HI > + (rotate:V8HI (match_operand:V8HI 1 "register_operand" "v") > + (match_operand:V8HI 2 "register_operand" "v")) > + (const_vector:V8HI [(const_int 15) (const_int 15) > + (const_int 15) (const_int 15) > + (const_int 15) (const_int 15) > + (const_int 15) (const_int 15)])))] > + "VECTOR_UNIT_ALTIVEC_P (V8HImode)" > + "vrlh %0,%1,%2" > + [(set_attr "type" "vecsimple")]) > + > +(define_insn "vrlv16qi_and" > + [(set (match_operand:V16QI 0 "register_operand" "=v") > + (and:V16QI > + (rotate:V16QI (match_operand:V16QI 1 "register_operand" "v") > + (match_operand:V16QI 2 "register_operand" "v")) > + (const_vector:V16QI [(const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7) > + (const_int 7) (const_int 7)])))] > + "VECTOR_UNIT_ALTIVEC_P (V16QImode)" > + "vrlb %0,%1,%2" > + [(set_attr "type" "vecsimple")]) All the patterns can be merged into one (using some code_iterator). That can be a later improvement. > +;; Return 1 if op is a vector register that operates on integer vectors > +;; or if op is a const vector with integer vector modes. > +(define_predicate "vint_reg_or_const_vector" > + (match_code "reg,subreg,const_vector") > +{ > + if (GET_CODE (op) == CONST_VECTOR && GET_MODE_CLASS (mode) == > MODE_VECTOR_INT) > + return 1; > + > + return vint_operand (op, mode); > +}) Hrm, I don't like this name very much. Why is just vint_operand not enough for what you use this for? > +;; Expanders for rotatert to make use of vrotl > +(define_expand "vrotr<mode>3" > + [(set (match_operand:VEC_I 0 "vint_operand") > + (rotatert:VEC_I (match_operand:VEC_I 1 "vint_operand") > + (match_operand:VEC_I 2 "vint_reg_or_const_vector")))] > + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" > +{ > + rtx rot_count = gen_reg_rtx (<MODE>mode); > + if (GET_CODE (operands[2]) == CONST_VECTOR) > + { > + machine_mode inner_mode = GET_MODE_INNER (<MODE>mode); > + unsigned int bits = GET_MODE_PRECISION (inner_mode); > + rtx mask_vec = gen_const_vec_duplicate (<MODE>mode, GEN_INT (bits - > 1)); > + rtx imm_vec > + = simplify_const_unary_operation (NEG, <MODE>mode, operands[2], (The "=" goes on the previous line). > + GET_MODE (operands[2])); > + imm_vec > + = simplify_const_binary_operation (AND, <MODE>mode, imm_vec, mask_vec); > + rot_count = force_reg (<MODE>mode, imm_vec); > + emit_insn (gen_vrotl<mode>3 (operands[0], operands[1], rot_count)); > + } > + else > + { > + emit_insn (gen_neg<mode>2 (rot_count, operands[2])); > + emit_insn (gen_vrl<mode>_and (operands[0], operands[1], rot_count)); > + } > + DONE; > +}) Why do you have to emit as the "and" form here? Emitting the "bare" rotate should work just as well here? > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate-1.c > @@ -0,0 +1,46 @@ > +/* { dg-options "-O3" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-final { scan-assembler {\mvrld\M} } } */ > +/* { dg-final { scan-assembler {\mvrlw\M} } } */ > +/* { dg-final { scan-assembler {\mvrlh\M} } } */ > +/* { dg-final { scan-assembler {\mvrlb\M} } } */ You need to generate code for whatever cpu introduced those insns, if you expect those to be generated ;-) vsx_ok isn't needed. Segher