Hi Mike, On Fri, Dec 09, 2016 at 03:48:10PM -0500, Michael Meissner wrote: > @@ -7528,6 +7528,49 @@ rs6000_split_vec_extract_var (rtx dest,
> + if (TARGET_P9_VECTOR > + && (mode == V16QImode || mode == V8HImode || mode == V4SImode) > + && INT_REGNO_P (dest_regno) > + && ALTIVEC_REGNO_P (src_regno) > + && INT_REGNO_P (element_regno)) > + { > + rtx insn = NULL_RTX; You don't need this initialisation (which just works around a warning) if you do the emit_insn directly for all cases. > + else if (mode == V8HImode) > + { > + emit_insn (gen_ashlsi3 (dest_si, element_si, const1_rtx)); > + insn = (VECTOR_ELT_ORDER_BIG > + ? gen_vextuhlx (dest_si, dest_si, src) > + : gen_vextuhrx (dest_si, dest_si, src)); > + } If you use a separate temporary reg (instead of reusing dest_si) GCC has a better chance at optimising this (also in the next case). > @@ -2535,36 +2547,63 @@ (define_expand "vsx_extract_<mode>" > }) > > (define_insn "vsx_extract_<mode>_p9" > - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>") > + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") > (vec_select:<VS_scalar> > - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") > - (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))] > + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") > + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))] > "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB > && TARGET_VSX_SMALL_INTEGER" > { > - HOST_WIDE_INT elt = INTVAL (operands[2]); > - HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG > - ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt > - : elt); > - > - HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); > - HOST_WIDE_INT offset = unit_size * elt_adj; > - > - operands[2] = GEN_INT (offset); > - if (unit_size == 4) > - return "xxextractuw %x0,%x1,%2"; > + if (which_alternative == 0) > + return "#"; > + > else Without else-after-return the code as well as the patch become more readable. > - return "vextractu<wd> %0,%1,%2"; > + { > + HOST_WIDE_INT elt = INTVAL (operands[2]); > + HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG > + ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt > + : elt); > + > + HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); > + HOST_WIDE_INT offset = unit_size * elt_adj; > + > + operands[2] = GEN_INT (offset); > + if (unit_size == 4) > + return "xxextractuw %x0,%x1,%2"; > + else > + return "vextractu<wd> %0,%1,%2"; > + } > } > [(set_attr "type" "vecsimple")]) > +(define_split > + [(set (match_operand:<VS_scalar> 0 "int_reg_operand" "") > + (vec_select:<VS_scalar> > + (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "") > + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "")])))] > + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB > + && TARGET_VSX_SMALL_INTEGER && reload_completed" > + [(const_int 0)] Please lose the default args (""). Why only do the split when reload_completed? > +(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var" > + [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r") > + (zero_extend:SDI > + (unspec:<VSX_EXTRACT_I:VS_scalar> > + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") > + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] > + UNSPEC_VSX_EXTRACT))) > + (clobber (match_scratch:DI 3 "=X,r,&b")) > + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] > + "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT" > + "#" > + "&& reload_completed" And here. And everything following. > + [(const_int 0)] > +{ > + machine_mode smode = <VSX_EXTRACT_I:MODE>mode; > + rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), > + operands[1], operands[2], > + operands[3], operands[4]); > + DONE; > +}) Please write the gen_rtx_REG on its own line (but you can eliminate smode then). > --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c > (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) > (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c > (.../gcc/testsuite/gcc.target/powerpc) (revision 243492) > @@ -0,0 +1,12 @@ > +/* { dg-do run { target { powerpc*-*-linux* } } } */ > +/* { dg-require-effective-target vsx_hw } */ > +/* { dg-options "-O2 -mvsx" } */ Why limit to linux? In most other testcases we explicitly disallow most other targets (also not ideal at all, no, but consistency helps here). Same for the other new files. > +/* { dg-final { scan-assembler "vextub\[lr\]x" } } */ > +/* { dg-final { scan-assembler "vextuh\[lr\]x" } } */ > +/* { dg-final { scan-assembler "vextuw\[lr\]x" } } */ > +/* { dg-final { scan-assembler "extsb" } } */ > +/* { dg-final { scan-assembler "extsh" } } */ > +/* { dg-final { scan-assembler "extsw" } } */ > +/* { dg-final { scan-assembler-not "m\[ft\]vsr" } } */ > +/* { dg-final { scan-assembler-not "stxvd2x" } } */ > +/* { dg-final { scan-assembler-not "stxv" } } */ > +/* { dg-final { scan-assembler-not "lwa" } } */ > +/* { dg-final { scan-assembler-not "lwz" } } */ > +/* { dg-final { scan-assembler-not "lha" } } */ > +/* { dg-final { scan-assembler-not "lhz" } } */ The "stxvd2x" is already covered by the "stxv". Things like "lwa" are an accident waiting to happen (it matches "always", for example). A comment here saying what exactly you intend to test would help, too. Segher