Hi Mike,

On Fri, Dec 09, 2016 at 03:48:10PM -0500, Michael Meissner wrote:
> @@ -7528,6 +7528,49 @@ rs6000_split_vec_extract_var (rtx dest, 

> +      if (TARGET_P9_VECTOR
> +       && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
> +       && INT_REGNO_P (dest_regno)
> +       && ALTIVEC_REGNO_P (src_regno)
> +       && INT_REGNO_P (element_regno))
> +     {
> +       rtx insn = NULL_RTX;

You don't need this initialisation (which just works around a warning) if
you do the emit_insn directly for all cases.

> +       else if (mode == V8HImode)
> +         {
> +           emit_insn (gen_ashlsi3 (dest_si, element_si, const1_rtx));
> +           insn = (VECTOR_ELT_ORDER_BIG
> +                   ? gen_vextuhlx (dest_si, dest_si, src)
> +                   : gen_vextuhrx (dest_si, dest_si, src));
> +         }

If you use a separate temporary reg (instead of reusing dest_si) GCC
has a better chance at optimising this (also in the next case).

> @@ -2535,36 +2547,63 @@ (define_expand  "vsx_extract_<mode>"
>  })
>  
>  (define_insn "vsx_extract_<mode>_p9"
> -  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>")
> +  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
>       (vec_select:<VS_scalar>
> -      (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
> -      (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))]
> +      (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
> +      (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))]
>    "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
>     && TARGET_VSX_SMALL_INTEGER"
>  {
> -  HOST_WIDE_INT elt = INTVAL (operands[2]);
> -  HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
> -                        ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
> -                        : elt);
> -
> -  HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
> -  HOST_WIDE_INT offset = unit_size * elt_adj;
> -
> -  operands[2] = GEN_INT (offset);
> -  if (unit_size == 4)
> -    return "xxextractuw %x0,%x1,%2";
> +  if (which_alternative == 0)
> +    return "#";
> +
>    else

Without else-after-return the code as well as the patch become more readable.

> -    return "vextractu<wd> %0,%1,%2";
> +    {
> +      HOST_WIDE_INT elt = INTVAL (operands[2]);
> +      HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
> +                            ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
> +                            : elt);
> +
> +      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
> +      HOST_WIDE_INT offset = unit_size * elt_adj;
> +
> +      operands[2] = GEN_INT (offset);
> +      if (unit_size == 4)
> +     return "xxextractuw %x0,%x1,%2";
> +      else
> +     return "vextractu<wd> %0,%1,%2";
> +    }
>  }
>    [(set_attr "type" "vecsimple")])


> +(define_split
> +  [(set (match_operand:<VS_scalar> 0 "int_reg_operand" "")
> +     (vec_select:<VS_scalar>
> +      (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "")
> +      (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "")])))]
> +  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
> +   && TARGET_VSX_SMALL_INTEGER && reload_completed"
> +  [(const_int 0)]

Please lose the default args ("").

Why only do the split when reload_completed?

> +(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
> +  [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
> +     (zero_extend:SDI
> +      (unspec:<VSX_EXTRACT_I:VS_scalar>
> +       [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
> +        (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
> +       UNSPEC_VSX_EXTRACT)))
> +   (clobber (match_scratch:DI 3 "=X,r,&b"))
> +   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
> +  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
> +  "#"
> +  "&& reload_completed"

And here.  And everything following.

> +  [(const_int 0)]
> +{
> +  machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
> +  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
> +                             operands[1], operands[2],
> +                             operands[3], operands[4]);
> +  DONE;
> +})

Please write the gen_rtx_REG on its own line (but you can eliminate smode
then).

> --- gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c    
> (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
>      (revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c    
> (.../gcc/testsuite/gcc.target/powerpc)  (revision 243492)
> @@ -0,0 +1,12 @@
> +/* { dg-do run { target { powerpc*-*-linux* } } } */
> +/* { dg-require-effective-target vsx_hw } */
> +/* { dg-options "-O2 -mvsx" } */

Why limit to linux?  In most other testcases we explicitly disallow most
other targets (also not ideal at all, no, but consistency helps here).
Same for the other new files.

> +/* { dg-final { scan-assembler     "vextub\[lr\]x" } } */
> +/* { dg-final { scan-assembler     "vextuh\[lr\]x" } } */
> +/* { dg-final { scan-assembler     "vextuw\[lr\]x" } } */
> +/* { dg-final { scan-assembler     "extsb"         } } */
> +/* { dg-final { scan-assembler     "extsh"         } } */
> +/* { dg-final { scan-assembler     "extsw"         } } */
> +/* { dg-final { scan-assembler-not "m\[ft\]vsr"    } } */
> +/* { dg-final { scan-assembler-not "stxvd2x"       } } */
> +/* { dg-final { scan-assembler-not "stxv"          } } */
> +/* { dg-final { scan-assembler-not "lwa"           } } */
> +/* { dg-final { scan-assembler-not "lwz"           } } */
> +/* { dg-final { scan-assembler-not "lha"           } } */
> +/* { dg-final { scan-assembler-not "lhz"           } } */

The "stxvd2x" is already covered by the "stxv".  Things like "lwa" are
an accident waiting to happen (it matches "always", for example).

A comment here saying what exactly you intend to test would help, too.


Segher

Reply via email to