On Fri, 2021-11-05 at 00:09 -0400, Michael Meissner wrote:
> Generate XXSPLTIW on power10.
> 

Hi,


> This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
> instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
> adding support for vector constants that can be used, and adding a
> VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
> 
> The eP constraint was added to recognize constants that can be loaded into
> vector registers with a single prefixed instruction.

Perhaps swap "... the eP constraint was added ..." for "Add the eP
constraint to ...".


> 
> I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
> constants.


> 
> 2021-11-05  Michael Meissner  <meiss...@linux.ibm.com>
> 
> gcc/
> 
>       * config/rs6000/constraints.md (eP): Update comment.
>       * config/rs6000/predicates.md (easy_fp_constant): Add support for
>       generating XXSPLTIW.
>       (vsx_prefixed_constant): New predicate.
>       (easy_vector_constant): Add support for
>       generating XXSPLTIW.
>       * config/rs6000/rs6000-protos.h (prefixed_xxsplti_p): New
>       declaration.
>       (constant_generates_xxspltiw): Likewise.
>       * config/rs6000/rs6000.c (xxspltib_constant_p): If we can generate
>       XXSPLTIW, don't do XXSPLTIB and sign extend.

Perhaps just 'generate XXSPLTIW if possible'.  

>       (output_vec_const_move): Add support for XXSPLTIW.
>       (prefixed_xxsplti_p): New function.
>       (constant_generates_xxspltiw): New function.
>       * config/rs6000/rs6000.md (prefixed attribute): Add support to
>       mark XXSPLTI* instructions as being prefixed.
>       * config/rs6000/rs6000.opt (-msplat-word-constant): New debug
>       switch.
>       * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
>       generating XXSPLTIW or XXSPLTIDP.
>       (vsx_mov<mode>_32bit): Likewise.
>       * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
>       eP constraint.
> 
> gcc/testsuite/
> 
>       * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
>       * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
>       * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
>       * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.
>       * gcc.target/powerpc/vec-splati-runnable.c: Update insn count.
> ---
>  gcc/config/rs6000/constraints.md              |  6 ++
>  gcc/config/rs6000/predicates.md               | 46 ++++++++++-
>  gcc/config/rs6000/rs6000-protos.h             |  2 +
>  gcc/config/rs6000/rs6000.c                    | 81 +++++++++++++++++++
>  gcc/config/rs6000/rs6000.md                   |  5 ++
>  gcc/config/rs6000/rs6000.opt                  |  4 +
>  gcc/config/rs6000/vsx.md                      | 28 +++----
>  gcc/doc/md.texi                               |  4 +
>  .../powerpc/vec-splat-constant-v16qi.c        | 27 +++++++
>  .../powerpc/vec-splat-constant-v4sf.c         | 67 +++++++++++++++
>  .../powerpc/vec-splat-constant-v4si.c         | 51 ++++++++++++
>  .../powerpc/vec-splat-constant-v8hi.c         | 62 ++++++++++++++
>  .../gcc.target/powerpc/vec-splati-runnable.c  |  4 +-
>  13 files changed, 369 insertions(+), 18 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
> 
> diff --git a/gcc/config/rs6000/constraints.md 
> b/gcc/config/rs6000/constraints.md
> index e72132b4c28..a4b05837fa6 100644
> --- a/gcc/config/rs6000/constraints.md
> +++ b/gcc/config/rs6000/constraints.md
> @@ -213,6 +213,12 @@ (define_constraint "eI"
>    "A signed 34-bit integer constant if prefixed instructions are supported."
>    (match_operand 0 "cint34_operand"))
> 
> +;; A SF/DF scalar constant or a vector constant that can be loaded into 
> vector
> +;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW.
> +(define_constraint "eP"
> +  "A constant that can be loaded into a VSX register with one prefixed insn."
> +  (match_operand 0 "vsx_prefixed_constant"))
> +
>  ;; A TF/KF scalar constant or a vector constant that can load certain IEEE
>  ;; 128-bit constants into vector registers using LXVKQ.
>  (define_constraint "eQ"
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index e0d1c718e9f..ed6252bd0c4 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -605,7 +605,10 @@ (define_predicate "easy_fp_constant"
>    vec_const_128bit_type vsx_const;
>    if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
>      {
> -      if (constant_generates_lxvkq (&vsx_const) != 0)
> +      if (constant_generates_lxvkq (&vsx_const))
> +     return true;
> +
> +      if (constant_generates_xxspltiw (&vsx_const))
>       return true;
>      }
> 

ok

> @@ -617,6 +620,42 @@ (define_predicate "easy_fp_constant"
>     return 0;
>  })
> 
> +;; Return 1 if the operand is a 64-bit floating point scalar constant or a
> +;; vector constant that can be loaded to a VSX register with one prefixed
> +;; instruction, such as XXSPLTIDP or XXSPLTIW.
> +;;
> +;; In addition regular constants, we also recognize constants formed with the
> +;; VEC_DUPLICATE insn from scalar constants.
> +;;
> +;; We don't handle scalar integer constants here because the assumption is 
> the
> +;; normal integer constants will be loaded into GPR registers.  For the
> +;; constants that need to be loaded into vector registers, the instructions
> +;; don't work well with TImode variables assigned a constant.  This is 
> because
> +;; the 64-bit scalar constants are splatted into both halves of the register.
> +
> +(define_predicate "vsx_prefixed_constant"
> +  (match_code "const_double,const_vector,vec_duplicate")
> +{
> +  /* If we can generate the constant with 1-2 Altivec instructions, don't
> +      generate a prefixed instruction.  */

"1-2 Altivec instructions" is both vague and specific.  Perhaps swap it for
a comment along the lines of "If ... with easy Altivec instructions ...".

> +  if (CONST_VECTOR_P (op) && easy_altivec_constant (op, mode))
> +    return false;
> +
> +  /* Do we have prefixed instructions and are VSX registers available?  Is 
> the
> +     constant recognized?  */
> +  if (!TARGET_PREFIXED || !TARGET_VSX)
> +    return false;
> +
> +  vec_const_128bit_type vsx_const;
> +  if (!vec_const_128bit_to_bytes (op, mode, &vsx_const))
> +    return false;
> +
> +  if (constant_generates_xxspltiw (&vsx_const))
> +    return true;
> +
> +  return false;
> +})

ok

> +
>  ;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
>  ;; via the LXVKQ instruction.
> 
> @@ -683,7 +722,10 @@ (define_predicate "easy_vector_constant"
>        vec_const_128bit_type vsx_const;
>        if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
>       {
> -       if (constant_generates_lxvkq (&vsx_const) != 0)
> +       if (constant_generates_lxvkq (&vsx_const))
> +         return true;
> +
> +       if (constant_generates_xxspltiw (&vsx_const))
>           return true;
>       }


ok


> 
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 494a95cc6ee..99c6a671289 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -198,6 +198,7 @@ enum non_prefixed_form reg_to_non_prefixed (rtx reg, 
> machine_mode mode);
>  extern bool prefixed_load_p (rtx_insn *);
>  extern bool prefixed_store_p (rtx_insn *);
>  extern bool prefixed_paddi_p (rtx_insn *);
> +extern bool prefixed_xxsplti_p (rtx_insn *);
>  extern void rs6000_asm_output_opcode (FILE *);
>  extern void output_pcrel_opt_reloc (rtx);
>  extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
> @@ -251,6 +252,7 @@ typedef struct {
>  extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
>                                      vec_const_128bit_type *);
>  extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
> +extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
>  #endif /* RTX_CODE */
> 
>  #ifdef TREE_CODE


ok

> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 06d02085b06..be24f56eb31 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6940,6 +6940,11 @@ xxspltib_constant_p (rtx op,
>    else if (IN_RANGE (value, -1, 0))
>      *num_insns_ptr = 1;
> 
> +  /* If we can generate XXSPLTIW or XXSPLTIDP, don't generate XXSPLTIB and a
> +     sign extend operation.  */
> +  else if (vsx_prefixed_constant (op, mode))
> +    return false;

Comment is accurate, but might be clearer with something like:
  Don't generate this (xxspltib) instruction if we will be able to
  generate an xxspltiw or xxspltidp.


> +
>    else
>      *num_insns_ptr = 2;
> 
> @@ -7000,6 +7005,13 @@ output_vec_const_move (rtx *operands)
>             operands[2] = GEN_INT (imm);
>             return "lxvkq %x0,%2";
>           }
> +
> +       imm = constant_generates_xxspltiw (&vsx_const);
> +       if (imm)
> +         {
> +           operands[2] = GEN_INT (imm);
> +           return "xxspltiw %x0,%2";
> +         }
>       }
> 
>        if (TARGET_P9_VECTOR

ok

> @@ -26767,6 +26779,41 @@ prefixed_paddi_p (rtx_insn *insn)
>    return (iform == INSN_FORM_PCREL_EXTERNAL || iform == 
> INSN_FORM_PCREL_LOCAL);
>  }
> 
> +/* Whether an instruction is a prefixed XXSPLTI* instruction.  This is called
> +   from the prefixed attribute processing.  */
> +
> +bool
> +prefixed_xxsplti_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +  if (!set)
> +    return false;
> +
> +  rtx dest = SET_DEST (set);
> +  rtx src = SET_SRC (set);
> +  machine_mode mode = GET_MODE (dest);
> +
> +  if (!REG_P (dest) && !SUBREG_P (dest))
> +    return false;
> +
> +  if (GET_CODE (src) == UNSPEC)
> +    {
> +      int unspec = XINT (src, 1);
> +      return (unspec == UNSPEC_XXSPLTIW
> +           || unspec == UNSPEC_XXSPLTIDP
> +           || unspec == UNSPEC_XXSPLTI32DX);
> +    }
> +
> +  vec_const_128bit_type vsx_const;
> +  if (vec_const_128bit_to_bytes (src, mode, &vsx_const))
> +    {
> +      if (constant_generates_xxspltiw (&vsx_const))
> +     return true;
> +    }
> +
> +  return false;
> +}
> +

ok.


>  /* Whether the next instruction needs a 'p' prefix issued before the
>     instruction is printed out.  */
>  static bool prepend_p_to_next_insn;
> @@ -28933,6 +28980,40 @@ constant_generates_lxvkq (vec_const_128bit_type 
> *vsx_const)
>    return 0;
>  }
> 
> +/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero 
> if
> +   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
> +   value to be used with the XXSPLTIW instruction.  */
> +
> +unsigned
> +constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
> +{
> +  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
> +    return 0;
> +
> +  if (!vsx_const->all_words_same)
> +    return 0;
> +
> +  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
> +  if (vsx_const->all_bytes_same)
> +    return 0;
> +
> +  /* See if we can use VSPLTISH or VSPLTISW.  */
> +  if (vsx_const->all_half_words_same)
> +    {
> +      unsigned short h_word = vsx_const->half_words[0];
> +      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
> +      if (EASY_VECTOR_15 (sign_h_word))
> +     return 0;
> +    }
> +
> +  unsigned int word = vsx_const->words[0];
> +  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
> +  if (EASY_VECTOR_15 (sign_word))
> +    return 0;
> +
> +  return vsx_const->words[0];
> +}
> +

ok

>  
>  struct gcc_target targetm = TARGET_INITIALIZER;
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..3a7bcd2426e 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -314,6 +314,11 @@ (define_attr "prefixed" "no,yes"
> 
>        (eq_attr "type" "integer,add")
>        (if_then_else (match_test "prefixed_paddi_p (insn)")
> +                    (const_string "yes")
> +                    (const_string "no"))
> +
> +      (eq_attr "type" "vecperm")
> +      (if_then_else (match_test "prefixed_xxsplti_p (insn)")
>                      (const_string "yes")
>                      (const_string "no"))]
> 

ok


> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index b7433ec4e30..ec7b106fddb 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -640,6 +640,10 @@ mprivileged
>  Target Var(rs6000_privileged) Init(0)
>  Generate code that will run in privileged state.
> 
> +msplat-word-constant
> +Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
> +Generate (do not generate) code that uses the XXSPLTIW instruction.
> +
>  mieee128-constant
>  Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
>  Generate (do not generate) code that uses the LXVKQ instruction.

ok


> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 0a376ee4c28..9f0c48db6f2 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -1192,19 +1192,19 @@ (define_insn_and_split "*xxspltib_<mode>_split"
> 
>  ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ 
> (GPR)
>  ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    
> VSPLTISW
> -;;              LXVKQ
> +;;              LXVKQ      XXSPLTI*
>  ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
>  (define_insn "vsx_mov<mode>_64bit"
>    [(set (match_operand:VSX_M 0 "nonimmediate_operand"
>                 "=ZwO,      wa,        wa,        r,         we,        ?wQ,
>                  ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
> -                wa,
> +                wa,        wa,
>                  ?wa,       v,         <??r>,     wZ,        v")
> 
>       (match_operand:VSX_M 1 "input_operand" 
>                 "wa,        ZwO,       wa,        we,        r,         r,
>                  wQ,        Y,         r,         r,         wE,        jwM,
> -                eQ,
> +                eQ,        eP,
>                  ?jwM,      W,         <nW>,      v,         wZ"))]
> 
>    "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
> @@ -1216,43 +1216,43 @@ (define_insn "vsx_mov<mode>_64bit"
>    [(set_attr "type"
>                 "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
>                  store,     load,      store,     *,         vecsimple, 
> vecsimple,
> -                vecperm,
> +                vecperm,   vecperm,
>                  vecsimple, *,         *,         vecstore,  vecload")
>     (set_attr "num_insns"
>                 "*,         *,         *,         2,         *,         2,
>                  2,         2,         2,         2,         *,         *,
> -                *,
> +                *,         *,
>                  *,         5,         2,         *,         *")
>     (set_attr "max_prefixed_insns"
>                 "*,         *,         *,         *,         *,         2,
>                  2,         2,         2,         2,         *,         *,
> -                *,
> +                *,         *,
>                  *,         *,         *,         *,         *")
>     (set_attr "length"
>                 "*,         *,         *,         8,         *,         8,
>                  8,         8,         8,         8,         *,         *,
> -                *,
> +                *,         *,
>                  *,         20,        8,         *,         *")
>     (set_attr "isa"
>                 "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
>                  *,         *,         *,         *,         p9v,       *,
> -                p10,
> +                p10,       p10,
>                  <VSisa>,   *,         *,         *,         *")])
> 
>  ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR 
> move
> -;;              LXVKQ
> +;;              LXVKQ      XXSPLTI*
>  ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
>  ;;              LVX (VMX)  STVX (VMX)
>  (define_insn "*vsx_mov<mode>_32bit"
>    [(set (match_operand:VSX_M 0 "nonimmediate_operand"
>                 "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
> -                wa,
> +                wa,        wa,
>                  wa,        v,         ?wa,       v,         <??r>,
>                  wZ,        v")
> 
>       (match_operand:VSX_M 1 "input_operand" 
>                 "wa,        ZwO,       wa,        Y,         r,         r,
> -                eQ,
> +                eQ,        eP,
>                  wE,        jwM,       ?jwM,      W,         <nW>,
>                  v,         wZ"))]
> 
> @@ -1264,17 +1264,17 @@ (define_insn "*vsx_mov<mode>_32bit"
>  }
>    [(set_attr "type"
>                 "vecstore,  vecload,   vecsimple, load,      store,    *,
> -                vecperm,
> +                vecperm,   vecperm,
>                  vecsimple, vecsimple, vecsimple, *,         *,
>                  vecstore,  vecload")
>     (set_attr "length"
>                 "*,         *,         *,         16,        16,        16,
> -                *,
> +                *,         *,
>                  *,         *,         *,         20,        16,
>                  *,         *")
>     (set_attr "isa"
>                 "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
> -                p10,
> +                p10,       p10,
>                  p9v,       *,         <VSisa>,   *,         *,
>                  *,         *")])
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 4af8fd76992..41a568b7d4e 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -3336,6 +3336,10 @@ A constant whose negation is a signed 16-bit constant.
>  @item eI
>  A signed 34-bit integer constant if prefixed instructions are supported.
> 
> +@item eP
> +A scalar floating point constant or a vector constant that can be
> +loaded with one prefixed instruction to a VSX register.


...  loaded to a VSX register with one prefixed instruction.


> +
>  @item eQ
>  An IEEE 128-bit constant that can be loaded into a VSX register with a
>  single instruction.
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c 
> b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
> new file mode 100644
> index 00000000000..27764ddbc83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +



> +#include <altivec.h>
> +
> +/* Test whether XXSPLTIW is generated for V16HI vector constants where the
> +   first 4 elements are the same as the next 4 elements, etc.  */
> +
> +vector unsigned char
> +v16qi_const_1 (void)
> +{
> +  return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1,
> +                               1, 1, 1, 1, 1, 1, 1, 1, }; /* VSLTPISB.  */
> +}
> +
> +vector unsigned char
> +v16qi_const_2 (void)
> +{
> +  return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4,
> +                               1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW.  */
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxspltiw\M}              1 } } */
> +/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */
> +/* { dg-final { scan-assembler-not   {\mlxvx?\M}                   } } */
> +/* { dg-final { scan-assembler-not   {\mplxv\M}                    } } */


ok


Nothing jumped out at me with the test cases below.

Thanks
-Will


> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c 
> b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
> new file mode 100644
> index 00000000000..1f0475cf47a

<snip>


Reply via email to