Re: [PATCH,rs6000] Make MMA builtins use opaque modes

Segher Boessenkool Tue, 17 Nov 2020 14:43:35 -0800

Hi!

On Tue, Nov 17, 2020 at 11:48:04AM -0600, [email protected] wrote:
> This patch changes powerpc MMA builtins to use the new opaque
> mode class and use modes OO (32 bytes) and XO (64 bytes)
> instead of POI/PXI. Using the opaque modes prevents
> optimization from trying to do anything with vector
> pair/quad, which was the problem we were seeing with the
> partial integer modes.


>       * gcc/config/rs6000/mma.md (unspec):
>       Add assemble/extract UNSPECs.

That fits on one line :-)  (More similar stuff below.)

>       * gcc/config/rs6000/rs6000-modes.def: Create XO and OO modes.

You also delete OImode, XImode, POImode, and PXImode here.

> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -19,24 +19,19 @@
>  ;; along with GCC; see the file COPYING3.  If not see
>  ;; <http://www.gnu.org/licenses/>.
>  
> -;; The MMA patterns use the multi-register PXImode and POImode partial
> +;; The MMA patterns use the multi-register XOmode and OOmode partial
>  ;; integer modes to implement the target specific __vector_quad and
>  ;; __vector_pair types that the MMA built-in functions reference.

They are opaque modes, not partial integer modes now :-)

> +;; We define these modes with the new OPAQUE_MODE mechanism to prevent
> +;; anything from trying to open them up.

Any time you write "new" it is dated soon.  Maybe just "these are
OPAQUE_MODEs" or similar?

> +(define_insn_and_split "*movxo"
> +  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
> +     (match_operand:XO 1 "input_operand" "m,d,d,O"))]
>    "TARGET_MMA
> +   && (gpc_reg_operand (operands[0], XOmode)
> +       || gpc_reg_operand (operands[1], XOmode))"
>    "@
>     #
>     #
>     #
>     xxsetaccz %A0"
>    "&& reload_completed
> +   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"

XOmode can never be a const_int anymore, so this should just be
  "&& reload_completed"
and nothing more.

Or do you need to handle the unspec case in here?  It can just be
handled in a separate pattern in this case, so that would be much
simpler.

> +(define_insn_and_split "*mma_assemble_pair"
> +  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
> +     (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
> +                 (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
> +                 UNSPEC_MMA_ASSEMBLE))]
> +  "TARGET_MMA
> +   && vsx_register_operand (operands[0], OOmode)"

operands[0] always is an OOmode vsx_register_operand, so this is just
  "TARGET_MMA"

> +(define_insn_and_split "*mma_disassemble_pair"
> +  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
> +       (unspec:V16QI [(match_operand:OO 1 "input_operand" "wa")
> +                      (match_operand 2 "const_int_operand")]
> +                   UNSPEC_MMA_EXTRACT))]

Use "const_0_to_1_operand" instead?

> +  "TARGET_MMA
> +   && fpr_reg_operand (operands[1], OOmode)"

But registers can be all in "wa", not just the FPRs?

> +  "#"
> +  "&& reload_completed"
> +  [(const_int 0)]
> +{
> +  gcc_assert (REG_P (operands[1]));

That is guaranteed by the insn condition.

> +  int reg = REGNO (operands[1]);
> +  int regoff = INTVAL (operands[2]);

Should this consider endianness?

> +  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
> +  emit_move_insn (operands[0], src);
> +  DONE;
> +})


> +(define_expand "mma_disassemble_pair"

Please put the define_expand before the corresponding define_insn.

> +  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
> +   (match_operand:OO 1 "input_operand")
> +   (match_operand 2 "const_int_operand")]

0_to_1 again

> +  "TARGET_MMA"
> +{
> +  rtx src;
> +  int regoff = INTVAL (operands[2]);
> +  gcc_assert (IN_RANGE (regoff, 0, 1));

This assert then trivially isn't needed anymore, either.

> +  src = gen_rtx_UNSPEC (V16QImode,
> +                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
> +                        UNSPEC_MMA_EXTRACT);
> +  emit_move_insn (operands[0], src);
>    DONE;
>  })


> +(define_insn_and_split "*mma_disassemble_acc"
> +  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
> +       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
> +                      (match_operand 2 "const_int_operand")]

"const_0_to_3_operand", and everything else as well.

> +(define_expand "mma_disassemble_acc"

Also wrt this of course.

>  (define_expand "mma_xxsetaccz"
> -  [(set (match_operand:PXI 0 "fpr_reg_operand")
> +  [(set (match_operand:XO 0 "fpr_reg_operand")
>       (const_int 0))]
>    "TARGET_MMA"
>  {
> -  emit_insn (gen_movpxi (operands[0], const0_rtx));
> +  emit_insn (gen_movxo (operands[0], const0_rtx));
>    DONE;
>  })

That should wrap the zero in an unspec?

> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -1072,8 +1072,9 @@ (define_predicate "input_operand"
>        && easy_fp_constant (op, mode))
>      return 1;
>  
> -  /* Allow any integer constant.  */
> -  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
> +  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
> +  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
> +      && CONST_SCALAR_INT_P (op))
>      return 1;

We shouln't allow constant integers as input to OPAQUE_MODE?  Such
things should be wrapped in an unspec.

> +;; Return 1 if this operand is valid for an MMA disassemble insn.
> +(define_predicate "mma_disassemble_output_operand"
> +  (match_code "reg,subreg,mem")
> +{
> +  if (REG_P (op) && !vsx_register_operand (op, mode))
> +    return false;
> +  return true;
> +})

You need to handle subreg here, even if you do not need to do anything
for mem.  So something like:

{
  if (SUBREG_P (op))
    op = SUBREG_REG (op);

  if (!REG_P (op))
    return 1;

  return vsx_register_operand (op);
}

> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode 
> mode, const_tree type,
>  bool
>  rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
>  {
> +  /* We do not allow MMA types being used as return values.  Only report
> +     the invalid return value usage the first time we encounter it.  */
> +  if (cfun != NULL

You can just write "cfun" instead of "cfun != NULL", it looks much less
silly.

> +      && !cfun->machine->mma_return_type_error
> +      && TREE_TYPE (cfun->decl) == fntype
> +      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
> +    {
> +      /* Record we have now handled function CFUN, so the next time we
> +      are called, we do not re-report the same error.  */
> +      cfun->machine->mma_return_type_error = true;
> +      if (TYPE_CANONICAL (type) != NULL_TREE)
> +     type = TYPE_CANONICAL (type);
> +      error ("invalid use of MMA type %qs as a function return value",
> +          IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
> +    }

Cool, thanks!

> @@ -14049,21 +14055,21 @@ mma_init_builtins (void)
>       }
>        else
>       {
> -       if ((attr & RS6000_BTC_QUAD) == 0)
> +       if ( !( d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL

Some stray spaces in there.

> +               || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
> +            && ((attr & RS6000_BTC_QUAD) == 0))

Get rid of some of the superfluous parens?  It makes it hard to read.

>       }
>  
> -      if (icode == CODE_FOR_nothing)
> +      /* This is a disassemble pair/acc function. */
> +      if ( d->code == MMA_BUILTIN_DISASSEMBLE_ACC
> +        || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)

Bad space (on both lines).

> --- a/gcc/config/rs6000/rs6000-modes.def
> +++ b/gcc/config/rs6000/rs6000-modes.def
> @@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
>     combination.  */
>  PARTIAL_INT_MODE (TI, 128, PTI);
>  
> -/* Define, but don't use the larger integer modes.  We need an integer mode
> -   defined that is the same size as the vector pair and vector quad modes.  
> */
> -
> -INT_MODE (OI, 32);
> -INT_MODE (XI, 64);
> -
>  /* Modes used by __vector_pair and __vector_quad.  */
> -PARTIAL_INT_MODE (OI, 256, POI);     /* __vector_pair.  */
> -PARTIAL_INT_MODE (XI, 512, PXI);     /* __vector_quad.  */

Yay :-)

> +OPAQUE_MODE (OO, 32); /* instead of POI */
> +OPAQUE_MODE (XO, 64); /* instead of PXI */

Those comments already make no sense, because those types occur nowhere
anymore!  Just say what they *are* used for?  __vector_pair and
__vector_quad.

> @@ -16434,8 +16433,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>        if (GET_CODE (src) == UNSPEC)
>       {
>         gcc_assert (REG_P (dst)
> +                   && ((GET_MODE (src) == XOmode
> +                        && FP_REGNO_P (REGNO (dst)))
> +                       || (GET_MODE (src) == OOmode
> +                           && VSX_REGNO_P (REGNO (dst))))
> +                   && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);

You can write that as separate statements much more readable?

          gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
          gcc_assert (REG_P (dst));
          if (GET_MODE (src) == XOmode)
            gcc_assert (FP_REGNO_P (REGNO (dst)));
          if (GET_MODE (src) == OOmode)
            gcc_assert (VSX_REGNO_P (REGNO (dst)));

> +       if ( GET_MODE (src) == XOmode )

(stray spaces)

> +         {
> +           /* We are writing an accumulator register, so we have to
> +              prime it after we've written it.  */
> +           emit_insn (gen_mma_xxmtacc (dst, dst));
> +         }

You don't need a block around single insns (and in GCC we do not usually
do that for comments either -- the comment should usually go before the
"if" anyway, like here :-) )


Lots of little stuff, but also a few more serious things.  Sorry :-)


Segher

Re: [PATCH,rs6000] Make MMA builtins use opaque modes

Reply via email to