Hi! On Tue, Nov 17, 2020 at 11:48:04AM -0600, acsaw...@linux.ibm.com wrote: > This patch changes powerpc MMA builtins to use the new opaque > mode class and use modes OO (32 bytes) and XO (64 bytes) > instead of POI/PXI. Using the opaque modes prevents > optimization from trying to do anything with vector > pair/quad, which was the problem we were seeing with the > partial integer modes.
> * gcc/config/rs6000/mma.md (unspec): > Add assemble/extract UNSPECs. That fits on one line :-) (More similar stuff below.) > * gcc/config/rs6000/rs6000-modes.def: Create XO and OO modes. You also delete OImode, XImode, POImode, and PXImode here. > --- a/gcc/config/rs6000/mma.md > +++ b/gcc/config/rs6000/mma.md > @@ -19,24 +19,19 @@ > ;; along with GCC; see the file COPYING3. If not see > ;; <http://www.gnu.org/licenses/>. > > -;; The MMA patterns use the multi-register PXImode and POImode partial > +;; The MMA patterns use the multi-register XOmode and OOmode partial > ;; integer modes to implement the target specific __vector_quad and > ;; __vector_pair types that the MMA built-in functions reference. They are opaque modes, not partial integer modes now :-) > +;; We define these modes with the new OPAQUE_MODE mechanism to prevent > +;; anything from trying to open them up. Any time you write "new" it is dated soon. Maybe just "these are OPAQUE_MODEs" or similar? > +(define_insn_and_split "*movxo" > + [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d") > + (match_operand:XO 1 "input_operand" "m,d,d,O"))] > "TARGET_MMA > + && (gpc_reg_operand (operands[0], XOmode) > + || gpc_reg_operand (operands[1], XOmode))" > "@ > # > # > # > xxsetaccz %A0" > "&& reload_completed > + && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)" XOmode can never be a const_int anymore, so this should just be "&& reload_completed" and nothing more. Or do you need to handle the unspec case in here? It can just be handled in a separate pattern in this case, so that would be much simpler. > +(define_insn_and_split "*mma_assemble_pair" > + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") > + (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") > + (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")] > + UNSPEC_MMA_ASSEMBLE))] > + "TARGET_MMA > + && vsx_register_operand (operands[0], OOmode)" operands[0] always is an OOmode vsx_register_operand, so this is just "TARGET_MMA" > +(define_insn_and_split "*mma_disassemble_pair" > + [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") > + (unspec:V16QI [(match_operand:OO 1 "input_operand" "wa") > + (match_operand 2 "const_int_operand")] > + UNSPEC_MMA_EXTRACT))] Use "const_0_to_1_operand" instead? > + "TARGET_MMA > + && fpr_reg_operand (operands[1], OOmode)" But registers can be all in "wa", not just the FPRs? > + "#" > + "&& reload_completed" > + [(const_int 0)] > +{ > + gcc_assert (REG_P (operands[1])); That is guaranteed by the insn condition. > + int reg = REGNO (operands[1]); > + int regoff = INTVAL (operands[2]); Should this consider endianness? > + rtx src = gen_rtx_REG (V16QImode, reg + regoff); > + emit_move_insn (operands[0], src); > + DONE; > +}) > +(define_expand "mma_disassemble_pair" Please put the define_expand before the corresponding define_insn. > + [(match_operand:V16QI 0 "mma_disassemble_output_operand") > + (match_operand:OO 1 "input_operand") > + (match_operand 2 "const_int_operand")] 0_to_1 again > + "TARGET_MMA" > +{ > + rtx src; > + int regoff = INTVAL (operands[2]); > + gcc_assert (IN_RANGE (regoff, 0, 1)); This assert then trivially isn't needed anymore, either. > + src = gen_rtx_UNSPEC (V16QImode, > + gen_rtvec (2, operands[1], GEN_INT (regoff)), > + UNSPEC_MMA_EXTRACT); > + emit_move_insn (operands[0], src); > DONE; > }) > +(define_insn_and_split "*mma_disassemble_acc" > + [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") > + (unspec:V16QI [(match_operand:XO 1 "input_operand" "d") > + (match_operand 2 "const_int_operand")] "const_0_to_3_operand", and everything else as well. > +(define_expand "mma_disassemble_acc" Also wrt this of course. > (define_expand "mma_xxsetaccz" > - [(set (match_operand:PXI 0 "fpr_reg_operand") > + [(set (match_operand:XO 0 "fpr_reg_operand") > (const_int 0))] > "TARGET_MMA" > { > - emit_insn (gen_movpxi (operands[0], const0_rtx)); > + emit_insn (gen_movxo (operands[0], const0_rtx)); > DONE; > }) That should wrap the zero in an unspec? > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -1072,8 +1072,9 @@ (define_predicate "input_operand" > && easy_fp_constant (op, mode)) > return 1; > > - /* Allow any integer constant. */ > - if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op)) > + /* Allow any integer constant. Also allow consts for OPAQUE_MODE. */ > + if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode)) > + && CONST_SCALAR_INT_P (op)) > return 1; We shouln't allow constant integers as input to OPAQUE_MODE? Such things should be wrapped in an unspec. > +;; Return 1 if this operand is valid for an MMA disassemble insn. > +(define_predicate "mma_disassemble_output_operand" > + (match_code "reg,subreg,mem") > +{ > + if (REG_P (op) && !vsx_register_operand (op, mode)) > + return false; > + return true; > +}) You need to handle subreg here, even if you do not need to do anything for mem. So something like: { if (SUBREG_P (op)) op = SUBREG_REG (op); if (!REG_P (op)) return 1; return vsx_register_operand (op); } > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode > mode, const_tree type, > bool > rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) > { > + /* We do not allow MMA types being used as return values. Only report > + the invalid return value usage the first time we encounter it. */ > + if (cfun != NULL You can just write "cfun" instead of "cfun != NULL", it looks much less silly. > + && !cfun->machine->mma_return_type_error > + && TREE_TYPE (cfun->decl) == fntype > + && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode)) > + { > + /* Record we have now handled function CFUN, so the next time we > + are called, we do not re-report the same error. */ > + cfun->machine->mma_return_type_error = true; > + if (TYPE_CANONICAL (type) != NULL_TREE) > + type = TYPE_CANONICAL (type); > + error ("invalid use of MMA type %qs as a function return value", > + IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)))); > + } Cool, thanks! > @@ -14049,21 +14055,21 @@ mma_init_builtins (void) > } > else > { > - if ((attr & RS6000_BTC_QUAD) == 0) > + if ( !( d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL Some stray spaces in there. > + || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL) > + && ((attr & RS6000_BTC_QUAD) == 0)) Get rid of some of the superfluous parens? It makes it hard to read. > } > > - if (icode == CODE_FOR_nothing) > + /* This is a disassemble pair/acc function. */ > + if ( d->code == MMA_BUILTIN_DISASSEMBLE_ACC > + || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR) Bad space (on both lines). > --- a/gcc/config/rs6000/rs6000-modes.def > +++ b/gcc/config/rs6000/rs6000-modes.def > @@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2); /* V2SI */ > combination. */ > PARTIAL_INT_MODE (TI, 128, PTI); > > -/* Define, but don't use the larger integer modes. We need an integer mode > - defined that is the same size as the vector pair and vector quad modes. > */ > - > -INT_MODE (OI, 32); > -INT_MODE (XI, 64); > - > /* Modes used by __vector_pair and __vector_quad. */ > -PARTIAL_INT_MODE (OI, 256, POI); /* __vector_pair. */ > -PARTIAL_INT_MODE (XI, 512, PXI); /* __vector_quad. */ Yay :-) > +OPAQUE_MODE (OO, 32); /* instead of POI */ > +OPAQUE_MODE (XO, 64); /* instead of PXI */ Those comments already make no sense, because those types occur nowhere anymore! Just say what they *are* used for? __vector_pair and __vector_quad. > @@ -16434,8 +16433,11 @@ rs6000_split_multireg_move (rtx dst, rtx src) > if (GET_CODE (src) == UNSPEC) > { > gcc_assert (REG_P (dst) > + && ((GET_MODE (src) == XOmode > + && FP_REGNO_P (REGNO (dst))) > + || (GET_MODE (src) == OOmode > + && VSX_REGNO_P (REGNO (dst)))) > + && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); You can write that as separate statements much more readable? gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); gcc_assert (REG_P (dst)); if (GET_MODE (src) == XOmode) gcc_assert (FP_REGNO_P (REGNO (dst))); if (GET_MODE (src) == OOmode) gcc_assert (VSX_REGNO_P (REGNO (dst))); > + if ( GET_MODE (src) == XOmode ) (stray spaces) > + { > + /* We are writing an accumulator register, so we have to > + prime it after we've written it. */ > + emit_insn (gen_mma_xxmtacc (dst, dst)); > + } You don't need a block around single insns (and in GCC we do not usually do that for comments either -- the comment should usually go before the "if" anyway, like here :-) ) Lots of little stuff, but also a few more serious things. Sorry :-) Segher