Graham Stott <graham.st...@imgtec.com> writes: > +(define_constraint "YC" > + "@internal > + A constant vector with each element is a unsigned bitimm-bit integer with > only one bit set"
Maybe: A replicated vector constant in which the replicated value has a single bit set Likewise YZ and clear bits. > +(define_constraint "Y5" > + "@internal > + A constant vector with each element is a signed 6-bit integer" > + (and (match_code "const_vector") > + (match_test "mips_const_vector_any_int_p (op, mode, -32, 31)"))) Maybe use Usv6. A replicated vector constant in which the replicated value is a signed 6-bit number. > +(define_constraint "Y6" > + "@internal > + A constant vector with each element a unsigned 6-bit integer" > + (and (match_code "const_vector") > + (match_test "mips_const_vector_any_int_p (op, mode, 0, 31)"))) Similarly here for Uuv6. Upper bound should be 63 for a 6-bit integer. Would be good to have a test for that. > +(define_constraint "Y8" > + "@internal > + A constant vector with each element a unsigned 0-bit integer" > + (and (match_code "const_vector") > + (match_test "mips_const_vector_any_int_p (op, mode, 0, 255)"))) Similarly here for Uuv8. > @@ -127,3 +351,4 @@ > DEF_MIPS_FTYPE (1, (VOID, USI)) > DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI)) > DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI)) > + No newline here. > +(define_c_enum "unspec" [ > +UNSPEC_MSA_ADDVI > +UNSPEC_MSA_ANDI_B > +UNSPEC_MSA_ASUB_S > + UNSPEC_MSA_ASUB_U > + UNSPEC_MSA_AVE_S > + UNSPEC_MSA_AVE_U Formatting (second is right). > +(define_mode_iterator MODE128_2 [V2DF V4SF V2DI V4SI V8HI V16QI]) > +(define_mode_iterator IMODE128 [V2DI V4SI V8HI V16QI]) These two aren't used and I can't see where MODE128_2 would come in useful. Let's drop these for now. > +(define_mode_attr VHALFMODE > + [(V8HI "V16QI") > + (V4SI "V8HI") > + (V2SI "V4SI") > + (V2DI "V4SI") > + (V2DF "V4SF")]) > + > +;; This attribute gives the integer mode for selection mask in vec_perm. > +;; vcond also uses MSA_I for operand 0, 1, and 2. > +(define_mode_attr MSA_I > + [(V2DF "V2DI") > + (V4SF "V4SI") > + (V2DI "V2DI") > + (V4SI "V4SI") > + (V8HI "V8HI") > + (V16QI "V16QI")]) > + > +;; The attribute give the integer vector mode with same size > +(define_mode_attr MODE_I > + [(V2DF "V2DI") > + (V4SF "V4SI") > + (V2DI "V2DI") > + (V4SI "V4SI") > + (V8HI "V8HI") > + (V16QI "V16QI")]) Let's call this "VIMODE" for consistency with both "IMODE" in mips.md and the HALFMODE/VHALFMODE pair. VIMODE can be used in place of MSA_I; no need for both. > +;; This attribute qives suffix gives the mode of the result for "copy_s_b, > copy_u_b" etc. > +(define_mode_attr RES > + [(V2DF "DF") > + (V4SF "SF") > + (V2DI "DI") > + (V4SI "SI") > + (V8HI "SI") > + (V16QI "SI")]) Why we do need to promote sub-SI values to SI for this? I'd prefer that we use the "correct" mode (i.e. UNITMODE) instead. > +;; This is used in msa_cast* to output mov.s or mov.d. > +(define_mode_attr msafmt2 > + [(V2DF "d") > + (V4SF "s")]) Not really an MSA format. Maybe "unitfmt"? > +;; This attribute qives define_insn suffix for MSA instructions > +;; with need distinction between integer and floating point. > +(define_mode_attr msafmt3 > + [(V2DF "d_f") > + (V4SF "w_f") > + (V2DI "d") > + (V4SI "w") > + (V8HI "h") > + (V16QI "b")]) msafmt_f might be more mnemonic than msafmt3. > +;; The maximum index inside a vector. > +(define_mode_attr max_elem_index > + [(V2DF "1") > + (V4SF "3") > + (V2DI "1") > + (V4SI "3") > + (V8HI "7") > + (V16QI "15")]) In the asserts where this is used it could just be "GET_MODE_NUNITS (<MODE>mode)" > +;; This is used to form an immediate operand constraint > +;; using "const_<imm>_operand". > +(define_mode_attr imm > + [(V2DF "0_or_1") > + (V4SF "0_to_3") > + (V2DI "0_or_1") > + (V4SI "0_to_3") > + (V8HI "uimm3") > + (V16QI "uimm4")]) Maybe indeximm rather than imm, for consistency with bitimm? > +;; This attribute is used to form the MODE for reg_or_0_operand > +;; constraint. > +(define_mode_attr REGOR0 > + [(V2DF "DF") > + (V4SF "SF") > + (V2DI "DI") > + (V4SI "SI") > + (V8HI "SI") > + (V16QI "SI")]) Same as RES, and same comment. > +(define_expand "vec_extract<mode>" > + [(match_operand:<UNITMODE> 0 "register_operand") > + (match_operand:IMSA 1 "register_operand") > + (match_operand 2 "const_int_operand")] > + "ISA_HAS_MSA" > +{ > + gcc_assert (UINTVAL (operands[2]) <= <max_elem_index>); > + enum machine_mode mode0 = GET_MODE (operands[0]); > + if (mode0 == QImode || mode0 == HImode) > + emit_move_insn (operands[0], > + gen_lowpart (mode0, gen_reg_rtx (SImode))); > + else > + emit_insn (gen_msa_copy_s_<msafmt> (operands[0], operands[1], > operands[2])); > + DONE; > +}) The QImode/HImode case isn't right -- the source of the move is an uninitialised register. Please make sure there's a testcase for this. You should be able to use <UNITMODE>mode instead of mode0. > +(define_expand "vec_extract<mode>" > + [(match_operand:<UNITMODE> 0 "register_operand") > + (match_operand:FMSA 1 "register_operand") > + (match_operand 2 "const_int_operand")] > + "ISA_HAS_MSA" > +{ > + rtx temp; > + gcc_assert (UINTVAL (operands[2]) <= <max_elem_index>); > + > + if (INTVAL (operands[2]) == 0) > + temp = operands[1]; > + else > + { > + temp = gen_reg_rtx (<MODE>mode); > + emit_insn (gen_msa_sldi_<msafmt3> (temp, operands[1], operands[1], > + operands[2])); > + } > + emit_insn (gen_msa_cast_to_scalar_<msafmt3> (operands[0], temp)); > + DONE; > +}) OK, this was in the last version too, but please explain it a bit more. I don't see how SLDI.W and SLDI.D would rotate the element into the right position. My understanding is that e.g.: SLDI.D <a1,a2,...,d1,d2,e1,e2,...,h1,h2>, <a1,a2,...,d1,d2,e1,e2,...,h1,h2>, 1 (with the vector being divided into bytes) gives: <a2,a1,...,d2,d1,e2,e1,...,h2,h1> whereas I thought we wanted: <e1,e2,....,h1,h2,a1,a2,...,d1,d2> > +(define_expand "vcondu<MSA_2:mode><IMSA:mode>" > + [(set (match_operand:MSA_2 0 "register_operand") > + (if_then_else:MSA_2 > + (match_operator 3 "" > + [(match_operand:IMSA 4 "register_operand") > + (match_operand:IMSA 5 "register_operand")]) > + (match_operand:MSA_2 1 "reg_or_m1_operand") > + (match_operand:MSA_2 2 "reg_or_0_operand")))] > + "ISA_HAS_MSA > + && (GET_MODE_NUNITS (<MSA_2:MODE>mode) > + == GET_MODE_NUNITS (<IMSA:MODE>mode))" > +{ > + rtx true_val = CONSTM1_RTX (<MSA_2:MODE>mode); > + rtx false_val = CONST0_RTX (<MSA_2:MODE>mode); > + > + if (operands[1] == true_val && operands[2] == false_val) > + mips_expand_msa_vcond (operands[0], operands[1], operands[2], > + GET_CODE (operands[3]), operands[4], operands[5]); > + else > + { > + rtx res = gen_reg_rtx (<MSA_2:MODE_I>mode); > + rtx temp1 = gen_reg_rtx (<MSA_2:MODE_I>mode); > + rtx temp2 = gen_reg_rtx (<MSA_2:MODE_I>mode); > + rtx xres = gen_reg_rtx (<MSA_2:MODE_I>mode); > + rtx xop1 = gen_reg_rtx (<MSA_2:MODE_I>mode); > + rtx xop2 = gen_reg_rtx (<MSA_2:MODE_I>mode); > + > + mips_expand_msa_vcond (res, true_val, false_val, > + GET_CODE (operands[3]), operands[4], operands[5]); > + // Results in -1 or 0 so need to convert this to correct result for the > + // correct true/false given by operands[1]/operands[2] repectively. > + emit_move_insn (xres, res); > + if (operands[1] != true_val) > + { > + emit_move_insn (xop1, operands[1]); > + emit_insn (gen_and<MSA_2:mode_i>3 (temp1, xres, xop1)); > + } > + else > + emit_move_insn (temp1, xres); > + > + emit_move_insn (temp2, CONSTM1_RTX (<MSA_2:MODE_I>mode)); > + emit_insn (gen_xor<MSA_2:mode_i>3 (temp2, xres, temp2)); > + if (operands[2] != false_val) > + { > + emit_move_insn (xop2, operands[2]); > + emit_insn (gen_and<MSA_2:mode_i>3 (temp2, temp2, xop2)); > + } > + emit_insn (gen_ior<MSA_2:mode_i>3 (xres, temp1, temp2)); > + emit_move_insn (operands[0], xres); When you have the pattern for BSEL.V/BMZ.V/... that I mentioned in the previous review, this should just use the expander for that pattern rather than generate individual ops. Same for the signed version. I stopped here for the .md file because the main thing I'd want to check is that all the patterns I mentioned have been defined. Like you say, it's still WIP so I'd rather wait until the patch is finished before doing all that again. > @@ -1777,6 +1787,190 @@ > : SYMBOL_REF_LOCAL_P (x)); > } > > +bool > +mips_const_vector_bitimm_set_p (rtx op, enum machine_mode mode) > +{ > + if (GET_CODE (op) == CONST_VECTOR && op != const0_rtx) > + { > + rtx elt0 = CONST_VECTOR_ELT (op, 0); > + HOST_WIDE_INT val = INTVAL (elt0); > + int vlog2 = exact_log2 (val); > + > + if (vlog2 != -1) > + { > + switch (mode) > + { > + case V16QImode: > + if (!(0 <= vlog2 && vlog2 <= 7)) > + return false; > + break; > + case V8HImode: > + if (!(0 <= vlog2 && vlog2 <= 15)) > + return false; > + break; > + case V4SImode: > + if (!(0 <= vlog2 && vlog2 <= 31)) > + return false; > + break; > + case V2DImode: > + if (!(0 <= vlog2 && vlog2 <= 63)) > + return false; > + break; > + default: > + gcc_unreachable (); > + } > + > + return mips_const_vector_same_int_p (op, mode, 0, val); > + } > + } > + > + return false; > +} > + > +bool > +mips_const_vector_bitimm_clr_p (rtx op, enum machine_mode mode) > +{ > + if (GET_CODE (op) == CONST_VECTOR && op != constm1_rtx) > + { > + rtx elt0 = CONST_VECTOR_ELT (op, 0); > + HOST_WIDE_INT val = INTVAL (elt0); > + int vlog2 = exact_log2 (~val); > + > + if (vlog2 != -1) > + { > + switch (mode) > + { > + case V16QImode: > + if (!(0 <= vlog2 && vlog2 <= 7)) > + return false; > + break; > + case V8HImode: > + if (!(0 <= vlog2 && vlog2 <= 15)) > + return false; > + break; > + case V4SImode: > + if (!(0 <= vlog2 && vlog2 <= 31)) > + return false; > + break; > + case V2DImode: > + if (!(0 <= vlog2 && vlog2 <= 63)) > + return false; > + break; > + default: > + gcc_unreachable (); > + } > + > + return mips_const_vector_same_val_p (op, mode); > + } > + } > + > + return false; > +} Rather than the switch statement you could use GET_MODE_UNIT_BITSIZE. > +/* Return true if OP is a constant vector with the number of units in MODE, > + and each unit has the same integer value. */ > + > +bool > +mips_const_vector_same_val_p (rtx op, enum machine_mode mode) > +{ > + HOST_WIDE_INT prev_value; > + int i, nunits = GET_MODE_NUNITS (mode); > + rtx elem; > + > + if (GET_CODE (op) != CONST_VECTOR || CONST_VECTOR_NUNITS (op) != nunits) > + return false; Why check the nunits rather than the mode? > + elem = CONST_VECTOR_ELT (op, 0); > + if (!CONST_INT_P (elem)) > + return false; > + > + prev_value = INTVAL (elem); > + for (i = 1; i < nunits; i++) > + { > + elem = CONST_VECTOR_ELT (op, i); > + if (!CONST_INT_P (elem)) > + return false; > + > + if (INTVAL (elem) != prev_value) > + return false; > + } I don't see any reason to enforce the integerness here. Might as well just have: if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode) return false; first = CONST_VECTOR_ELT (op, 0); for (int i = 1; i < nunits; i++) if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i))) return false; return true; > +/* Return true if OP is a constant vector with the number of units in MODE, > + and each unit has the same integer value in the range [LOW, HIGH]. */ > + > +bool > +mips_const_vector_same_int_p (rtx op, enum machine_mode mode, HOST_WIDE_INT > low, > + HOST_WIDE_INT high) > +{ > + HOST_WIDE_INT value, prev_value; > + int i, nunits = GET_MODE_NUNITS (mode); > + rtx elem; > + > + if (GET_CODE (op) != CONST_VECTOR || CONST_VECTOR_NUNITS (op) != nunits) > + return false; > + > + elem = CONST_VECTOR_ELT (op, 0); > + if (!CONST_INT_P (elem)) > + return false; > + > + value = INTVAL (elem); > + if (value < low || value > high) > + return false; > + > + prev_value = value; > + for (i = 1; i < nunits; i++) > + { > + elem = CONST_VECTOR_ELT (op, i); > + if (!CONST_INT_P (elem)) > + return false; > + > + value = INTVAL (elem); > + if (value != prev_value || value < low || value > high) > + return false; > + } > + > + return true; > +} Please use mips_const_vector_same_val_p and check the integer at the end. > @@ -2148,6 +2342,11 @@ > static int > mips_symbol_insns (enum mips_symbol_type type, enum machine_mode mode) > { > + /* MSA LD.* and ST.* cannot support loading symbols via an immediate > + operand. */ > + if (MSA_SUPPORTED_MODE_P (mode)) > + return 0; > + I think we should check TARGET_MSA here too. Given this change... > @@ -2315,6 +2519,10 @@ > && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode)) > return false; > > + /* MSA LD.* and ST.* cannot support loading symbols via %lo($base). */ > + if (MSA_SUPPORTED_MODE_P (mode)) > + return false; > + ...why is this one is also needed? (BTW, please generate the diff with -p.) > @@ -2444,6 +2652,8 @@ > return true; > if (ISA_HAS_LDX && mode == DImode) > return true; > + if (ISA_HAS_MSA && MSA_SUPPORTED_MODE_P (mode)) > + return true; > return false; > } > > @@ -2481,6 +2691,7 @@ > { > struct mips_address_info addr; > int factor; > + bool msa_p = (TARGET_MSA && !might_split_p && MSA_SUPPORTED_MODE_P (mode)); > > /* BLKmode is used for single unaligned loads and stores and should > not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty This is the wrong level to be checking this. Specifically: > @@ -2495,6 +2706,14 @@ > switch (addr.type) > { > case ADDRESS_REG: > + if (msa_p) > + { > + /* MSA LD.* and ST.* supports 10-bit signed offsets. */ > + if (mips_signed_immediate_p (INTVAL (addr.offset), 10, 0)) > + return 1; > + else > + return 0; > + } > if (TARGET_MIPS16 > && !mips16_unextended_reference_p (mode, addr.reg, > UINTVAL (addr.offset))) ...this should be checked in mips_valid_offset_p rather than here. > @@ -2502,13 +2721,13 @@ > return factor; > > case ADDRESS_LO_SUM: > - return TARGET_MIPS16 ? factor * 2 : factor; > + return msa_p ? 0 : TARGET_MIPS16 ? factor * 2 : factor; > > case ADDRESS_CONST_INT: > - return factor; > + return msa_p ? 0 : factor; > > case ADDRESS_SYMBOLIC: > - return factor * mips_symbol_insns (addr.symbol_type, mode); > + return msa_p ? 0 : factor * mips_symbol_insns (addr.symbol_type, mode); > } The CONST_INT case should be handled in mips_classify_address, probably by using mips_valid_offset_p rather than the current SMALL_INT check. The other two changes should already be redundant given your change to mips_symbol_insns. > +/* Return the number of instructions needed for an MSA integer division. */ > + > +int > +mips_msa_idiv_insns (void) > +{ > + if (TARGET_CHECK_ZERO_DIV) > + return 3; > + else > + return 1; > +} > > /* Emit a move from SRC to DEST. Assume that the move expanders can > handle all moves if !can_create_pseudo_p (). The distinction is > @@ -3980,6 +4239,10 @@ > case NE: > case UNORDERED: > case LTGT: > + case UNGE: > + case UNGT: > + case UNLE: > + case UNLT: > /* Branch comparisons have VOIDmode, so use the first operand's > mode instead. */ > mode = GET_MODE (XEXP (x, 0)); > @@ -4133,6 +4396,10 @@ > return true; > } > *total = COSTS_N_INSNS (mips_idiv_insns ()); > + if (MSA_SUPPORTED_MODE_P (mode)) > + *total = COSTS_N_INSNS (mips_msa_idiv_insns ()); > + else > + *total = COSTS_N_INSNS (mips_idiv_insns ()); > } > else if (mode == DImode) > *total = mips_cost->int_div_di; Seems like it'd be easier to modify mips_idiv_insns instead. > @@ -4342,6 +4609,26 @@ > return simplify_gen_subreg (word_mode, op, mode, byte); > } > > +/* Return one word of 128-bit value OP, taking into account the fixed > + endianness of certain registers. BYTE selects from the byte address. */ > + > +rtx > +mips_subword_at_byte (rtx op, unsigned int byte) > +{ > + enum machine_mode mode; > + > + mode = GET_MODE (op); > + if (mode == VOIDmode) > + mode = TImode; > + > + gcc_assert (!FP_REG_RTX_P (op)); > + > + if (MEM_P (op)) > + return mips_rewrite_small_data (adjust_address (op, word_mode, byte)); > + > + return simplify_gen_subreg (word_mode, op, mode, byte); > +} Please instead handle FP_REG_RTX_P and make mips_subword a simple wrapper around this. > @@ -4469,6 +4760,234 @@ > return SPLIT_IF_NECESSARY; > } > > +/* Return true if a 128-bit move from SRC to DEST should be split into two > + or four. */ > +bool > +mips_split_128bit_move_p (rtx dest, rtx src) > +{ > + /* MSA-to-MSA moves can be done in a single instruction. */ > + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) > + return false; > + > + /* Check for MSA loads and stores. */ > + if (FP_REG_RTX_P (dest) && MEM_P (src)) > + return false; > + if (FP_REG_RTX_P (src) && MEM_P (dest)) > + return false; > + > + /* Check for MSA set to 0 */ > + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) > + return false; Why just zero, when LDI is available? > +/* Split copy_s.d. */ > + > +void > +mips_split_msa_copy_d (rtx dest, rtx src, rtx index, rtx (*gen_fn)(rtx, rtx, > rtx)) Long line, Space between ")(". Document the arguments. E.g.: Split a COPY_S.D with operands DEST, SRC and INDEX. GEN_FN is ... > + emit_insn ((gen_fn)(low, new_src, GEN_INT (INTVAL (index) * 2))); > + emit_insn ((gen_fn)(high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); Just "gen_fn (...". > +/* Split insert.d. */ > + > +void > +mips_split_msa_insert_d (rtx dest, rtx src1, rtx index, rtx src2) > +{ > + gcc_assert (GET_MODE (dest) == GET_MODE (src1)); > + gcc_assert ((GET_MODE (dest) == V2DImode > + && (GET_MODE (src2) == DImode || src2 == const0_rtx)) > + || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); > + > + /* Note that low is always from the lower index, and high is always > + from the higher index. */ > + rtx low, high; > + if (src2 == const0_rtx) > + { > + low = src2; > + high = src2; > + } > + else > + { > + low = mips_subword (src2, false); > + high = mips_subword (src2, true); > + } As before, I don't think 0 needs to be a special case. mips_subword should handle it correctly. Other instances later. > @@ -5092,6 +5662,7 @@ > memset (cum, 0, sizeof (*cum)); > cum->prototype = (fntype && prototype_p (fntype)); > cum->gp_reg_found = (cum->prototype && stdarg_p (fntype)); > + cum->stdarg_p = (cum->prototype && stdarg_p (fntype)); Please use: cum->stdarg_p = (cum->prototype && stdarg_p (fntype)); cum->gp_reg_found = cum->stdarg_p; It looked like a lot of the code after this was still awaiting changes so I stopped there. Nice refactoring work on the .md file, thanks. Certainly looks cleaner now. Richard