Re: [PATCH RFC] MIPS add support for MIPS SIMD ARCHITECTURE V1.07

Richard Sandiford Sat, 15 Feb 2014 05:03:49 -0800

Graham Stott <graham.st...@imgtec.com> writes:
> +(define_constraint "YC"
> +  "@internal
> +   A constant vector with each element is a unsigned bitimm-bit integer with 
> only one bit set"


Maybe:

  A replicated vector constant in which the replicated value has a single
  bit set

Likewise YZ and clear bits.

> +(define_constraint "Y5"
> +  "@internal
> +   A constant vector with each element is a signed 6-bit integer"
> +  (and (match_code "const_vector")
> +       (match_test "mips_const_vector_any_int_p (op, mode, -32, 31)")))

Maybe use Usv6.

  A replicated vector constant in which the replicated value is a signed
  6-bit number.

> +(define_constraint "Y6"
> +  "@internal
> +   A constant vector with each element a unsigned 6-bit integer"
> +  (and (match_code "const_vector")
> +       (match_test "mips_const_vector_any_int_p (op, mode, 0, 31)")))

Similarly here for Uuv6.  Upper bound should be 63 for a 6-bit integer.
Would be good to have a test for that.

> +(define_constraint "Y8"
> +  "@internal
> +   A constant vector with each element a unsigned 0-bit integer"
> +  (and (match_code "const_vector")
> +       (match_test "mips_const_vector_any_int_p (op, mode, 0, 255)")))

Similarly here for Uuv8.

> @@ -127,3 +351,4 @@
>  DEF_MIPS_FTYPE (1, (VOID, USI))
>  DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
>  DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
> +

No newline here.

> +(define_c_enum "unspec" [
> +UNSPEC_MSA_ADDVI
> +UNSPEC_MSA_ANDI_B
> +UNSPEC_MSA_ASUB_S
> +  UNSPEC_MSA_ASUB_U
> +  UNSPEC_MSA_AVE_S
> +  UNSPEC_MSA_AVE_U

Formatting (second is right).

> +(define_mode_iterator MODE128_2 [V2DF V4SF V2DI V4SI V8HI V16QI])
> +(define_mode_iterator IMODE128 [V2DI V4SI V8HI V16QI])

These two aren't used and I can't see where MODE128_2 would come in useful.
Let's drop these for now.

> +(define_mode_attr VHALFMODE 
> +  [(V8HI "V16QI")
> +   (V4SI "V8HI")
> +   (V2SI "V4SI")
> +   (V2DI "V4SI")
> +   (V2DF "V4SF")])
> +
> +;; This attribute gives the integer mode for selection mask in vec_perm.
> +;; vcond also uses MSA_I for operand 0, 1, and 2.
> +(define_mode_attr MSA_I
> +  [(V2DF "V2DI")
> +   (V4SF "V4SI")
> +   (V2DI "V2DI")
> +   (V4SI "V4SI")
> +   (V8HI "V8HI")
> +   (V16QI "V16QI")])
> +
> +;; The attribute give the integer vector mode with same size
> +(define_mode_attr MODE_I
> +  [(V2DF "V2DI")
> +   (V4SF "V4SI")
> +   (V2DI "V2DI")
> +   (V4SI "V4SI")
> +   (V8HI "V8HI")
> +   (V16QI "V16QI")])

Let's call this "VIMODE" for consistency with both "IMODE" in mips.md
and the HALFMODE/VHALFMODE pair.  VIMODE can be used in place of MSA_I;
no need for both.

> +;; This attribute qives suffix gives the mode of the result for "copy_s_b, 
> copy_u_b" etc.
> +(define_mode_attr RES
> +  [(V2DF "DF")
> +   (V4SF "SF")
> +   (V2DI "DI")
> +   (V4SI "SI")
> +   (V8HI "SI")
> +   (V16QI "SI")])

Why we do need to promote sub-SI values to SI for this?  I'd prefer
that we use the "correct" mode (i.e. UNITMODE) instead.

> +;; This is used in msa_cast* to output mov.s or mov.d.
> +(define_mode_attr msafmt2
> +  [(V2DF "d")
> +   (V4SF "s")])

Not really an MSA format.  Maybe "unitfmt"?

> +;; This attribute qives define_insn suffix for MSA instructions 
> +;; with need distinction between integer and floating point.
> +(define_mode_attr msafmt3
> +  [(V2DF "d_f")
> +   (V4SF "w_f")
> +   (V2DI "d")
> +   (V4SI "w")
> +   (V8HI "h")
> +   (V16QI "b")])

msafmt_f might be more mnemonic than msafmt3.

> +;; The maximum index inside a vector.
> +(define_mode_attr max_elem_index
> +  [(V2DF "1")
> +   (V4SF "3")
> +   (V2DI "1")
> +   (V4SI "3")
> +   (V8HI "7")
> +   (V16QI "15")])

In the asserts where this is used it could just be
"GET_MODE_NUNITS (<MODE>mode)"

> +;; This is used to form an immediate operand constraint 
> +;; using "const_<imm>_operand".
> +(define_mode_attr imm
> +  [(V2DF "0_or_1")
> +   (V4SF "0_to_3")
> +   (V2DI "0_or_1")
> +   (V4SI "0_to_3")
> +   (V8HI "uimm3")
> +   (V16QI "uimm4")])

Maybe indeximm rather than imm, for consistency with bitimm?

> +;; This attribute is used to form the MODE for reg_or_0_operand
> +;; constraint.
> +(define_mode_attr REGOR0
> +  [(V2DF "DF")
> +   (V4SF "SF")
> +   (V2DI "DI")
> +   (V4SI "SI")
> +   (V8HI "SI")
> +   (V16QI "SI")])

Same as RES, and same comment.

> +(define_expand "vec_extract<mode>"
> +  [(match_operand:<UNITMODE> 0 "register_operand")
> +   (match_operand:IMSA 1 "register_operand")
> +   (match_operand 2 "const_int_operand")]
> +  "ISA_HAS_MSA"
> +{
> +  gcc_assert (UINTVAL (operands[2]) <= <max_elem_index>);
> +  enum machine_mode mode0 = GET_MODE (operands[0]);
> +  if (mode0 == QImode || mode0 == HImode)
> +    emit_move_insn (operands[0],
> +                 gen_lowpart (mode0, gen_reg_rtx (SImode)));
> +  else
> +    emit_insn (gen_msa_copy_s_<msafmt> (operands[0], operands[1], 
> operands[2]));
> +  DONE;
> +})

The QImode/HImode case isn't right -- the source of the move is an
uninitialised register.  Please make sure there's a testcase for this.

You should be able to use <UNITMODE>mode instead of mode0.

> +(define_expand "vec_extract<mode>"
> +  [(match_operand:<UNITMODE> 0 "register_operand")
> +   (match_operand:FMSA 1 "register_operand")
> +   (match_operand 2 "const_int_operand")]
> +  "ISA_HAS_MSA"
> +{
> +  rtx temp;
> +  gcc_assert (UINTVAL (operands[2]) <= <max_elem_index>);
> +
> +  if (INTVAL (operands[2]) == 0)
> +    temp = operands[1];
> +  else
> +    {
> +      temp = gen_reg_rtx (<MODE>mode);
> +      emit_insn (gen_msa_sldi_<msafmt3> (temp, operands[1], operands[1],
> +                                      operands[2]));
> +    }
> +  emit_insn (gen_msa_cast_to_scalar_<msafmt3> (operands[0], temp));
> +  DONE;
> +})

OK, this was in the last version too, but please explain it a bit more.
I don't see how SLDI.W and SLDI.D would rotate the element into the right
position.  My understanding is that e.g.:

    SLDI.D <a1,a2,...,d1,d2,e1,e2,...,h1,h2>,
           <a1,a2,...,d1,d2,e1,e2,...,h1,h2>, 1

(with the vector being divided into bytes) gives:

    <a2,a1,...,d2,d1,e2,e1,...,h2,h1>

whereas I thought we wanted:

   <e1,e2,....,h1,h2,a1,a2,...,d1,d2>

> +(define_expand "vcondu<MSA_2:mode><IMSA:mode>"
> +  [(set (match_operand:MSA_2 0 "register_operand")
> +     (if_then_else:MSA_2
> +       (match_operator 3 ""
> +         [(match_operand:IMSA 4 "register_operand")
> +          (match_operand:IMSA 5 "register_operand")])
> +       (match_operand:MSA_2 1 "reg_or_m1_operand")
> +       (match_operand:MSA_2 2 "reg_or_0_operand")))]
> +  "ISA_HAS_MSA
> +   && (GET_MODE_NUNITS (<MSA_2:MODE>mode)
> +       == GET_MODE_NUNITS (<IMSA:MODE>mode))"
> +{
> +  rtx true_val = CONSTM1_RTX (<MSA_2:MODE>mode);
> +  rtx false_val = CONST0_RTX (<MSA_2:MODE>mode);
> +
> +  if (operands[1] == true_val && operands[2] == false_val)
> +    mips_expand_msa_vcond (operands[0], operands[1], operands[2],
> +                        GET_CODE (operands[3]), operands[4], operands[5]);
> +  else
> +    {
> +      rtx res = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +      rtx temp1 = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +      rtx temp2 = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +      rtx xres = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +      rtx xop1 = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +      rtx xop2 = gen_reg_rtx (<MSA_2:MODE_I>mode);
> +
> +      mips_expand_msa_vcond (res, true_val, false_val,
> +                          GET_CODE (operands[3]), operands[4], operands[5]);
> +      // Results in -1 or 0 so need to convert this to correct result for the
> +      // correct true/false given by operands[1]/operands[2] repectively.
> +      emit_move_insn (xres, res);
> +      if (operands[1] != true_val)
> +     {
> +       emit_move_insn (xop1, operands[1]);
> +       emit_insn (gen_and<MSA_2:mode_i>3 (temp1, xres, xop1));
> +     }
> +      else
> +     emit_move_insn (temp1, xres);
> +
> +      emit_move_insn (temp2, CONSTM1_RTX (<MSA_2:MODE_I>mode));
> +      emit_insn (gen_xor<MSA_2:mode_i>3 (temp2, xres, temp2));
> +      if (operands[2] != false_val)
> +     {
> +       emit_move_insn (xop2, operands[2]);
> +       emit_insn (gen_and<MSA_2:mode_i>3 (temp2, temp2, xop2));
> +     }
> +      emit_insn (gen_ior<MSA_2:mode_i>3 (xres, temp1, temp2));
> +      emit_move_insn (operands[0], xres);

When you have the pattern for BSEL.V/BMZ.V/... that I mentioned
in the previous review, this should just use the expander for that
pattern rather than generate individual ops.  Same for the signed version.

I stopped here for the .md file because the main thing I'd want
to check is that all the patterns I mentioned have been defined.
Like you say, it's still WIP so I'd rather wait until the patch
is finished before doing all that again.

> @@ -1777,6 +1787,190 @@
>         : SYMBOL_REF_LOCAL_P (x));
>  }
>  
> +bool
> +mips_const_vector_bitimm_set_p (rtx op, enum machine_mode mode)
> +{
> +  if (GET_CODE (op) == CONST_VECTOR && op != const0_rtx)
> +    {
> +      rtx elt0 = CONST_VECTOR_ELT (op, 0);
> +      HOST_WIDE_INT val = INTVAL (elt0);
> +      int vlog2 = exact_log2 (val);
> +
> +      if (vlog2 != -1)
> +     {
> +       switch (mode)
> +         {
> +         case V16QImode:
> +           if (!(0 <= vlog2 && vlog2 <= 7))
> +             return false;
> +           break;    
> +         case V8HImode:
> +           if (!(0 <= vlog2 && vlog2 <= 15))
> +             return false;
> +           break;
> +         case V4SImode:
> +           if (!(0 <= vlog2 && vlog2 <= 31))
> +             return false;
> +           break;
> +         case V2DImode:
> +           if (!(0 <= vlog2 && vlog2 <= 63))
> +             return false;
> +           break;
> +         default:
> +           gcc_unreachable ();
> +         }
> +
> +       return mips_const_vector_same_int_p (op, mode, 0, val);
> +     }
> +    }
> +
> +  return false;
> +}
> +
> +bool
> +mips_const_vector_bitimm_clr_p (rtx op, enum machine_mode mode)
> +{
> +  if (GET_CODE (op) == CONST_VECTOR && op != constm1_rtx)
> +    {
> +      rtx elt0 = CONST_VECTOR_ELT (op, 0);
> +      HOST_WIDE_INT val = INTVAL (elt0);
> +      int vlog2 = exact_log2 (~val);
> +
> +      if (vlog2 != -1)
> +     {
> +       switch (mode)
> +         {
> +         case V16QImode:
> +           if (!(0 <= vlog2 && vlog2 <= 7))
> +             return false;
> +           break;
> +         case V8HImode:
> +           if (!(0 <= vlog2 && vlog2 <= 15))
> +             return false;
> +           break;
> +         case V4SImode:
> +           if (!(0 <= vlog2 && vlog2 <= 31))
> +             return false;
> +           break;
> +         case V2DImode:
> +           if (!(0 <= vlog2 && vlog2 <= 63))
> +             return false;
> +           break;
> +         default:
> +           gcc_unreachable ();
> +         }
> +
> +       return mips_const_vector_same_val_p (op, mode);
> +     }
> +  }
> +
> +  return false;
> +}

Rather than the switch statement you could use GET_MODE_UNIT_BITSIZE.

> +/* Return true if OP is a constant vector with the number of units in MODE,
> +   and each unit has the same integer value. */
> +
> +bool
> +mips_const_vector_same_val_p (rtx op, enum machine_mode mode)
> +{
> +  HOST_WIDE_INT prev_value;
> +  int i, nunits = GET_MODE_NUNITS (mode);
> +  rtx elem;
> +
> +  if (GET_CODE (op) != CONST_VECTOR || CONST_VECTOR_NUNITS (op) != nunits)
> +    return false;

Why check the nunits rather than the mode?

> +  elem =  CONST_VECTOR_ELT (op, 0);
> +  if (!CONST_INT_P (elem))
> +    return false;
> +
> +  prev_value = INTVAL (elem);
> +  for (i = 1; i < nunits; i++)
> +    {
> +      elem = CONST_VECTOR_ELT (op, i);
> +      if (!CONST_INT_P (elem))
> +     return false;
> +
> +      if (INTVAL (elem) != prev_value)
> +     return false;
> +    }

I don't see any reason to enforce the integerness here.  Might as well
just have:

  if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode)
    return false;

  first = CONST_VECTOR_ELT (op, 0);
  for (int i = 1; i < nunits; i++)
    if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i)))
      return false;

  return true;

> +/* Return true if OP is a constant vector with the number of units in MODE,
> +   and each unit has the same integer value in the range [LOW, HIGH].  */
> +
> +bool
> +mips_const_vector_same_int_p (rtx op, enum machine_mode mode, HOST_WIDE_INT 
> low,
> +                           HOST_WIDE_INT high)
> +{
> +  HOST_WIDE_INT value, prev_value;
> +  int i, nunits = GET_MODE_NUNITS (mode);
> +  rtx elem;
> + 
> +  if (GET_CODE (op) != CONST_VECTOR || CONST_VECTOR_NUNITS (op) != nunits)
> +    return false;
> +
> +  elem =  CONST_VECTOR_ELT (op, 0);
> +  if (!CONST_INT_P (elem))
> +    return false;
> +
> +  value = INTVAL (elem);
> +  if (value < low || value > high)
> +    return false;
> +
> +  prev_value = value;
> +  for (i = 1; i < nunits; i++)
> +    {
> +      elem = CONST_VECTOR_ELT (op, i);
> +      if (!CONST_INT_P (elem))
> +     return false;
> +
> +      value = INTVAL (elem);
> +      if (value != prev_value || value < low || value > high)
> +     return false;
> +    }
> +
> +  return true;
> +}

Please use mips_const_vector_same_val_p and check the integer at the end.

> @@ -2148,6 +2342,11 @@
>  static int
>  mips_symbol_insns (enum mips_symbol_type type, enum machine_mode mode)
>  {
> +  /* MSA LD.* and ST.* cannot support loading symbols via an immediate
> +     operand.  */
> +  if (MSA_SUPPORTED_MODE_P (mode))
> +    return 0;
> +

I think we should check TARGET_MSA here too.  Given this change...

> @@ -2315,6 +2519,10 @@
>        && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode))
>      return false;
>  
> +  /* MSA LD.* and ST.* cannot support loading symbols via %lo($base).  */
> +  if (MSA_SUPPORTED_MODE_P (mode))
> +    return false;
> +

...why is this one is also needed?

(BTW, please generate the diff with -p.)

> @@ -2444,6 +2652,8 @@
>      return true;
>    if (ISA_HAS_LDX && mode == DImode)
>      return true;
> +  if (ISA_HAS_MSA && MSA_SUPPORTED_MODE_P (mode))
> +    return true;
>    return false;
>  }
>  
> @@ -2481,6 +2691,7 @@
>  {
>    struct mips_address_info addr;
>    int factor;
> +  bool msa_p = (TARGET_MSA && !might_split_p && MSA_SUPPORTED_MODE_P (mode));
>  
>    /* BLKmode is used for single unaligned loads and stores and should
>       not count as a multiword mode.  (GET_MODE_SIZE (BLKmode) is pretty

This is the wrong level to be checking this.  Specifically:

> @@ -2495,6 +2706,14 @@
>      switch (addr.type)
>        {
>        case ADDRESS_REG:
> +     if (msa_p)
> +       {
> +         /* MSA LD.* and ST.* supports 10-bit signed offsets.  */
> +         if (mips_signed_immediate_p (INTVAL (addr.offset), 10, 0))
> +           return 1;
> +         else
> +           return 0;
> +       }
>       if (TARGET_MIPS16
>           && !mips16_unextended_reference_p (mode, addr.reg,
>                                              UINTVAL (addr.offset)))

...this should be checked in mips_valid_offset_p rather than here.

> @@ -2502,13 +2721,13 @@
>       return factor;
>  
>        case ADDRESS_LO_SUM:
> -     return TARGET_MIPS16 ? factor * 2 : factor;
> +     return msa_p ? 0 : TARGET_MIPS16 ? factor * 2 : factor;
>  
>        case ADDRESS_CONST_INT:
> -     return factor;
> +     return msa_p ? 0 : factor;
>  
>        case ADDRESS_SYMBOLIC:
> -     return factor * mips_symbol_insns (addr.symbol_type, mode);
> +     return msa_p ? 0 : factor * mips_symbol_insns (addr.symbol_type, mode);
>        }

The CONST_INT case should be handled in mips_classify_address,
probably by using mips_valid_offset_p rather than the current
SMALL_INT check.

The other two changes should already be redundant given your
change to mips_symbol_insns.

> +/* Return the number of instructions needed for an MSA integer division.  */
> +
> +int
> +mips_msa_idiv_insns (void)
> +{
> +  if (TARGET_CHECK_ZERO_DIV)
> +    return 3;
> +  else
> +    return 1;
> +}
>  
>  /* Emit a move from SRC to DEST.  Assume that the move expanders can
>     handle all moves if !can_create_pseudo_p ().  The distinction is
> @@ -3980,6 +4239,10 @@
>      case NE:
>      case UNORDERED:
>      case LTGT:
> +    case UNGE:
> +    case UNGT:
> +    case UNLE:
> +    case UNLT:
>        /* Branch comparisons have VOIDmode, so use the first operand's
>        mode instead.  */
>        mode = GET_MODE (XEXP (x, 0));
> @@ -4133,6 +4396,10 @@
>             return true;
>           }
>         *total = COSTS_N_INSNS (mips_idiv_insns ());
> +       if (MSA_SUPPORTED_MODE_P (mode))
> +         *total = COSTS_N_INSNS (mips_msa_idiv_insns ());
> +       else
> +         *total = COSTS_N_INSNS (mips_idiv_insns ());
>       }
>        else if (mode == DImode)
>          *total = mips_cost->int_div_di;

Seems like it'd be easier to modify mips_idiv_insns instead.

> @@ -4342,6 +4609,26 @@
>    return simplify_gen_subreg (word_mode, op, mode, byte);
>  }
>  
> +/* Return one word of 128-bit value OP, taking into account the fixed
> +   endianness of certain registers.  BYTE selects from the byte address.  */
> +
> +rtx
> +mips_subword_at_byte (rtx op, unsigned int byte)
> +{
> +  enum machine_mode mode;
> +
> +  mode = GET_MODE (op);
> +  if (mode == VOIDmode)
> +    mode = TImode;
> +
> +  gcc_assert (!FP_REG_RTX_P (op));
> +
> +  if (MEM_P (op))
> +    return mips_rewrite_small_data (adjust_address (op, word_mode, byte));
> +
> +  return simplify_gen_subreg (word_mode, op, mode, byte);
> +}

Please instead handle FP_REG_RTX_P and make mips_subword a simple wrapper
around this.

> @@ -4469,6 +4760,234 @@
>    return SPLIT_IF_NECESSARY;
>  }
>  
> +/* Return true if a 128-bit move from SRC to DEST should be split into two
> +   or four.  */
> +bool
> +mips_split_128bit_move_p (rtx dest, rtx src)
> +{
> +  /* MSA-to-MSA moves can be done in a single instruction.  */
> +  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
> +    return false;
> +
> +  /* Check for MSA loads and stores.  */
> +  if (FP_REG_RTX_P (dest) && MEM_P (src))
> +    return false;
> +  if (FP_REG_RTX_P (src) && MEM_P (dest))
> +    return false;
> +
> +  /* Check for MSA set to 0  */
> +  if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
> +    return false;

Why just zero, when LDI is available?

> +/* Split copy_s.d.  */
> +
> +void
> +mips_split_msa_copy_d (rtx dest, rtx src, rtx index, rtx (*gen_fn)(rtx, rtx, 
> rtx))

Long line,  Space between ")(".  Document the arguments.  E.g.:

   Split a COPY_S.D with operands DEST, SRC and INDEX.  GEN_FN is ...

> +  emit_insn ((gen_fn)(low, new_src, GEN_INT (INTVAL (index) * 2)));
> +  emit_insn ((gen_fn)(high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));

Just "gen_fn (...".

> +/* Split insert.d.  */
> +
> +void
> +mips_split_msa_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
> +{
> +  gcc_assert (GET_MODE (dest) == GET_MODE (src1));
> +  gcc_assert ((GET_MODE (dest) == V2DImode
> +            && (GET_MODE (src2) == DImode || src2 == const0_rtx))
> +           || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode));
> +
> +  /* Note that low is always from the lower index, and high is always
> +     from the higher index.  */
> +  rtx low, high;
> +  if (src2 == const0_rtx)
> +    {
> +      low = src2;
> +      high = src2;
> +    }
> +  else
> +    {
> +      low = mips_subword (src2, false);
> +      high = mips_subword (src2, true);
> +    }

As before, I don't think 0 needs to be a special case.  mips_subword
should handle it correctly.  Other instances later.

> @@ -5092,6 +5662,7 @@
>    memset (cum, 0, sizeof (*cum));
>    cum->prototype = (fntype && prototype_p (fntype));
>    cum->gp_reg_found = (cum->prototype && stdarg_p (fntype));
> +  cum->stdarg_p = (cum->prototype && stdarg_p (fntype));

Please use:

  cum->stdarg_p = (cum->prototype && stdarg_p (fntype));
  cum->gp_reg_found = cum->stdarg_p;

It looked like a lot of the code after this was still awaiting changes
so I stopped there.

Nice refactoring work on the .md file, thanks.  Certainly looks cleaner now.

Richard

Re: [PATCH RFC] MIPS add support for MIPS SIMD ARCHITECTURE V1.07

Reply via email to