--- gcc/config/mips/loongson.md | 24 +++- gcc/config/mips/mips-modes.def | 1 + gcc/config/mips/mips-protos.h | 1 + gcc/config/mips/mips-ps-3d.md | 145 ++++++++++++++-------- gcc/config/mips/mips.c | 266 ++++++++++++++++++++++++++++++++++++++-- gcc/config/mips/predicates.md | 7 +- 6 files changed, 376 insertions(+), 68 deletions(-)
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index 225f4d1..23c37d7 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -403,12 +403,11 @@
 ;; Shuffle halfwords.
 (define_insn "loongson_pshufh"
   [(set (match_operand:VH 0 "register_operand" "=f")
-        (unspec:VH [(match_operand:VH 1 "register_operand" "0")
-                    (match_operand:VH 2 "register_operand" "f")
-                    (match_operand:SI 3 "register_operand" "f")]
+        (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+                    (match_operand:SI 2 "register_operand" "f")]  ; 2 = packed selector
                    UNSPEC_LOONGSON_PSHUFH))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-  "pshufh\t%0,%2,%3"
+  "pshufh\t%0,%1,%2"
   [(set_attr "type" "fmul")])
 
 ;; Shift left logical.
@@ -479,7 +478,7 @@
   [(set_attr "type" "fadd")])
 
 ;; Unpack high data.
-(define_insn "vec_interleave_high<mode>"
+(define_insn "loongson_punpckh<V_stretch_half_suffix>"
   [(set (match_operand:VWHB 0 "register_operand" "=f")
         (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
                       (match_operand:VWHB 2 "register_operand" "f")]
@@ -489,7 +488,7 @@
   [(set_attr "type" "fdiv")])
 
 ;; Unpack low data.
-(define_insn "vec_interleave_low<mode>"
+(define_insn "loongson_punpckl<V_stretch_half_suffix>"
   [(set (match_operand:VWHB 0 "register_operand" "=f")
         (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
                       (match_operand:VWHB 2 "register_operand" "f")]
@@ -498,6 +497,19 @@
   "punpckl<V_stretch_half_suffix>\t%0,%1,%2"
   [(set_attr "type" "fdiv")])
 
+(define_expand "vec_perm_const<mode>"
+  [(match_operand:VWHB 0 "register_operand" "")
+   (match_operand:VWHB 1 "register_operand" "")
+   (match_operand:VWHB 2 "register_operand" "")
+   (match_operand:VWHB 3 "" "")]  ; selector; its elements are read with INTVAL
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+  if (mips_expand_vec_perm_const (operands))  /* True: a recognizer matched.  */
+    DONE;
+  else
+    FAIL;
+})
+
 ;; Integer division and modulus.  For integer multiplication, see mips.md.
 
(define_insn "<u>div<mode>3"
diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def
index b9c508b..03b9632 100644
--- a/gcc/config/mips/mips-modes.def
+++ b/gcc/config/mips/mips-modes.def
@@ -29,6 +29,7 @@ FLOAT_MODE (TF, 16, mips_quad_format);
 VECTOR_MODES (INT, 8);        /* V8QI V4HI V2SI */
 VECTOR_MODES (FLOAT, 8);      /* V4HF V2SF */
 VECTOR_MODES (INT, 4);        /* V4QI V2HI */
+VECTOR_MODES (FLOAT, 16);     /* Includes V4SF, used by vec_perm_const_ps.  */
 
 VECTOR_MODES (FRACT, 4);      /* V4QQ V2HQ */
 VECTOR_MODES (UFRACT, 4);     /* V4UQQ V2UHQ */
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index dbabdff..37c958d 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -328,6 +328,7 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
                                      rtx, rtx, rtx, rtx);
 
 extern void mips_expand_vector_init (rtx, rtx);
+extern bool mips_expand_vec_perm_const (rtx op[4]);
 
 extern bool mips_eh_uses (unsigned int);
 extern bool mips_epilogue_uses (unsigned int);
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 504f43c..d81abf8 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -89,61 +89,102 @@
   DONE;
 })
 
-; pul.ps - Pair Upper Lower
-(define_insn "mips_pul_ps"
+(define_insn "vec_perm_const_ps"  ; result = { concat[op3], concat[op4] }
   [(set (match_operand:V2SF 0 "register_operand" "=f")
-        (vec_merge:V2SF
-         (match_operand:V2SF 1 "register_operand" "f")
-         (match_operand:V2SF 2 "register_operand" "f")
-         (const_int 2)))]
+        (vec_select:V2SF
+          (vec_concat:V4SF
+            (match_operand:V2SF 1 "register_operand" "f")
+            (match_operand:V2SF 2 "register_operand" "f"))
+          (parallel [(match_operand:SI 3 "const_0_or_1_operand" "")
+                     (match_operand:SI 4 "const_2_or_3_operand" "")])))]
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
-  "pul.ps\t%0,%1,%2"
+{
+  static const char * const mnemonics[2][4] = {
+    /* LE */    { "pll.ps\t%0,%2,%1", "pul.ps\t%0,%2,%1",
+                  "plu.ps\t%0,%2,%1", "puu.ps\t%0,%2,%1" },
+    /* BE */    { "puu.ps\t%0,%1,%2", "pul.ps\t%0,%1,%2",
+                  "plu.ps\t%0,%1,%2", "pll.ps\t%0,%1,%2" },
+  };
+  /* MASK: operand 3 (0/1) is the high bit, operand 4 (2/3) the low bit.  */
+  unsigned mask = INTVAL (operands[3]) * 2 + (INTVAL (operands[4]) - 2);
+  return mnemonics[WORDS_BIG_ENDIAN][mask];
+}
   [(set_attr "type" "fmove")
    (set_attr "mode" "SF")])
 
-; puu.ps - Pair upper upper
-(define_insn "mips_puu_ps"
-  [(set (match_operand:V2SF 0 "register_operand" "=f")
-        (vec_merge:V2SF
-         (match_operand:V2SF 1 "register_operand" "f")
-         (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-         (const_int 2)))]
-  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
-  "puu.ps\t%0,%1,%2"
-  [(set_attr "type" "fmove")
-   (set_attr "mode" "SF")])
+(define_expand "vec_perm_constv2sf"
+  [(match_operand:V2SF 0 "register_operand" "")
+   (match_operand:V2SF 1 "register_operand" "")
+   (match_operand:V2SF 2 "register_operand" "")
+   (match_operand:V2SI 3 "" "")]  ; selector; its elements are read with INTVAL
+  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+  if (mips_expand_vec_perm_const (operands))
+    DONE;
+  else
+    FAIL;
+})
 
-; pll.ps - Pair Lower Lower
-(define_insn "mips_pll_ps"
-  [(set (match_operand:V2SF 0 "register_operand" "=f")
-        (vec_merge:V2SF
-         (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-         (match_operand:V2SF 2 "register_operand" "f")
-         (const_int 2)))]
-  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
-  "pll.ps\t%0,%1,%2"
-  [(set_attr "type" "fmove")
-   (set_attr "mode" "SF")])
+;; Expanders for builtins.
+(define_expand "mips_puu_ps"
+  [(match_operand:V2SF 0 "register_operand" "")
+   (match_operand:V2SF 1 "register_operand" "")
+   (match_operand:V2SF 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{  /* puu.ps: pair the upper half of op 1 with the upper half of op 2.  */
+  if (WORDS_BIG_ENDIAN)
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+                                      const0_rtx, const2_rtx));
+  else
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+                                      const1_rtx, GEN_INT (3)));
+  DONE;
+})
 
-; plu.ps - Pair Lower Upper
-(define_insn "mips_plu_ps"
-  [(set (match_operand:V2SF 0 "register_operand" "=f")
-        (vec_merge:V2SF
-         (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-         (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-         (const_int 2)))]
-  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
-  "plu.ps\t%0,%1,%2"
-  [(set_attr "type" "fmove")
-   (set_attr "mode" "SF")])
+(define_expand "mips_pul_ps"
+  [(match_operand:V2SF 0 "register_operand" "")
+   (match_operand:V2SF 1 "register_operand" "")
+   (match_operand:V2SF 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{  /* pul.ps: pair the upper half of op 1 with the lower half of op 2.  */
+  if (WORDS_BIG_ENDIAN)
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+                                      const0_rtx, GEN_INT (3)));
+  else
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+                                      const0_rtx, GEN_INT (3)));
+  DONE;
+})
+
+(define_expand "mips_plu_ps"
+  [(match_operand:V2SF 0 "register_operand" "")
+   (match_operand:V2SF 1 "register_operand" "")
+   (match_operand:V2SF 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{  /* plu.ps: pair the lower half of op 1 with the upper half of op 2.  */
+  if (WORDS_BIG_ENDIAN)
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+                                      const1_rtx, const2_rtx));
+  else
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+                                      const1_rtx, const2_rtx));
+  DONE;
+})
+
+(define_expand "mips_pll_ps"
+  [(match_operand:V2SF 0 "register_operand" "")
+   (match_operand:V2SF 1 "register_operand" "")
+   (match_operand:V2SF 2 "register_operand" "")]
+  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{  /* pll.ps: pair the lower half of op 1 with the lower half of op 2.  */
+  if (WORDS_BIG_ENDIAN)
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+                                      const1_rtx, GEN_INT (3)));
+  else
+    emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+                                      const0_rtx, const2_rtx));
+  DONE;
+})
 
 ; vec_init
 (define_expand "vec_initv2sf"
@@ -206,10 +247,10 @@
      then use a PUL instruction.  */
   temp = gen_reg_rtx (V2SFmode);
   emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
-  if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN)
-    emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0]));
-  else
-    emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp));
+  /* Element operands[2] comes from TEMP; the other element is kept.  */
+  emit_insn (gen_vec_perm_const_ps (operands[0],
+      INTVAL (operands[2]) ? operands[0] : temp,
+      INTVAL (operands[2]) ? temp : operands[0], const0_rtx, GEN_INT (3)));
   DONE;
 })
 
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index d3fd709..f1c3665 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -12774,12 +12774,6 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
 #define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3
 #define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3
 #define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3
-#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi
-#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi
-#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si
-#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi
-#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi
-#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si
 
 static const struct mips_builtin_description mips_builtins[] = {
   DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
@@ -13021,8 +13015,8 @@ static const struct mips_builtin_description mips_builtins[] = {
   LOONGSON_BUILTIN (pasubub,
MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
   LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI),
   LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI),
-  LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI),
-  LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI),
+  LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
+  LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
   LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
   LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
   LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
@@ -16326,6 +16320,259 @@ mips_shift_truncation_mask (enum machine_mode mode)
 }
 
 
+/* Generate or test for an insn that supports a constant permutation.  */
+
+#define MAX_VECT_LEN 8
+
+struct expand_vec_perm_d
+{
+  rtx target, op0, op1;              /* Destination and the two inputs.  */
+  unsigned char perm[MAX_VECT_LEN];  /* Selector; values >= nelt name op1.  */
+  enum machine_mode vmode;           /* Mode of the vectors.  */
+  unsigned char nelt;                /* GET_MODE_NUNITS (vmode).  */
+  bool one_vector_p;                 /* Only one input is referenced.  */
+  bool testing_p;                    /* Match only; do not emit insns.  */
+};
+
+/* Recognize patterns for the MIPS3D P[UL][UL].PS instructions.  */
+
+static bool
+mips_expand_vpc_ps (struct expand_vec_perm_d *d)
+{
+  unsigned perm0, perm1;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT))
+    return false;
+  if (d->vmode != V2SFmode)
+    return false;
+
+  perm0 = d->perm[0];
+  perm1 = d->perm[1];
+
+  /* If the first selector comes from the second operand, swap.  */
+  if (perm0 & 2)
+    {
+      rtx x;
+      if (perm1 & 2)  /* Both selectors name the second operand.  */
+        return false;
+      perm0 -= 2;
+      perm1 += 2;
+      x = d->op0, d->op0 = d->op1, d->op1 = x;
+    }
+  /* If the second selector does not come from the second operand, fail.  */
+  else if ((perm1 & 2) == 0)
+    return false;
+
+  /* Success!  Here perm0 is 0 or 1 and perm1 is 2 or 3.  */
+  if (!d->testing_p)
+    emit_insn (gen_vec_perm_const_ps (d->target, d->op0, d->op1,
+                                      GEN_INT (perm0), GEN_INT (perm1)));
+
+  return true;
+}
+
+/* Recognize patterns for the Loongson PUNPCK* instructions.
*/
+
+static bool
+mips_expand_vpc_loongson_interleave (struct expand_vec_perm_d *d)
+{
+  unsigned int i, low, swap, nelt = d->nelt, mask;
+  rtx x;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+    return false;
+  if (GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT)
+    return false;
+  if (GET_MODE_SIZE (d->vmode) != 8)  /* V8QI, V4HI and V2SI only.  */
+    return false;
+
+  /* Note that these are big-endian tests.  Adjust for little-endian later.  */
+  low = nelt / 2;
+  swap = nelt;
+  if (d->perm[0] == swap + low)
+    ;
+  else if (d->perm[0] == swap)
+    low = 0;
+  else if (d->perm[0] == low)
+    swap = 0;
+  else if (d->perm[0] == 0)
+    low = 0, swap = 0;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+  for (i = 0; i < nelt / 2; i++)
+    {
+      unsigned elt;
+      elt = i + low + swap;
+      if (d->perm[i * 2] != elt)
+        return false;
+      elt = (elt + nelt) & mask;  /* Same index in the other input.  */
+      if (d->perm[i * 2 + 1] != elt)
+        return false;
+    }
+
+  /* Success!  */
+  if (d->testing_p)
+    return true;
+
+  /* Adjust for little-endian.  */
+  if (!WORDS_BIG_ENDIAN)
+    swap = !swap, low = !low;
+  /* Adjust for matched swapped operand pattern.  */
+  if (swap)
+    x = d->op0, d->op0 = d->op1, d->op1 = x;
+
+  /* Generate one of the loongson_punpck* instructions.  */
+  /* ??? We should consider using standard (vec_select (vec_concat)) form.  */
+  x = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, d->op0, d->op1),
+                      low ? UNSPEC_LOONGSON_PUNPCKL : UNSPEC_LOONGSON_PUNPCKH);
+  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
+  return true;
+}
+
+/* Recognize patterns for the Loongson PSHUFH instruction.  */
+
+static bool
+mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
+{
+  unsigned i, mask;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+    return false;
+  if (d->vmode != V4HImode)
+    return false;
+  if (!d->one_vector_p)
+    return false;
+  if (d->testing_p)
+    return true;
+
+  /* Convert the selector into the packed 8-bit form for pshufh.  */
+  for (i = mask = 0; i < 4; i++)
+    mask |= (d->perm[i] & 3) << (i * 2);  /* Two bits per element.  */
+
+  emit_insn (gen_loongson_pshufh (d->target, d->op0,
+                                  force_reg (SImode, GEN_INT (mask))));
+  return true;
+}
+
+static bool
+mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
+{  /* Try each recognizer in turn; true on the first match.  */
+  if (mips_expand_vpc_ps (d))
+    return true;
+  if (mips_expand_vpc_loongson_interleave (d))
+    return true;
+  if (mips_expand_vpc_loongson_pshufh (d))
+    return true;
+  return false;
+}
+
+/* Expand a vec_perm_const pattern.  */
+
+bool
+mips_expand_vec_perm_const (rtx operands[4])
+{
+  struct expand_vec_perm_d d;
+  int i, nelt, which;
+  rtx sel;
+
+  d.target = operands[0];
+  d.op0 = operands[1];
+  d.op1 = operands[2];
+  sel = operands[3];
+
+  d.vmode = GET_MODE (d.target);
+  gcc_assert (VECTOR_MODE_P (d.vmode));
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = false;
+
+  for (i = which = 0; i < nelt; ++i)
+    {
+      rtx e = XVECEXP (sel, 0, i);
+      int ei = INTVAL (e) & (2 * nelt - 1);
+      which |= (ei < nelt ? 1 : 2);  /* Bit 0: uses op0; bit 1: uses op1.  */
+      d.perm[i] = ei;
+    }
+
+  switch (which)
+    {
+    default:
+      gcc_unreachable();
+
+    case 3:
+      d.one_vector_p = false;
+      if (!rtx_equal_p (d.op0, d.op1))
+        break;
+
+      /* The elements of PERM do not suggest that only the first operand
+         is used, but both operands are identical.  Allow easier matching
+         of the permutation by folding the permutation into the single
+         input vector.  */
+      for (i = 0; i < nelt; ++i)
+        if (d.perm[i] >= nelt)
+          d.perm[i] -= nelt;
+      /* FALLTHRU */
+
+    case 1:
+      d.op1 = d.op0;
+      d.one_vector_p = true;
+      break;
+
+    case 2:
+      for (i = 0; i < nelt; ++i)
+        d.perm[i] -= nelt;
+      d.op0 = d.op1;
+      d.one_vector_p = true;
+      break;
+    }
+
+  return mips_expand_vec_perm_const_1 (&d);
+}
+
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.
*/
+
+static bool
+mips_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+                                  const unsigned char *sel)
+{
+  struct expand_vec_perm_d d;
+  unsigned int i, nelt, which;
+  bool ret;
+
+  d.vmode = vmode;
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = true;  /* The recognizers match without emitting.  */
+  memcpy (d.perm, sel, nelt);
+
+  /* Categorize the set of elements in the selector.  */
+  for (i = which = 0; i < nelt; ++i)
+    {
+      unsigned char e = d.perm[i];
+      gcc_assert (e < 2 * nelt);
+      which |= (e < nelt ? 1 : 2);  /* Bit 0: first input; bit 1: second.  */
+    }
+
+  /* For all elements from second vector, fold the elements to first.  */
+  if (which == 2)
+    for (i = 0; i < nelt; ++i)
+      d.perm[i] -= nelt;
+
+  /* Only one input is referenced unless the selector names both.  */
+  d.one_vector_p = (which != 3);
+  /* Stand-in registers numbered after LAST_VIRTUAL_REGISTER.  */
+  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+  if (!d.one_vector_p)
+    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+  start_sequence ();  /* Keep any emitted insn out of the current stream.  */
+  ret = mips_expand_vec_perm_const_1 (&d);
+  end_sequence ();
+
+  return ret;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -16544,6 +16791,9 @@ mips_shift_truncation_mask (enum machine_mode mode)
 #undef TARGET_SHIFT_TRUNCATION_MASK
 #define TARGET_SHIFT_TRUNCATION_MASK mips_shift_truncation_mask
 
+#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
+#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index 5e9398e..4936a49 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -73,8 +73,11 @@
 ;; This is used for indexing into vectors, and hence only accepts const_int.
(define_predicate "const_0_or_1_operand"
   (and (match_code "const_int")
-       (ior (match_test "op == CONST0_RTX (GET_MODE (op))")
-            (match_test "op == CONST1_RTX (GET_MODE (op))"))))
+       (match_test "IN_RANGE (INTVAL (op), 0, 1)")))  ; tests the value itself
+
+(define_predicate "const_2_or_3_operand"  ; second index of vec_perm_const_ps
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 2, 3)")))
 
 (define_predicate "qi_mask_operand"
   (and (match_code "const_int")
-- 
1.7.7.3