On Mon, Feb 11, 2019 at 11:55 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of > destination XMM register. Only SSE register source operand is allowed. > > PR target/89021 > * config/i386/mmx.md (UNSPEC_CVTPI2PS): New. > (sse_cvtpi2ps): Renamed to ... > (*mmx_cvtpi2ps): This. Disabled for TARGET_MMX_WITH_SSE. > (sse_cvtpi2ps): New. > (mmx_cvtpi2ps_sse): Likewise. > --- > gcc/config/i386/sse.md | 83 +++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 81 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 80bb4cb935d..75e711624ce 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -18,6 +18,9 @@ > ;; <http://www.gnu.org/licenses/>. > > (define_c_enum "unspec" [ > + ;; MMX with SSE > + UNSPEC_CVTPI2PS > + > ;; SSE > UNSPEC_MOVNT > > @@ -4655,14 +4658,90 @@ > ;; > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > > -(define_insn "sse_cvtpi2ps" > +(define_expand "sse_cvtpi2ps" > + [(set (match_operand:V4SF 0 "register_operand") > + (vec_merge:V4SF > + (vec_duplicate:V4SF > + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand"))) > + (match_operand:V4SF 1 "register_operand") > + (const_int 3)))] > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE" > +{ > + if (TARGET_MMX_WITH_SSE) > + { > + rtx op2 = force_reg (V2SImode, operands[2]); > + rtx op3 = gen_reg_rtx (V4SFmode); > + rtx op4 = gen_reg_rtx (V4SFmode); > + rtx insn = gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2, > + op3, op4); > + emit_insn (insn); > + DONE; > + } > +}) > + > +(define_insn_and_split "mmx_cvtpi2ps_sse" > + [(set (match_operand:V4SF 0 "register_operand" "=x,Yv") > + (unspec:V4SF [(match_operand:V2SI 2 "register_operand" "x,Yv") > + (match_operand:V4SF 1 "register_operand" "0,Yv")] > + UNSPEC_CVTPI2PS)) > + (set (match_operand:V4SF 3 "register_operand" "=x,Yv") > + (unspec:V4SF [(match_operand:V4SF 4 
"register_operand" "3,3")] > + UNSPEC_CVTPI2PS))]
This is indeed one strange pattern. Can you please elaborate on why it should be written this way? Do you need a scratch register (match_scratch) here? Uros. > + "TARGET_MMX_WITH_SSE" > + "#" > + "&& reload_completed" > + [(const_int 0)] > +{ > + rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); > + /* Generate SSE2 cvtdq2ps. */ > + rtx insn = gen_floatv4siv4sf2 (operands[3], op2); > + emit_insn (insn); > + > + /* Merge operands[3] with operands[0]. */ > + rtx mask, op1; > + if (TARGET_AVX) > + { > + mask = gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec (4, GEN_INT (0), GEN_INT (1), > + GEN_INT (6), GEN_INT (7))); > + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]); > + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask); > + insn = gen_rtx_SET (operands[0], op2); > + } > + else > + { > + /* NB: SSE can only concatenate OP0 and OP3 to OP0. */ > + mask = gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec (4, GEN_INT (2), GEN_INT (3), > + GEN_INT (4), GEN_INT (5))); > + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]); > + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask); > + insn = gen_rtx_SET (operands[0], op2); > + emit_insn (insn); > + > + /* Swap bits 0:63 with bits 64:127. 
*/ > + mask = gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec (4, GEN_INT (2), GEN_INT (3), > + GEN_INT (0), GEN_INT (1))); > + rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0])); > + op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask); > + insn = gen_rtx_SET (dest, op1); > + } > + emit_insn (insn); > + DONE; > +} > + [(set_attr "isa" "noavx,avx") > + (set_attr "type" "ssecvt") > + (set_attr "mode" "V4SF")]) > + > +(define_insn "*mmx_cvtpi2ps" > [(set (match_operand:V4SF 0 "register_operand" "=x") > (vec_merge:V4SF > (vec_duplicate:V4SF > (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) > (match_operand:V4SF 1 "register_operand" "0") > (const_int 3)))] > - "TARGET_SSE" > + "TARGET_SSE && !TARGET_MMX_WITH_SSE" > "cvtpi2ps\t{%2, %0|%0, %2}" > [(set_attr "type" "ssecvt") > (set_attr "mode" "V4SF")]) > -- > 2.20.1 >