https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89021
--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---
Most MMX instructions can be implemented with SSE/SSE2. There are
a couple of tricky cases:
1. MMX maskmovq vs SSE2 maskmovdqu. They aren't equivalent. I have
;; Emulate the 64-bit MMX maskmovq store with the 128-bit SSE2 maskmovdqu
;; when TARGET_MMX_WITH_SSE is enabled.
;;
;; Operand 0 is the store address (pointer-mode register, constraint "D").
;; Operands 1 and 2 are the two V8QI register inputs of the masked store;
;; operand 2 is the one that gets widened below (presumably the byte mask,
;; operand 1 the data -- confirm against sse2_maskmovdqu).
;; Operand 3 is a V2DI output used only as a temporary by the split; it is
;; tied to input operand 4 through the "3" constraint.
;;
;; The output template is "#", so this insn is never printed directly; it
;; is always split once reload has completed.
(define_insn_and_split "sse2_maskmovq_<mode>"
[(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "Yy")
(match_operand:V8QI 2 "register_operand" "Yy")
(mem:V8QI (match_dup 0))]
UNSPEC_MASKMOV))
(set (match_operand:V2DI 3 "register_operand" "=Yy")
(unspec:V2DI [(match_operand:V2DI 4 "register_operand" "3")]
UNSPEC_MASKMOV))]
"TARGET_MMX_WITH_SSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
/* Copy the lower 64 bits of operand 2 to operand 3.  Operand 2's hard
register is re-viewed in V2DImode so it can feed sse2_movq128
(presumably this clears bits 64:127 of the copy, so the upper eight
bytes are never selected for storing -- confirm against
sse2_movq128).  NB: Invalid memory access may still happen when
bits 64:127 at the memory location are unmapped.  */
rtx op3 = operands[3];
rtx op2 = gen_rtx_REG (V2DImode, REGNO (operands[2]));
rtx insn = gen_sse2_movq128 (op3, op2);
emit_insn (insn);
/* Generate SSE2 maskmovdqu with operand 3.  Operands 1 and 3 are
re-viewed as V16QImode hard registers, and the generator variant
is picked from the pointer mode.  */
rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
op3 = gen_rtx_REG (V16QImode, REGNO (operands[3]));
if (Pmode == SImode)
insn = gen_sse2_maskmovdqu_si (operands[0], op1, op3);
else
insn = gen_sse2_maskmovdqu_di (operands[0], op1, op3);
emit_insn (insn);
DONE;
}
[(set_attr "type" "ssemov")
(set_attr "znver1_decode" "vector")
(set_attr "mode" "TI")])
2. MMX movntq vs SSE2 movntidi, which is only for 64-bit mode. I have
;; 64-bit UNSPEC_MOVNTQ store of DImode register operand 1 to memory
;; operand 0.  When MMX is being emulated with SSE, hand the store to
;; sse2_movntidi; otherwise fall through to the pattern above.
(define_expand "sse_movntq"
  [(set (match_operand:DI 0 "memory_operand")
        (unspec:DI [(match_operand:DI 1 "register_operand")]
                   UNSPEC_MOVNTQ))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  if (TARGET_MMX_WITH_SSE)
    {
      /* Same memory destination and register source, just routed
         through the SSE2 DImode pattern instead of the MMX one.  */
      emit_insn (gen_sse2_movntidi (operands[0], operands[1]));
      DONE;
    }
})
3. MMX pshufb vs SSE pshufb. I have
;; V8QI (MMX-width) pshufb.  Operand 0 is the result, operand 1 the bytes
;; to shuffle and operand 2 the shuffle control.  With TARGET_MMX_WITH_SSE
;; the operation is routed through ssse3_pshufbv8qi3_sse, which needs a
;; V4SI temporary preloaded with a per-byte 0xf7 mask; otherwise the
;; pattern above is emitted unchanged.
(define_expand "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
                      (match_operand:V8QI 2 "nonimmediate_operand")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
{
  if (TARGET_MMX_WITH_SSE)
    {
      /* Emulate MMX version of pshufb with SSE version by masking
         out the bit 3 of the shuffle control byte (0xf7 == ~0x08).

         The element must be created with gen_int_mode: 0xf7f7f7f7 has
         bit 31 set, and a CONST_INT used in SImode has to be
         sign-extended to HOST_WIDE_INT.  GEN_INT would record the
         zero-extended value, which is not canonical RTL.  */
      rtx mask_elt = gen_int_mode (0xf7f7f7f7, SImode);
      rtvec par = gen_rtvec (4, mask_elt, mask_elt, mask_elt, mask_elt);
      rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
      vec_const = force_const_mem (V4SImode, vec_const);

      /* op4 carries the mask into the insn; op3 is the output that the
         insn ties to op4 and uses as its working register.  */
      rtx op3 = gen_reg_rtx (V4SImode);
      rtx op4 = gen_reg_rtx (V4SImode);
      rtx insn = gen_rtx_SET (op4, vec_const);
      emit_insn (insn);

      /* The SSE pattern only accepts a register shuffle control.  */
      rtx op2 = force_reg (V8QImode, operands[2]);
      insn = gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
                                        op2, op3, op4);
      emit_insn (insn);
      DONE;
    }
})
and
;; SSE-register implementation of the V8QI pshufb, used when
;; TARGET_SSSE3 && TARGET_MMX_WITH_SSE.
;;
;; Operand 0 is the V8QI result, operand 1 the bytes to shuffle and
;; operand 2 the shuffle control.  Operand 3 is a V4SI output tied to
;; input operand 4 (constraint "3"); the ssse3_pshufbv8qi3 expander
;; loads operand 4 with a vector of 0xf7f7f7f7 words, so operand 3
;; starts out holding that mask.
;;
;; There are two alternatives, selected by the "isa" attribute: the
;; noavx one ties operand 1 to operand 0, the avx one does not.
;; The output template is "#", so the insn is always split after reload.
(define_insn_and_split "ssse3_pshufbv8qi3_sse"
[(set (match_operand:V8QI 0 "register_operand" "=Yx,Yy")
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yy")
(match_operand:V8QI 2 "register_operand" "Yx,Yy")]
UNSPEC_PSHUFB))
(set (match_operand:V4SI 3 "register_operand" "=Yx,Yy")
(unspec:V4SI [(match_operand:V4SI 4 "register_operand" "3,3")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
/* Mask out the bit 3 of the shuffle control byte: AND the mask held
in operand 3 with operand 2 (re-viewed as a V4SImode hard register)
and leave the masked control in operand 3.  Operand 2 itself is not
modified.  */
rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
rtx op3 = operands[3];
rtx insn = gen_andv4si3 (op3, op3, op2);
emit_insn (insn);
/* Generate SSE version of pshufb on the V16QImode views of the same
hard registers, using the masked control in operand 3.  */
rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]));
rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
op3 = gen_rtx_REG (V16QImode, REGNO (op3));
insn = gen_ssse3_pshufbv16qi3 (op0, op1, op3);
emit_insn (insn);
DONE;
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
(set_attr "mode" "TI,TI")])