Emulate the MMX version of palignrq with the SSE version by concatenating the two 64-bit MMX operands into a single 128-bit SSE operand, followed by an SSE psrldq. Only an SSE register source operand is allowed.
PR target/89021 * config/i386/sse.md (ssse3_palignrdi): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 44 +++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6cad298eb86..f0d42a17c93 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15967,23 +15967,53 @@ (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ssse3_palignrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] +(define_insn_and_split "ssse3_palignrdi" + [(set (match_operand:DI 0 "register_operand" "=y,Yx,Yy") + (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yy") + (match_operand:DI 2 "nonimmediate_operand" "ym,Yx,Yy") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")] UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); return "palignr\t{%3, %2, %0|%0, %2, %3}"; } - [(set_attr "type" "sseishft") + "&& reload_completed && TARGET_MMX_WITH_SSE" + [(const_int 0)] +{ + /* Emulate MMX palignrdi with SSE psrldq. */ + rtx op0 = gen_rtx_REG (V2DImode, REGNO (operands[0])); + rtx insn; + if (TARGET_AVX) + insn = gen_vec_concatv2di (op0, operands[2], operands[1]); + else + { + /* NB: SSE can only concatenate OP0 and OP1 to OP0. */ + insn = gen_vec_concatv2di (op0, operands[1], operands[2]); + emit_insn (insn); + /* Swap bits 0:63 with bits 64:127. 
*/ + rtx mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (2), + GEN_INT (3), + GEN_INT (0), + GEN_INT (1))); + rtx op1 = gen_rtx_REG (V4SImode, REGNO (op0)); + rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask); + insn = gen_rtx_SET (op1, op2); + } + emit_insn (insn); + op0 = gen_rtx_REG (V1TImode, REGNO (op0)); + insn = gen_sse2_lshrv1ti3 (op0, op0, operands[3]); + emit_insn (insn); + DONE; +} + [(set_attr "isa" "*,noavx,avx") + (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI ;; modes for abs instruction on pre AVX-512 targets. -- 2.20.1