We can use pinsrd when moving a DImode value from an integer register pair to an xmm register on 32-bit SSE4.1 targets.
2019-05-11  Uroš Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.md (floatdi<X87MODEF:mode>2_i387_with_xmm): Use pinsrd for TARGET_SSE4_1.
	* config/i386/sse.md (movdi_to_sse): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 46277f158bb3..1886715fe77e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5117,12 +5117,12 @@ }) (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm" - [(set (match_operand:X87MODEF 0 "register_operand" "=f") + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") (float:X87MODEF - (match_operand:DI 1 "register_operand" "r"))) - (clobber (match_scratch:V4SI 3 "=x")) - (clobber (match_scratch:V4SI 4 "=x")) - (clobber (match_operand:DI 2 "memory_operand" "=m"))] + (match_operand:DI 1 "register_operand" "r,r"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:V4SI 3 "=x,x")) + (clobber (match_scratch:V4SI 4 "=X,x"))] "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) && TARGET_SSE2 && optimize_function_for_speed_p (cfun)" @@ -5135,14 +5135,21 @@ Assemble the 64-bit DImode value in an xmm register. 
*/ emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), gen_lowpart (SImode, operands[1]))); - emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), - gen_highpart (SImode, operands[1]))); - emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], - operands[4])); - + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3], + gen_highpart (SImode, operands[1]), + GEN_INT (2))); + else + { + emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], + operands[4])); + } operands[3] = gen_lowpart (DImode, operands[3]); } - [(set_attr "type" "multi") + [(set_attr "isa" "sse4,*") + (set_attr "type" "multi") (set_attr "mode" "<X87MODEF:MODE>") (set_attr "unit" "i387") (set_attr "fp_int_src" "true")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6b8298d957ed..a223a58ed540 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1300,10 +1300,10 @@ ;; from there. (define_insn_and_split "movdi_to_sse" - [(set (match_operand:V4SI 0 "register_operand" "=?x,x") - (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m")] + [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x") + (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")] UNSPEC_MOVDI_TO_SSE)) - (clobber (match_scratch:V4SI 2 "=&x,X"))] + (clobber (match_scratch:V4SI 2 "=X,X,&x"))] "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" "#" "&& reload_completed" @@ -1315,18 +1315,26 @@ Assemble the 64-bit DImode value in an xmm register. 
*/ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), gen_lowpart (SImode, operands[1]))); - emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), - gen_highpart (SImode, operands[1]))); - emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], - operands[2])); - } + if (TARGET_SSE4_1) + emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0], + gen_highpart (SImode, operands[1]), + GEN_INT (2))); + else + { + emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], + operands[2])); + } + } else if (memory_operand (operands[1], DImode)) emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx)); else gcc_unreachable (); DONE; -}) +} + [(set_attr "isa" "sse4,*,*")]) (define_split [(set (match_operand:V4SF 0 "register_operand")