http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54855
--- Comment #3 from Marc Glisse <glisse at gcc dot gnu.org> 2012-10-12 17:08:20 UTC ---
The following patch gives this loop:

.L7:
    subsd   %xmm0, %xmm1
    subl    $1, %eax
    addpd   %xmm1, %xmm1
    jne     .L7

I guess I should add the same for mul and div at the same time, but I don't know if it is the right approach.

--- config/i386/sse.md (revision 192405)
+++ config/i386/sse.md (working copy)
@@ -812,20 +812,38 @@
           (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
    v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
 
+(define_insn "*sse2_vm<plusminus_insn>v2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+        (vec_concat:V2DF
+          (plusminus:DF
+            (vec_select:DF
+              (match_operand:V2DF 1 "register_operand" "0,x")
+              (parallel [(const_int 0)]))
+            (match_operand:DF 2 "nonimmediate_operand" "xm,xm"))
+          (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "@
+   <plusminus_mnemonic>sd\t{%2, %0|%0, %2}
+   v<plusminus_mnemonic>sd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "DF")])
+
 (define_expand "mul<mode>3"
   [(set (match_operand:VF 0 "register_operand")
         (mult:VF
           (match_operand:VF 1 "nonimmediate_operand")
           (match_operand:VF 2 "nonimmediate_operand")))]
   "TARGET_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3"
   [(set (match_operand:VF 0 "register_operand" "=x,x")