https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117562
Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |NEW
           Assignee|rguenth at gcc dot gnu.org  |unassigned at gcc dot gnu.org
           Keywords|                            |ra

--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---
Removing the alternative fixes the slowdown, both LTO and non-LTO, the
multi-epilogue version is now faster.

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 72acd5bde5e..0a3ee38f55a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11045,11 +11045,11 @@
 })
 
 (define_insn "sse_movhlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v")
        (vec_select:V4SF
          (vec_concat:V8SF
-           (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
-           (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
+           (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v")
+           (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
@@ -11059,12 +11059,11 @@
    movhlps\t{%2, %0|%0, %2}
    vmovhlps\t{%2, %1, %0|%0, %1, %2}
    movlps\t{%H2, %0|%0, %H2}
-   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
-   %vmovhps\t{%2, %0|%q0, %2}"
-  [(set_attr "isa" "noavx,avx,noavx,avx,*")
+   vmovlps\t{%H2, %1, %0|%0, %1, %H2}"
+  [(set_attr "isa" "noavx,avx,noavx,avx")
    (set_attr "type" "ssemov2")
-   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
-   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
+   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
+   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
 
 (define_expand "sse_movlhps_exp"
   [(set (match_operand:V4SF 0 "nonimmediate_operand")