The attached patch adds the missing SSE4.1 xmm->reg/mem extractions for V2SI mode.
2019-08-01  Uroš Bizjak  <ubiz...@gmail.com>

	* config/i386/mmx.md (*vec_extractv2si_0): Add (r,x) alternative.
	(*vec_extractv2si_0_zext_sse4): New insn pattern.
	(*vec_extractv2si_0_zext): Ditto.
	(*vec_extractv2si_1): Add (rm,x) alternative.
	(*vec_extractv2si_1_zext): New insn pattern.
	(*vec_extractv2si_zext_mem): Add "TARGET_MMX || TARGET_MMX_WITH_SSE"
	to the insn condition.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/mmx.md =================================================================== --- config/i386/mmx.md (revision 273969) +++ config/i386/mmx.md (working copy) @@ -1620,9 +1620,9 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn_and_split "*vec_extractv2si_0" - [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m") + (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m,x") (parallel [(const_int 0)])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -1630,33 +1630,76 @@ "&& reload_completed" [(set (match_dup 0) (match_dup 1))] "operands[1] = gen_lowpart (SImode, operands[1]);" - [(set_attr "mmx_isa" "*,*,native,native,*")]) + [(set_attr "isa" "*,*,*,*,*,sse2") + (set_attr "mmx_isa" "*,*,native,native,*,*") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "5") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + ] + (symbol_ref "true")))]) +(define_insn "*vec_extractv2si_0_zext_sse4" + [(set (match_operand:DI 0 "register_operand" "=r,x") + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "x,x") + (parallel [(const_int 0)]))))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE4_1" + "#" + [(set_attr "isa" "x64,*") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "0") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + ] + (symbol_ref "true")))]) + +(define_insn "*vec_extractv2si_0_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC" + "#") + 
+(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "register_operand") + (parallel [(const_int 0)]))))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (zero_extend:DI (match_dup 1)))] + "operands[1] = gen_lowpart (SImode, operands[1]);") + ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,rm,x,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,x ,x,x,o,o,o") (parallel [(const_int 1)])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 + %vpextrd\t{$1, %1, %0|%0, %1, 1} %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # #" - [(set_attr "isa" "*,sse2,noavx,*,*,*") - (set_attr "mmx_isa" "native,*,*,native,*,*") - (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + [(set_attr "isa" "*,sse4,sse2,noavx,*,*,*") + (set_attr "mmx_isa" "native,*,*,*,native,*,*") + (set_attr "type" "mmxcvt,ssemov,sseshuf1,sseshuf1,mmxmov,ssemov,imov") (set (attr "length_immediate") - (if_then_else (eq_attr "alternative" "1,2") + (if_then_else (eq_attr "alternative" "1,2,3") (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig") - (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")]) + (set_attr "prefix" "orig,maybe_vex,maybe_vex,orig,orig,orig,orig") + (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand") @@ -1667,6 +1710,21 @@ [(set (match_dup 0) (match_dup 1))] "operands[1] = adjust_address (operands[1], SImode, 4);") +(define_insn 
"*vec_extractv2si_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "x") + (parallel [(const_int 1)]))))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_64BIT && TARGET_SSE4_1" + "%vpextrd\t{$1, %1, %k0|%k0, %1, 1}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + (define_insn_and_split "*vec_extractv2si_zext_mem" [(set (match_operand:DI 0 "register_operand" "=y,x,r") (zero_extend:DI @@ -1673,7 +1731,7 @@ (vec_select:SI (match_operand:V2SI 1 "memory_operand" "o,o,o") (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))] - "TARGET_64BIT" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_64BIT" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]