https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704
--- Comment #12 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to H.J. Lu from comment #10)
> Created attachment 52553 [details]
> A patch to always return pseudo register in ix86_gen_scratch_sse_rtx

For pr100865-8a.c, pr100865-9c.c, pr100865-8c.c:

+/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */

This can be fixed by:

 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
        (vec_duplicate:V4SI
-         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
   "TARGET_SSE"
   "@
    %vpshufd\t{$0, %1, %0|%0, %1, 0}
    vbroadcastss\t{%1, %0|%0, %1}
-   shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")])
+   shufps\t{$0, %0, %0|%0, %0, 0}
+   #"
+  [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
+   (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
+   (set_attr "length_immediate" "1,0,1,1")
+   (set_attr "prefix_extra" "0,1,*,0")
+   (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
+   (set_attr "mode" "TI,V4SF,V4SF,TI")
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "3")
+              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+           ]
+           (symbol_ref "true")))])
+
+(define_split
+  [(set (match_operand:V4SI 0 "sse_reg_operand")
+       (vec_duplicate:V4SI
+         (match_operand:SI 1 "general_reg_operand")))]
+  "TARGET_SSE && reload_completed
+   /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
+      available, because then we can broadcast from GPRs directly.  */
+   && !TARGET_AVX512VL"
+  [(const_int 0)]
+{
+  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
+                               CONST0_RTX (V4SImode),
+                               gen_lowpart (SImode, operands[1])));
+  emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
+  DONE;
+})