On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu <hongjiu...@intel.com> wrote: > > Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with > subreg. gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by > > avx2_test: > .cfi_startproc > - vmovaps x(%rip), %xmm1 > - vbroadcastss %xmm1, %ymm0 > + vbroadcastss x(%rip), %ymm0 > vmovaps %ymm0, y(%rip) > vzeroupper > ret > .cfi_endproc > > gcc.target/i386/avx512vl-vbroadcast-3.c is changed by > > @@ -113,7 +113,7 @@ f10: > .cfi_startproc > vmovaps %ymm0, %ymm16 > vpermilps $85, %ymm16, %ymm16 > - vbroadcastss %xmm16, %ymm16 > + vshuff32x4 $0x0, %ymm16, %ymm16, %ymm16 > vzeroupper > ret > .cfi_endproc > @@ -153,8 +153,7 @@ f12: > f13: > .LFB12: > .cfi_startproc > - vmovaps (%rdi), %ymm16 > - vbroadcastss %xmm16, %ymm16 > + vbroadcastss (%rdi), %ymm16 > vzeroupper > ret > .cfi_endproc
Actually, we can achieve the same with pre-reload splitters. Please see the attached patch for a couple of examples and a fix for the vbroadcastss pattern, which accesses memory in the wrong mode. Uros.
Index: sse.md =================================================================== --- sse.md (revision 265740) +++ sse.md (working copy) @@ -7129,6 +7129,20 @@ (set_attr "prefix" "maybe_evex") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*avx2_vec_dup<mode>_1" + [(set (match_operand:VF1_128_256 0 "register_operand") + (vec_duplicate:VF1_128_256 + (vec_select:SF + (match_operand:V4SF 1 "memory_operand") + (parallel [(const_int 0)]))))] + "TARGET_AVX2 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_duplicate:VF1_128_256 (match_dup 1)))] + "operands[1] = adjust_address_nv (operands[1], SFmode, 0);") + (define_insn "avx2_vec_dupv8sf_1" [(set (match_operand:V8SF 0 "register_operand" "=v") (vec_duplicate:V8SF @@ -7141,6 +7155,20 @@ (set_attr "prefix" "maybe_evex") (set_attr "mode" "V8SF")]) +(define_insn_and_split "*avx2_vec_dupv8sf_1" + [(set (match_operand:V8SF 0 "register_operand") + (vec_duplicate:V8SF + (vec_select:SF + (match_operand:V4SF 1 "memory_operand") + (parallel [(const_int 0)]))))] + "TARGET_AVX2 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_duplicate:VF1_128_256 (match_dup 1)))] + "operands[1] = adjust_address_nv (operands[1], SFmode, 0);") + (define_insn "avx512f_vec_dup<mode>_1" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_duplicate:VF_512 @@ -17908,7 +17936,7 @@ [(set (match_operand:VI 0 "register_operand" "=x,v") (vec_duplicate:VI (vec_select:<ssescalarmode> - (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm") + (match_operand:<ssexmmmode> 1 "register_operand" "x,v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}" @@ -17918,24 +17946,64 @@ (set_attr "prefix" "vex,evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*avx2_pbroadcast<mode>_mem_1" + [(set (match_operand:VI 0 "register_operand") + (vec_duplicate:VI + (vec_select:<ssescalarmode> + (match_operand:<ssexmmmode> 1 "memory_operand") + (parallel 
[(const_int 0)]))))] + "TARGET_AVX2 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_duplicate:VI (match_dup 1)))] + "operands[1] = adjust_address_nv (operands[1], <ssescalarmode>mode, 0);") + (define_insn "avx2_pbroadcast<mode>_1" - [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v") + [(set (match_operand:VI_256 0 "register_operand" "=x,v") (vec_duplicate:VI_256 (vec_select:<ssescalarmode> - (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v") + (match_operand:VI_256 1 "register_operand" "x,v") (parallel [(const_int 0)]))))] "TARGET_AVX2" - "@ - vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} - vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1} - vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} - vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" - [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>") + "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" + [(set_attr "isa" "*,<pbroadcast_evex_isa>") (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*avx2_pbroadcast<mode>_1_mem_1" + [(set (match_operand:VI_256 0 "register_operand" "=x,v") + (vec_duplicate:VI_256 + (vec_select:<ssescalarmode> + (match_operand:VI_256 1 "memory_operand" "m,m") + (parallel [(const_int 0)]))))] + "TARGET_AVX2 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_duplicate:VI_256 (match_dup 1)))] + "operands[1] = adjust_address_nv (operands[1], <ssescalarmode>mode, 0);") + +(define_insn "*avx2_pbroadcast<mode>_mem" + [(set (match_operand:VI 0 "register_operand" "=x,v") + (vec_duplicate:VI + (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))] + "TARGET_AVX2" + "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,<pbroadcast_evex_isa>") + (set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex,evex") + (set_attr "mode" "<sseinsnmode>")]) + + + + + (define_insn 
"<avx2_avx512>_permvar<mode><mask_name>" [(set (match_operand:VI48F_256_512 0 "register_operand" "=v") (unspec:VI48F_256_512