On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu <hongjiu...@intel.com> wrote:
>
> Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with
> subreg.  gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
>
>  avx2_test:
>         .cfi_startproc
> -       vmovaps x(%rip), %xmm1
> -       vbroadcastss    %xmm1, %ymm0
> +       vbroadcastss    x(%rip), %ymm0
>         vmovaps %ymm0, y(%rip)
>         vzeroupper
>         ret
>         .cfi_endproc
>
> gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
>
> @@ -113,7 +113,7 @@ f10:
>         .cfi_startproc
>         vmovaps %ymm0, %ymm16
>         vpermilps       $85, %ymm16, %ymm16
> -       vbroadcastss    %xmm16, %ymm16
> +       vshuff32x4      $0x0, %ymm16, %ymm16, %ymm16
>         vzeroupper
>         ret
>         .cfi_endproc
> @@ -153,8 +153,7 @@ f12:
>  f13:
>  .LFB12:
>         .cfi_startproc
> -       vmovaps (%rdi), %ymm16
> -       vbroadcastss    %xmm16, %ymm16
> +       vbroadcastss    (%rdi), %ymm16
>         vzeroupper
>         ret
>         .cfi_endproc

Actually, we can achieve the same with pre-reload splitters. Please
see the attached patch for a couple of examples and a fix for
vbroadcastss that accesses the memory in the wrong mode.

Uros.
Index: sse.md
===================================================================
--- sse.md      (revision 265740)
+++ sse.md      (working copy)
@@ -7129,6 +7129,20 @@
     (set_attr "prefix" "maybe_evex")
     (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*avx2_vec_dup<mode>_1"  ;; pre-reload splitter (see cover note): broadcast of element 0 of a V4SF in memory
+  [(set (match_operand:VF1_128_256 0 "register_operand")
+       (vec_duplicate:VF1_128_256
+         (vec_select:SF  ;; selects the single SF element at index 0
+           (match_operand:V4SF 1 "memory_operand")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"  ;; no assembly is emitted from this pattern; it has to be split
+  "&& 1"  ;; split condition adds no restriction beyond the insn condition
+  [(set (match_dup 0)
+       (vec_duplicate:VF1_128_256 (match_dup 1)))]  ;; replacement duplicates the re-moded memory operand directly
+  "operands[1] = adjust_address_nv (operands[1], SFmode, 0);")  ;; re-mode operand 1 to SFmode at offset 0
+
 (define_insn "avx2_vec_dupv8sf_1"
   [(set (match_operand:V8SF 0 "register_operand" "=v")
        (vec_duplicate:V8SF
@@ -7141,6 +7155,20 @@
     (set_attr "prefix" "maybe_evex")
     (set_attr "mode" "V8SF")])
 
+(define_insn_and_split "*avx2_vec_dupv8sf_1"  ;; pre-reload splitter (see cover note): broadcast of element 0 of a V8SF in memory
+  [(set (match_operand:V8SF 0 "register_operand")
+       (vec_duplicate:V8SF
+         (vec_select:SF  ;; selects the single SF element at index 0
+           (match_operand:V8SF 1 "memory_operand")  ;; V8SF source; the V4SF source form is already handled by *avx2_vec_dup<mode>_1
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"  ;; no assembly is emitted from this pattern; it has to be split
+  "&& 1"  ;; split condition adds no restriction beyond the insn condition
+  [(set (match_dup 0)
+       (vec_duplicate:V8SF (match_dup 1)))]  ;; fixed mode: must agree with the V8SF destination, not a mode iterator
+  "operands[1] = adjust_address_nv (operands[1], SFmode, 0);")  ;; re-mode operand 1 to SFmode at offset 0
+
 (define_insn "avx512f_vec_dup<mode>_1"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
        (vec_duplicate:VF_512
@@ -17908,7 +17936,7 @@
   [(set (match_operand:VI 0 "register_operand" "=x,v")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
-           (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
+           (match_operand:<ssexmmmode> 1 "register_operand" "x,v")
            (parallel [(const_int 0)]))))]
   "TARGET_AVX2"
   "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
@@ -17918,24 +17946,64 @@
    (set_attr "prefix" "vex,evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*avx2_pbroadcast<mode>_mem_1"  ;; pre-reload splitter (see cover note): integer broadcast of element 0 of an <ssexmmmode> vector in memory
+  [(set (match_operand:VI 0 "register_operand")
+       (vec_duplicate:VI
+         (vec_select:<ssescalarmode>  ;; selects the single element at index 0
+           (match_operand:<ssexmmmode> 1 "memory_operand")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"  ;; no assembly is emitted from this pattern; it has to be split
+  "&& 1"  ;; split condition adds no restriction beyond the insn condition
+  [(set (match_dup 0)
+       (vec_duplicate:VI (match_dup 1)))]  ;; replacement duplicates the re-moded memory operand directly
+  "operands[1] = adjust_address_nv (operands[1], <ssescalarmode>mode, 0);")  ;; re-mode operand 1 to the element mode at offset 0
+
 (define_insn "avx2_pbroadcast<mode>_1"
-  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
+  [(set (match_operand:VI_256 0 "register_operand" "=x,v")
        (vec_duplicate:VI_256
          (vec_select:<ssescalarmode>
-           (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
+           (match_operand:VI_256 1 "register_operand" "x,v")
            (parallel [(const_int 0)]))))]
   "TARGET_AVX2"
-  "@
-   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
-   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
-   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
-   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
-  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
+  "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
+  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
    (set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*avx2_pbroadcast<mode>_1_mem_1"  ;; pre-reload splitter (see cover note): integer broadcast of element 0 of a 256-bit vector in memory
+  [(set (match_operand:VI_256 0 "register_operand")  ;; no constraints, matching the other "#" splitters in this patch
+       (vec_duplicate:VI_256
+         (vec_select:<ssescalarmode>  ;; selects the single element at index 0
+           (match_operand:VI_256 1 "memory_operand")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"  ;; no assembly is emitted from this pattern; it has to be split
+  "&& 1"  ;; split condition adds no restriction beyond the insn condition
+  [(set (match_dup 0)
+       (vec_duplicate:VI_256 (match_dup 1)))]  ;; replacement duplicates the re-moded memory operand directly
+  "operands[1] = adjust_address_nv (operands[1], <ssescalarmode>mode, 0);")  ;; re-mode operand 1 to the element mode at offset 0
+
+(define_insn "*avx2_pbroadcast<mode>_mem"  ;; vpbroadcast whose source is a scalar-mode memory operand
+  [(set (match_operand:VI 0 "register_operand" "=x,v")
+       (vec_duplicate:VI
+         (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))]  ;; operand 1 already carries the element mode, so no vec_select is needed
+  "TARGET_AVX2"
+  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,<pbroadcast_evex_isa>")  ;; the second ("v") alternative is gated by <pbroadcast_evex_isa>
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "vex,evex")  ;; one prefix value per alternative
+   (set_attr "mode" "<sseinsnmode>")])
+
+
+
+
+
 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
   [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
        (unspec:VI48F_256_512

Reply via email to