Hi!

The AVX512F documentation clearly states that in instructions where the
destination is memory, only merging-masking is possible, not zero-masking,
and the assembler enforces that.

The testcase in this patch fails to assemble because of
Error: unsupported masking for `vextracti32x8'
on
        vextracti32x8   $0x0, %zmm1, -64(%rsp){%k1}{z}
For the vector extraction patterns, we apparently have 7 *_maskm patterns
that only accept memory destinations and require the merge-masking source
to be rtx_equal_p to the destination; 7 corresponding *<mask_name> patterns
that allow a memory destination only in the non-masked case (through
<store_mask_constraint>); then 2 *<mask_name> patterns (the lo ssehalf V16FI
and lo ssehalf VI8F_256 ones) which do allow a memory destination even in
the masked case and are the cause of the testsuite failure, because we must
not allow the C constraint if the destination is m; and finally one pair of
patterns (separate * and *_mask, hi ssehalf VI4F_256), which has another
issue (for which I don't have a testcase though): if it matched zero-masking
with a register destination, it wouldn't emit the needed {z} into the
assembly.
The attached patch fixes only those 3 issues and is perhaps more suitable
for backporting.
But even with that fixed, we are missing 3 further *_maskm patterns.  More
importantly, I find the split into 3 separate patterns after subst (*_maskm
for masking with memory destination, *_mask for masking with register
destination and * for non-masking) unnecessarily complex and harder for
reload.  So the patch included below (non-attached) instead kills all
*_maskm patterns and splits the *<mask_name> patterns into * and *_mask by
hand instead of through subst.  The *_mask ones make sure that with a v
destination they use 0C, while with an m destination they use 0, and their
condition enforces that either the destination is not a MEM, or the
destination is rtx_equal_p to the corresponding merging-masking source
operand.
If we had those 3 missing *_maskm patterns, this patch would actually result
in both a shorter sse.md and a shorter machine description after subst (e.g.
length of tmp-mddump.md).  As we don't have them, the patch actually makes
sse.md 16 lines longer, but tmp-mddump.md is still shorter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk (and is
the shorter patch ok for backports)?

2019-12-30  Jakub Jelinek  <ja...@redhat.com>

        PR target/93069
        * config/i386/subst.md (store_mask_constraint, store_mask_predicate):
        Remove.
        * config/i386/sse.md (avx512dq_vextract<shuffletype>64x2_1_maskm,
        avx512f_vextract<shuffletype>32x4_1_maskm,
        vec_extract_lo_<mode>_maskm, vec_extract_hi_<mode>_maskm): Remove.
        (<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>): Split
        into ...
        (*avx512dq_vextract<shuffletype>64x2_1,
        avx512dq_vextract<shuffletype>64x2_1_mask): ... these new
        define_insns.  Even in the masked variant allow memory output but in
        that case use 0 rather than 0C constraint on the source of masked-out
        elts.
        (<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>): Split
        into ...
        (*avx512f_vextract<shuffletype>32x4_1,
        avx512f_vextract<shuffletype>32x4_1_mask): ... these new define_insns.
        Even in the masked variant allow memory output but in that case use
        0 rather than 0C constraint on the source of masked-out elts.
        (vec_extract_lo_<mode><mask_name>): Split into ...
        (vec_extract_lo_<mode>, vec_extract_lo_<mode>_mask): ... these new
        define_insns.  Even in the masked variant allow memory output but in
        that case use 0 rather than 0C constraint on the source of masked-out
        elts.
        (vec_extract_hi_<mode><mask_name>): Split into ...
        (vec_extract_hi_<mode>, vec_extract_hi_<mode>_mask): ... these new
        define_insns.  Even in the masked variant allow memory output but in
        that case use 0 rather than 0C constraint on the source of masked-out
        elts.

        * gcc.target/i386/avx512vl-pr93069.c: New test.
        * gcc.dg/vect/pr93069.c: New test.

--- gcc/config/i386/subst.md.jj 2019-10-28 22:16:14.651007061 +0100
+++ gcc/config/i386/subst.md    2019-12-28 14:43:56.654042070 +0100
@@ -57,8 +57,6 @@ (define_subst_attr "mask_mode512bit_cond
 (define_subst_attr "mask_avx512vl_condition" "mask" "1" "TARGET_AVX512VL")
 (define_subst_attr "mask_avx512bw_condition" "mask" "1" "TARGET_AVX512BW")
 (define_subst_attr "mask_avx512dq_condition" "mask" "1" "TARGET_AVX512DQ")
-(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
-(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" 
"register_operand")
 (define_subst_attr "mask_prefix" "mask" "vex" "evex")
 (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
 (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex,evex")
--- gcc/config/i386/sse.md.jj   2019-12-27 18:16:48.146431083 +0100
+++ gcc/config/i386/sse.md      2019-12-29 12:36:33.232414154 +0100
@@ -8415,60 +8415,31 @@ (define_expand "<extract_type>_vextract<
   DONE;
 })
 
-(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
-  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+(define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
        (vec_merge:<ssequartermode>
          (vec_select:<ssequartermode>
-           (match_operand:V8FI 1 "register_operand" "v")
-           (parallel [(match_operand 2  "const_0_to_7_operand")
-             (match_operand 3  "const_0_to_7_operand")]))
-         (match_operand:<ssequartermode> 4 "memory_operand" "0")
-         (match_operand:QI 5 "register_operand" "Yk")))]
+           (match_operand:V8FI 1 "register_operand" "v,v")
+           (parallel [(match_operand 2 "const_0_to_7_operand")
+                      (match_operand 3 "const_0_to_7_operand")]))
+         (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 5 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512DQ
    && INTVAL (operands[2]) % 2 == 0
    && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
-   && rtx_equal_p (operands[4], operands[0])"
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
 {
-  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
-  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
-}
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_extra" "1")
-   (set_attr "length_immediate" "1")
-   (set_attr "memory" "store")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
-  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
-       (vec_merge:<ssequartermode>
-         (vec_select:<ssequartermode>
-           (match_operand:V16FI 1 "register_operand" "v")
-           (parallel [(match_operand 2  "const_0_to_15_operand")
-             (match_operand 3  "const_0_to_15_operand")
-             (match_operand 4  "const_0_to_15_operand")
-             (match_operand 5  "const_0_to_15_operand")]))
-         (match_operand:<ssequartermode> 6 "memory_operand" "0")
-         (match_operand:QI 7 "register_operand" "Yk")))]
-  "TARGET_AVX512F
-   && INTVAL (operands[2]) % 4 == 0
-   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
-   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
-   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
-   && rtx_equal_p (operands[6], operands[0])"
-{
-  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
-  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
+  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
+  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, 
%2}";
 }
-  [(set_attr "type" "sselog")
+  [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "memory" "store")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
-  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" 
"=<store_mask_constraint>")
+(define_insn "*avx512dq_vextract<shuffletype>64x2_1"
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
        (vec_select:<ssequartermode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(match_operand 2  "const_0_to_7_operand")
@@ -8478,7 +8449,7 @@ (define_insn "<mask_codefor>avx512dq_vex
    && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
 {
   operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
-  return "vextract<shuffletype>64x2\t{%2, %1, 
%0<mask_operand4>|%0<mask_operand4>, %1, %2}";
+  return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
 }
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
@@ -8507,14 +8478,41 @@ (define_split
     operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
 })
 
-(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
-  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" 
"=<store_mask_constraint>")
+(define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
+       (vec_merge:<ssequartermode>
+         (vec_select:<ssequartermode>
+           (match_operand:V16FI 1 "register_operand" "v,v")
+           (parallel [(match_operand 2 "const_0_to_15_operand")
+                      (match_operand 3 "const_0_to_15_operand")
+                      (match_operand 4 "const_0_to_15_operand")
+                      (match_operand 5 "const_0_to_15_operand")]))
+         (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 7 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512F
+   && INTVAL (operands[2]) % 4 == 0
+   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
+   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
+   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
+  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, 
%2}";
+}
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_vextract<shuffletype>32x4_1"
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
        (vec_select:<ssequartermode>
          (match_operand:V16FI 1 "register_operand" "v")
-         (parallel [(match_operand 2  "const_0_to_15_operand")
-            (match_operand 3  "const_0_to_15_operand")
-            (match_operand 4  "const_0_to_15_operand")
-            (match_operand 5  "const_0_to_15_operand")])))]
+         (parallel [(match_operand 2 "const_0_to_15_operand")
+                    (match_operand 3 "const_0_to_15_operand")
+                    (match_operand 4 "const_0_to_15_operand")
+                    (match_operand 5 "const_0_to_15_operand")])))]
   "TARGET_AVX512F
    && INTVAL (operands[2]) % 4 == 0
    && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
@@ -8522,7 +8520,7 @@ (define_insn "<mask_codefor>avx512f_vext
    && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
 {
   operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
-  return "vextract<shuffletype>32x4\t{%2, %1, 
%0<mask_operand6>|%0<mask_operand6>, %1, %2}";
+  return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
 }
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
@@ -8606,35 +8604,35 @@ (define_split
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
 
-(define_insn "vec_extract_lo_<mode>_maskm"
-  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+(define_insn "vec_extract_lo_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
-           (match_operand:V8FI 1 "register_operand" "v")
+           (match_operand:V8FI 1 "register_operand" "v,v")
            (parallel [(const_int 0) (const_int 1)
-             (const_int 2) (const_int 3)]))
-         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
-         (match_operand:QI 3 "register_operand" "Yk")))]
+                      (const_int 2) (const_int 3)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512F
-   && rtx_equal_p (operands[2], operands[0])"
-  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
+   (set_attr "memory" "none,store")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_lo_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" 
"=v,<store_mask_constraint>,v")
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
        (vec_select:<ssehalfvecmode>
-         (match_operand:V8FI 1 "<store_mask_predicate>" 
"v,v,<store_mask_constraint>")
+         (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
          (parallel [(const_int 0) (const_int 1)
-            (const_int 2) (const_int 3)])))]
-  "TARGET_AVX512F
-   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
+                    (const_int 2) (const_int 3)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
-  if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
-    return "vextract<shuffletype>64x4\t{$0x0, %1, 
%0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+  if (!TARGET_AVX512VL && !MEM_P (operands[1]))
+    return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
   else
     return "#";
 }
@@ -8645,70 +8643,69 @@ (define_insn "vec_extract_lo_<mode><mask
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_hi_<mode>_maskm"
-  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+(define_insn "vec_extract_hi_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
-           (match_operand:V8FI 1 "register_operand" "v")
+           (match_operand:V8FI 1 "register_operand" "v,v")
            (parallel [(const_int 4) (const_int 5)
-             (const_int 6) (const_int 7)]))
-         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
-         (match_operand:QI 3 "register_operand" "Yk")))]
+                      (const_int 6) (const_int 7)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512F
-   && rtx_equal_p (operands[2], operands[0])"
-  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
-  [(set_attr "type" "sselog")
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
+  [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "memory" "store")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_hi_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" 
"=<store_mask_constraint>")
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(const_int 4) (const_int 5)
-            (const_int 6) (const_int 7)])))]
+                    (const_int 6) (const_int 7)])))]
   "TARGET_AVX512F"
-  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, 
%1, 0x1}"
+  "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_hi_<mode>_maskm"
-   [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+(define_insn "vec_extract_hi_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
-           (match_operand:V16FI 1 "register_operand" "v")
+           (match_operand:V16FI 1 "register_operand" "v,v")
            (parallel [(const_int 8) (const_int 9)
-             (const_int 10) (const_int 11)
-             (const_int 12) (const_int 13)
-             (const_int 14) (const_int 15)]))
-         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
-         (match_operand:QI 3 "register_operand" "Yk")))]
+                      (const_int 10) (const_int 11)
+                      (const_int 12) (const_int 13)
+                      (const_int 14) (const_int 15)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512DQ
-   && rtx_equal_p (operands[2], operands[0])"
-  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_hi_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" 
"=<store_mask_constraint>,vm")
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "register_operand" "v,v")
          (parallel [(const_int 8) (const_int 9)
-            (const_int 10) (const_int 11)
-           (const_int 12) (const_int 13)
-           (const_int 14) (const_int 15)])))]
-  "TARGET_AVX512F && <mask_avx512dq_condition>"
+                    (const_int 10) (const_int 11)
+                    (const_int 12) (const_int 13)
+                    (const_int 14) (const_int 15)])))]
+  "TARGET_AVX512F"
   "@
-   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, 
%1, 0x1}
+   vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
    vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
@@ -8781,24 +8778,42 @@ (define_expand "avx_vextractf128<mode>"
   DONE;
 })
 
-(define_insn "vec_extract_lo_<mode><mask_name>"
+(define_insn "vec_extract_lo_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+       (vec_merge:<ssehalfvecmode>
+         (vec_select:<ssehalfvecmode>
+           (match_operand:V16FI 1 "register_operand" "v,v")
+           (parallel [(const_int 0) (const_int 1)
+                      (const_int 2) (const_int 3)
+                      (const_int 4) (const_int 5)
+                      (const_int 6) (const_int 7)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512F
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_lo_<mode>"
   [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
        (vec_select:<ssehalfvecmode>
-         (match_operand:V16FI 1 "<store_mask_predicate>"
-                                "v,<store_mask_constraint>,v")
+         (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
          (parallel [(const_int 0) (const_int 1)
-                     (const_int 2) (const_int 3)
-                     (const_int 4) (const_int 5)
-                     (const_int 6) (const_int 7)])))]
+                    (const_int 2) (const_int 3)
+                    (const_int 4) (const_int 5)
+                    (const_int 6) (const_int 7)])))]
   "TARGET_AVX512F
-   && <mask_mode512bit_condition>
-   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
-  if (<mask_applied>
-      || (!TARGET_AVX512VL
-         && !REG_P (operands[0])
-         && EXT_REX_SSE_REG_P (operands[1])))
-    return "vextract<shuffletype>32x8\t{$0x0, %1, 
%0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+  if (!TARGET_AVX512VL
+      && !REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
   else
     return "#";
 }
@@ -8833,28 +8848,34 @@ (define_split
     operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
 })
 
-(define_insn "vec_extract_lo_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
-       (vec_select:<ssehalfvecmode>
-         (match_operand:VI8F_256 1 "<store_mask_predicate>"
-                                   "v,<store_mask_constraint>,v")
-         (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_AVX
-   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
-   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
-{
-  if (<mask_applied>)
-    return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
-  else
-    return "#";
-}
+(define_insn "vec_extract_lo_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+       (vec_merge:<ssehalfvecmode>
+         (vec_select:<ssehalfvecmode>
+           (match_operand:VI8F_256 1 "register_operand" "v,v")
+           (parallel [(const_int 0) (const_int 1)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512DQ
+   && TARGET_AVX512VL
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
    [(set_attr "type" "sselog1")
     (set_attr "prefix_extra" "1")
     (set_attr "length_immediate" "1")
-    (set_attr "memory" "none,load,store")
+    (set_attr "memory" "none,store")
     (set_attr "prefix" "evex")
     (set_attr "mode" "XI")])
 
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
+       (vec_select:<ssehalfvecmode>
+         (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#")
+
 (define_split
   [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
@@ -8865,20 +8886,38 @@ (define_split
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
 
-(define_insn "vec_extract_hi_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" 
"=v,<store_mask_constraint>")
+(define_insn "vec_extract_hi_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+       (vec_merge:<ssehalfvecmode>
+         (vec_select:<ssehalfvecmode>
+           (match_operand:VI8F_256 1 "register_operand" "v,v")
+           (parallel [(const_int 2) (const_int 3)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512DQ
+   && TARGET_AVX512VL
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
        (vec_select:<ssehalfvecmode>
-         (match_operand:VI8F_256 1 "register_operand" "v,v")
+         (match_operand:VI8F_256 1 "register_operand" "v")
          (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
+  "TARGET_AVX"
 {
   if (TARGET_AVX512VL)
-  {
-    if (TARGET_AVX512DQ)
-      return "vextract<shuffletype>64x2\t{$0x1, %1, 
%0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
-    else
-      return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
-  }
+    {
+      if (TARGET_AVX512DQ)
+       return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
+      else
+       return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
+    }
   else
     return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
 }
@@ -8899,74 +8938,51 @@ (define_split
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
 
-(define_insn "vec_extract_lo_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
-                                         "=<store_mask_constraint>,v")
-       (vec_select:<ssehalfvecmode>
-         (match_operand:VI4F_256 1 "<store_mask_predicate>"
-                                   "v,<store_mask_constraint>")
-         (parallel [(const_int 0) (const_int 1)
-                    (const_int 2) (const_int 3)])))]
-  "TARGET_AVX
-   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
-   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
-{
-  if (<mask_applied>)
-    return "vextract<shuffletype>32x4\t{$0x0, %1, 
%0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
-  else
-    return "#";
-}
-  [(set_attr "type" "sselog1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "length_immediate" "1")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "vec_extract_lo_<mode>_maskm"
-  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+(define_insn "vec_extract_lo_<mode>_mask"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
-           (match_operand:VI4F_256 1 "register_operand" "v")
+           (match_operand:VI4F_256 1 "register_operand" "v,v")
            (parallel [(const_int 0) (const_int 1)
-                     (const_int 2) (const_int 3)]))
-         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
-         (match_operand:QI 3 "register_operand" "Yk")))]
-  "TARGET_AVX512VL && TARGET_AVX512F
-   && rtx_equal_p (operands[2], operands[0])"
-  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+                      (const_int 2) (const_int 3)]))
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:QI 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512DQ
+   && TARGET_AVX512VL
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
+  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "vec_extract_hi_<mode>_maskm"
-  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
-       (vec_merge:<ssehalfvecmode>
-         (vec_select:<ssehalfvecmode>
-           (match_operand:VI4F_256 1 "register_operand" "v")
-           (parallel [(const_int 4) (const_int 5)
-                     (const_int 6) (const_int 7)]))
-         (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
-         (match_operand:<ssehalfvecmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512F && TARGET_AVX512VL
-   && rtx_equal_p (operands[2], operands[0])"
-  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
+       (vec_select:<ssehalfvecmode>
+         (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
+         (parallel [(const_int 0) (const_int 1)
+                    (const_int 2) (const_int 3)])))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
   [(set_attr "type" "sselog1")
+   (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vec_extract_hi_<mode>_mask"
-  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
+  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v,m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
-           (match_operand:VI4F_256 1 "register_operand" "v")
+           (match_operand:VI4F_256 1 "register_operand" "v,v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
-         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
-         (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512VL"
+         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
+         (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+  "TARGET_AVX512VL
+   && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
   "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
   [(set_attr "type" "sselog1")
    (set_attr "length_immediate" "1")
--- gcc/testsuite/gcc.target/i386/avx512vl-pr93069.c.jj 2019-12-28 
16:31:30.118695074 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr93069.c    2019-12-28 
16:32:16.920990539 +0100
@@ -0,0 +1,12 @@
+/* PR target/93069 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-options "-O2 -fopenmp-simd -mtune=skylake-avx512" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+/* { dg-additional-options "-mavx512dq" { target avx512dq } } */
+
+#pragma omp declare simd
+int
+foo (int x, int y)
+{
+  return x == 0 ? x : y;
+}
--- gcc/testsuite/gcc.dg/vect/pr93069.c.jj      2019-12-28 16:31:01.822121036 
+0100
+++ gcc/testsuite/gcc.dg/vect/pr93069.c 2019-12-28 16:30:35.503517205 +0100
@@ -0,0 +1,10 @@
+/* PR target/93069 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-options "-O2 -fopenmp-simd" } */
+
+#pragma omp declare simd
+int
+foo (int x, int y)
+{
+  return x == 0 ? x : y;
+}

        Jakub
2019-12-30  Jakub Jelinek  <ja...@redhat.com>

        PR target/93069
        * config/i386/sse.md (vec_extract_lo_<mode><mask_name>): Use
        <store_mask_constraint> instead of m in output operand constraint.
        (vec_extract_hi_<mode><mask_name>): Use <mask_operand2> instead of
        %{%3%}.

        * gcc.target/i386/avx512vl-pr93069.c: New test.
        * gcc.dg/vect/pr93069.c: New test.

--- gcc/config/i386/sse.md.jj   2019-12-27 18:16:48.146431083 +0100
+++ gcc/config/i386/sse.md      2019-12-28 14:43:29.181456611 +0100
@@ -8782,7 +8782,8 @@
 })
 
 (define_insn "vec_extract_lo_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
+  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
+                                         "=v,v,<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "<store_mask_predicate>"
                                 "v,<store_mask_constraint>,v")
@@ -8834,7 +8835,8 @@
 })
 
 (define_insn "vec_extract_lo_<mode><mask_name>"
-  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
+  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
+                                         "=v,v,<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "<store_mask_predicate>"
                                    "v,<store_mask_constraint>,v")
@@ -8844,7 +8846,7 @@
    && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
 {
   if (<mask_applied>)
-    return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
+    return "vextract<shuffletype>64x2\t{$0x0, %1, 
%0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
   else
     return "#";
 }
--- gcc/testsuite/gcc.target/i386/avx512vl-pr93069.c.jj 2019-12-28 
16:31:30.118695074 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr93069.c    2019-12-28 
16:32:16.920990539 +0100
@@ -0,0 +1,12 @@
+/* PR target/93069 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-options "-O2 -fopenmp-simd -mtune=skylake-avx512" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+/* { dg-additional-options "-mavx512dq" { target avx512dq } } */
+
+#pragma omp declare simd
+int
+foo (int x, int y)
+{
+  return x == 0 ? x : y;
+}
--- gcc/testsuite/gcc.dg/vect/pr93069.c.jj      2019-12-28 16:31:01.822121036 
+0100
+++ gcc/testsuite/gcc.dg/vect/pr93069.c 2019-12-28 16:30:35.503517205 +0100
@@ -0,0 +1,10 @@
+/* PR target/93069 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-options "-O2 -fopenmp-simd" } */
+
+#pragma omp declare simd
+int
+foo (int x, int y)
+{
+  return x == 0 ? x : y;
+}

Reply via email to