Hi! This patch fixes various issues with the vec_extract_lo_* patterns. There are splitters for these patterns that change them into just a copy or load of the low part subreg, but only for some cases (no mask, and in one case also not an xmm32+ reg). When those splitters can't be used, the vextract* insns don't accept a memory input operand. Nevertheless, 3 of the 4 patterns have a nonimmediate_operand input, which is wrong for the masked case, and the remaining one uses register_operand, even though the splitter can handle nonimmediate_operand when not masked.
Thus this patch makes sure that the input is nonimmediate_operand with v,vm constraints if not masked and register_operand with v,v constraints if masked, returns "#" to ensure splitting in cases where the input is a memory operand, simplifies the conditions (for the masked case we don't need to test at runtime that the two operands aren't both MEMs, because the predicate is now register_operand with the v constraint), and changes the single pattern that used register_operand to follow the rest. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-06-29 Jakub Jelinek <ja...@redhat.com> PR target/81225 * config/i386/sse.md (vec_extract_lo_<mode><mask_name>): For V8FI, V16FI and VI8F_256 iterators, use <store_mask_predicate> instead of nonimmediate_operand and <store_mask_constraint> instead of m for the input operand. For V8FI iterator, always split if input is a MEM. For V16FI and VI8F_256 iterators, don't test if both operands are MEM if <mask_applied>. For VI4F_256 iterator, use <store_mask_predicate> instead of register_operand and <store_mask_constraint> instead of v for the input operand. Make sure both operands aren't MEMs if not <mask_applied>. * gcc.target/i386/pr81225.c: New test. 
--- gcc/config/i386/sse.md.jj 2017-06-21 22:01:41.000000000 +0200 +++ gcc/config/i386/sse.md 2017-06-28 12:30:49.304820307 +0200 @@ -7359,13 +7359,13 @@ (define_insn "vec_extract_lo_<mode>_mask (define_insn "vec_extract_lo_<mode><mask_name>" [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v") (vec_select:<ssehalfvecmode> - (match_operand:V8FI 1 "nonimmediate_operand" "v,m") + (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] "TARGET_AVX512F && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))" { - if (<mask_applied> || !TARGET_AVX512VL) + if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1]))) return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; else return "#"; @@ -7515,14 +7515,15 @@ (define_expand "avx_vextractf128<mode>" (define_insn "vec_extract_lo_<mode><mask_name>" [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m") (vec_select:<ssehalfvecmode> - (match_operand:V16FI 1 "nonimmediate_operand" "vm,v") + (match_operand:V16FI 1 "<store_mask_predicate>" + "<store_mask_constraint>,v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] "TARGET_AVX512F && <mask_mode512bit_condition> - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))" { if (<mask_applied>) return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; @@ -7546,11 +7547,12 @@ (define_split (define_insn "vec_extract_lo_<mode><mask_name>" [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m") (vec_select:<ssehalfvecmode> - (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v") + (match_operand:VI8F_256 1 "<store_mask_predicate>" + "<store_mask_constraint>,v") (parallel [(const_int 0) 
(const_int 1)])))] "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition> - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))" { if (<mask_applied>) return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"; @@ -7610,12 +7612,16 @@ (define_split "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);") (define_insn "vec_extract_lo_<mode><mask_name>" - [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") + [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" + "=<store_mask_constraint>,v") (vec_select:<ssehalfvecmode> - (match_operand:VI4F_256 1 "register_operand" "v") + (match_operand:VI4F_256 1 "<store_mask_predicate>" + "v,<store_mask_constraint>") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>" + "TARGET_AVX + && <mask_avx512vl_condition> && <mask_avx512dq_condition> + && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))" { if (<mask_applied>) return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; --- gcc/testsuite/gcc.target/i386/pr81225.c.jj 2017-06-28 12:51:10.606338225 +0200 +++ gcc/testsuite/gcc.target/i386/pr81225.c 2017-06-28 12:50:52.000000000 +0200 @@ -0,0 +1,14 @@ +/* PR target/81225 */ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -O3 -ffloat-store" } */ + +long a[24]; +float b[4], c[24]; +int d; + +void +foo () +{ + for (d = 0; d < 24; d++) + c[d] = (float) d ? : b[a[d]]; +} Jakub