Hi! Looking at the Yr constraint, it seems to me it is really meant to be used only for noavx: only in that case does the choice between xmm0-xmm7 and xmm8+ matter for the size of the instruction (number of prefixes). In most of the places where Yr is used, we typically have 2 noavx alternatives, one with the Yr constraint and another one with *x, and then one avx alternative with x or v.
But in a couple of spots we do the wrong thing, e.g. use the Yr constraint always (which acts, or rather ought to act, see a later patch, as the first half of x for -mtune={silvermont,intel} and otherwise as v), and otherwise use *, which means limiting RA unnecessarily. The following patch fixes the vpmov* insns. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-24 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (sse4_1_<code>v8qiv8hi2<mask_name>): Limit first two alternatives to noavx, use *x instead of *v in the second one, add avx alternative without *. (sse4_1_<code>v4qiv4si2<mask_name>, sse4_1_<code>v4hiv4si2<mask_name>, sse4_1_<code>v2qiv2di2<mask_name>, sse4_1_<code>v2hiv2di2<mask_name>, sse4_1_<code>v2siv2di2<mask_name>): Likewise. --- gcc/config/i386/sse.md.jj 2016-05-24 10:55:52.000000000 +0200 +++ gcc/config/i386/sse.md 2016-05-24 14:50:14.566277449 +0200 @@ -14748,19 +14752,20 @@ (define_insn "avx512bw_<code>v32qiv32hi2 (set_attr "mode" "XI")]) (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v") (any_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>" @@ -14790,17 +14795,18 @@ (define_insn "avx2_<code>v8qiv8si2<mask_ (set_attr "mode" "OI")]) (define_insn 
"sse4_1_<code>v4qiv4si2<mask_name>" - [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v") (any_extend:V4SI (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) (define_insn "avx512f_<code>v16hiv16si2<mask_name>" @@ -14825,17 +14831,18 @@ (define_insn "avx2_<code>v8hiv8si2<mask_ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v4hiv4si2<mask_name>" - [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v") (any_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) (define_insn "avx512f_<code>v8qiv8di2<mask_name>" @@ -14868,16 +14875,17 @@ (define_insn "avx2_<code>v4qiv4di2<mask_ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2qiv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") (any_extend:V2DI (vec_select:V2QI - (match_operand:V16QI 1 
"nonimmediate_operand" "Yrm,*vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) (define_insn "avx512f_<code>v8hiv8di2<mask_name>" @@ -14905,16 +14913,17 @@ (define_insn "avx2_<code>v4hiv4di2<mask_ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2hiv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") (any_extend:V2DI (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) (define_insn "avx512f_<code>v8siv8di2<mask_name>" @@ -14939,16 +14948,17 @@ (define_insn "avx2_<code>v4siv4di2<mask_ (set_attr "mode" "OI")]) (define_insn "sse4_1_<code>v2siv2di2<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") (any_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm") + (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && <mask_avx512vl_condition>" "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" - [(set_attr "type" "ssemov") + 
[(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "TI")]) ;; ptestps/ptestpd are very similar to comiss and ucomiss when Jakub