On Wed, Sep 20, 2017 at 8:58 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > As mentioned in the PR, in some cases (one or both *movqi_internal > operands %dil/%sil/%bpl/%spl, neither %r*b) the movl variant is smaller > and in cases where movb and movl is the same size, we might as well choose > the faster instruction. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-09-20 Jakub Jelinek <ja...@redhat.com> > > PR target/82260 > * config/i386/i386.md (*movqi_internal): Replace =q <- q alternative > with =Q <- Q, =R <- R and =r <- r alternatives, only enable the > latter two for 64-bit, renumber alternatives, for -Os imov =q <- n > alternative always use QI mode, for -Os imov =R <- R alternative > always use SI mode, for imov =Q <- Q or =r <- r alternatives > ignore -Os.
BTW: As a personal preference, I find alternatives marked as (=q,n) easier to read, but I guess we could also live with the above notation. > * gcc.target/i386/pr82260-1.c: New test. > * gcc.target/i386/pr82260-2.c: New test. LGTM, I hope you catched all changes of alternative indexes. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2017-09-20 10:46:08.000000000 +0200 > +++ gcc/config/i386/i386.md 2017-09-20 15:15:19.365142707 +0200 > @@ -2571,9 +2571,9 @@ (define_insn "*movhi_internal" > > (define_insn "*movqi_internal" > [(set (match_operand:QI 0 "nonimmediate_operand" > - "=q,q ,q ,r,r ,?r,m ,k,k,r,m,k") > + "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k") > (match_operand:QI 1 "general_operand" > - "q ,qn,qm,q,rn,qm,qn,r,k,k,k,m"))] > + "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > static char buf[128]; > @@ -2589,17 +2589,17 @@ (define_insn "*movqi_internal" > case TYPE_MSKMOV: > switch (which_alternative) > { > - case 7: > + case 9: > ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}"; > break; > - case 9: > + case 11: > ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}"; > break; > - case 10: > - case 11: > + case 12: > + case 13: > gcc_assert (TARGET_AVX512DQ); > /* FALLTHRU */ > - case 8: > + case 10: > ops = "kmov%s\t{%%1, %%0|%%0, %%1}"; > break; > default: > @@ -2619,51 +2619,67 @@ (define_insn "*movqi_internal" > } > } > [(set (attr "isa") > - (if_then_else (eq_attr "alternative" "10,11") > - (const_string "avx512dq") > - (const_string "*"))) > + (cond [(eq_attr "alternative" "1,2") > + (const_string "x64") > + (eq_attr "alternative" "12,13") > + (const_string "avx512dq") > + ] > + (const_string "*"))) > (set (attr "type") > - (cond [(eq_attr "alternative" "7,8,9,10,11") > + (cond [(eq_attr "alternative" "9,10,11,12,13") > (const_string "mskmov") > - (and (eq_attr "alternative" "5") > + (and (eq_attr "alternative" "7") > (not (match_operand:QI 1 "aligned_operand"))) > (const_string "imovx") > (match_test "optimize_function_for_size_p (cfun)") > (const_string "imov") > - (and (eq_attr "alternative" "3") > + (and (eq_attr "alternative" "5") > (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) > (not (match_test "TARGET_QIMODE_MATH")))) > (const_string "imov") > - (eq_attr "alternative" "3,5") > + (eq_attr "alternative" "5,7") > (const_string "imovx") > (and (match_test "TARGET_MOVX") > - (eq_attr "alternative" "2")) > + (eq_attr "alternative" "4")) > (const_string "imovx") > ] > (const_string "imov"))) > (set (attr "prefix") > - (if_then_else (eq_attr "alternative" "7,8,9") > + (if_then_else (eq_attr "alternative" "9,10,11") > (const_string "vex") > (const_string "orig"))) > (set (attr "mode") > - (cond [(eq_attr "alternative" "3,4,5") > + (cond [(eq_attr "alternative" "5,6,7") > (const_string "SI") > - (eq_attr "alternative" "6") > + (eq_attr "alternative" "8") > (const_string "QI") > - (and (eq_attr "alternative" "7,8,9") > + (and (eq_attr "alternative" "9,10,11") > (not (match_test "TARGET_AVX512DQ"))) > (const_string "HI") > (eq_attr "type" "imovx") > (const_string "SI") > + ;; For -Os, 8-bit immediates are always shorter than 32-bit > + ;; ones. > + (and (eq_attr "type" "imov") > + (and (eq_attr "alternative" "3") > + (match_test "optimize_function_for_size_p (cfun)"))) > + (const_string "QI") > + ;; For -Os, movl where one or both operands are NON_Q_REGS > + ;; and both are LEGACY_REGS is shorter than movb. > + ;; Otherwise movb and movl sizes are the same, so decide purely > + ;; based on speed factors. > + (and (eq_attr "type" "imov") > + (and (eq_attr "alternative" "1") > + (match_test "optimize_function_for_size_p (cfun)"))) > + (const_string "SI") > (and (eq_attr "type" "imov") > - (and (eq_attr "alternative" "0,1") > + (and (eq_attr "alternative" "0,1,2,3") > (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY") > - (and (not (match_test > "optimize_function_for_size_p (cfun)")) > - (not (match_test > "TARGET_PARTIAL_REG_STALL")))))) > + (not (match_test "TARGET_PARTIAL_REG_STALL"))))) > (const_string "SI") > ;; Avoid partial register stalls when not using QImode arithmetic > (and (eq_attr "type" "imov") > - (and (eq_attr "alternative" "0,1") > + (and (eq_attr "alternative" "0,1,2,3") > (and (match_test "TARGET_PARTIAL_REG_STALL") > (not (match_test "TARGET_QIMODE_MATH"))))) > (const_string "SI") > --- gcc/testsuite/gcc.target/i386/pr82260-1.c.jj 2017-09-20 > 15:27:43.696823321 +0200 > +++ gcc/testsuite/gcc.target/i386/pr82260-1.c 2017-09-20 15:34:45.913536355 > +0200 > @@ -0,0 +1,26 @@ > +/* PR target/82260 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-Os -mtune=generic -masm=att" } */ > +/* movl %esi, %ecx is shorter than movb %sil, %cl. While > + movl %edx, %ecx is the same size as movb %dl, %cl and > + movl %r8d, %ecx is the same size as movb %r8b, %cl, movl > + is faster on contemporary CPUs. */ > +/* { dg-final { scan-assembler-not {\mmovb\M} } } */ > + > +int > +foo (int x, int c) > +{ > + return x >> c; > +} > + > +int > +bar (int x, int y, int z) > +{ > + return x >> z; > +} > + > +int > +baz (int x, int y, int z, int u, int v) > +{ > + return x >> v; > +} > --- gcc/testsuite/gcc.target/i386/pr82260-2.c.jj 2017-09-20 > 15:30:51.690469279 +0200 > +++ gcc/testsuite/gcc.target/i386/pr82260-2.c 2017-09-20 15:35:31.358967291 > +0200 > @@ -0,0 +1,25 @@ > +/* PR target/82260 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-Os -mtune=generic -masm=att > -mtune-ctrl=^partial_reg_dependency" } */ > +/* { dg-final { scan-assembler-not {\mmovb\t%sil, %cl} } } */ > +/* { dg-final { scan-assembler {\mmovl\t%esi, %ecx} } } */ > +/* { dg-final { scan-assembler {\mmovb\t%dl, %cl} } } */ > +/* { dg-final { scan-assembler {\mmovb\t%r8b, %cl} } } */ > + > +int > +foo (int x, int c) > +{ > + return x >> c; > +} > + > +int > +bar (int x, int y, int z) > +{ > + return x >> z; > +} > + > +int > +baz (int x, int y, int z, int u, int v) > +{ > + return x >> v; > +} > > Jakub