On Tue, Nov 24, 2020 at 9:20 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > As the following testcase shows, unlike char, int or long long sized > __builtin_*_overflow{,_p}, for short sized one in most cases the ce1 pass > doesn't optimize the jo/jno or jc/jnc jumps with setting of a pseudo to 0/1 > into seto/setc. The reason is missing *setcc_hi_1* pattern. The following > patch implements it using mode iterators so that on i486 and pentium? > one can get the zero extension through and instead of movzbw. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2020-11-24 Jakub Jelinek <ja...@redhat.com> > > PR target/97950 > * config/i386/i386.md (*setcc_si_1_and): Macroize into... > (*setcc_<mode>_1_and): New define_insn_and_split with SWI24 iterator. > (*setcc_si_1_movzbl): Macroize into... > (*setcc_<mode>_1_movzbl): New define_insn_and_split with SWI24 > iterator. > > * gcc.target/i386/pr97950.c: New test.
OK. Thanks, Uros. > > --- gcc/config/i386/i386.md.jj 2020-11-23 17:01:48.235055044 +0100 > +++ gcc/config/i386/i386.md 2020-11-23 21:29:43.425842870 +0100 > @@ -12714,9 +12714,9 @@ (define_insn_and_split "*setcc_di_1" > operands[2] = gen_lowpart (QImode, operands[0]); > }) > > -(define_insn_and_split "*setcc_si_1_and" > - [(set (match_operand:SI 0 "register_operand" "=q") > - (match_operator:SI 1 "ix86_comparison_operator" > +(define_insn_and_split "*setcc_<mode>_1_and" > + [(set (match_operand:SWI24 0 "register_operand" "=q") > + (match_operator:SWI24 1 "ix86_comparison_operator" > [(reg FLAGS_REG) (const_int 0)])) > (clobber (reg:CC FLAGS_REG))] > "!TARGET_PARTIAL_REG_STALL > @@ -12724,7 +12724,7 @@ (define_insn_and_split "*setcc_si_1_and" > "#" > "&& reload_completed" > [(set (match_dup 2) (match_dup 1)) > - (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2))) > + (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2))) > (clobber (reg:CC FLAGS_REG))])] > { > operands[1] = shallow_copy_rtx (operands[1]); > @@ -12732,16 +12732,16 @@ (define_insn_and_split "*setcc_si_1_and" > operands[2] = gen_lowpart (QImode, operands[0]); > }) > > -(define_insn_and_split "*setcc_si_1_movzbl" > - [(set (match_operand:SI 0 "register_operand" "=q") > - (match_operator:SI 1 "ix86_comparison_operator" > +(define_insn_and_split "*setcc_<mode>_1_movzbl" > + [(set (match_operand:SWI24 0 "register_operand" "=q") > + (match_operator:SWI24 1 "ix86_comparison_operator" > [(reg FLAGS_REG) (const_int 0)]))] > "!TARGET_PARTIAL_REG_STALL > && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))" > "#" > "&& reload_completed" > [(set (match_dup 2) (match_dup 1)) > - (set (match_dup 0) (zero_extend:SI (match_dup 2)))] > + (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))] > { > operands[1] = shallow_copy_rtx (operands[1]); > PUT_MODE (operands[1], QImode); > --- gcc/testsuite/gcc.target/i386/pr97950.c.jj 2020-11-23 17:20:33.481605139 > +0100 > +++ 
gcc/testsuite/gcc.target/i386/pr97950.c 2020-11-23 21:32:53.593734242 > +0100 > @@ -0,0 +1,153 @@ > +/* PR target/97950 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune=generic" } */ > +/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */ > +/* { dg-final { scan-assembler-times "\tseto\t" 16 } } */ > +/* { dg-final { scan-assembler-times "\tsetc\t" 4 } } */ > +/* { dg-final { scan-assembler-not "\tjn?a\t" } } */ > +/* { dg-final { scan-assembler-not "\tjn?o\t" } } */ > +/* { dg-final { scan-assembler-not "\tjn?c\t" } } */ > + > +char > +f1 (short a, short b) > +{ > + return __builtin_mul_overflow_p (a, b, (short) 0); > +} > + > +char > +f2 (short a, short b) > +{ > + return __builtin_add_overflow_p (a, b, (short) 0); > +} > + > +char > +f3 (short a, short b) > +{ > + return __builtin_sub_overflow_p (a, b, (short) 0); > +} > + > +char > +f4 (unsigned short a, unsigned short b) > +{ > + return __builtin_mul_overflow_p (a, b, (unsigned short) 0); > +} > + > +char > +f5 (unsigned short a, unsigned short b) > +{ > + return __builtin_add_overflow_p (a, b, (unsigned short) 0); > +} > + > +char > +f6 (unsigned short a, unsigned short b) > +{ > + return __builtin_sub_overflow_p (a, b, (unsigned short) 0); > +} > + > +char > +f7 (short a, short b) > +{ > + return __builtin_mul_overflow_p (a, b, (short) 0); > +} > + > +char > +f8 (short a, short b) > +{ > + return __builtin_add_overflow_p (a, b, (short) 0); > +} > + > +char > +f9 (short a, short b) > +{ > + return __builtin_sub_overflow_p (a, b, (short) 0); > +} > + > +char > +f10 (unsigned short a, unsigned short b) > +{ > + return __builtin_mul_overflow_p (a, b, (unsigned short) 0); > +} > + > +char > +f11 (unsigned short a, unsigned short b) > +{ > + return __builtin_add_overflow_p (a, b, (unsigned short) 0); > +} > + > +char > +f12 (unsigned short a, unsigned short b) > +{ > + return __builtin_sub_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f13 (short a, short b) > +{ > + return 
__builtin_mul_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f14 (short a, short b) > +{ > + return __builtin_add_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f15 (short a, short b) > +{ > + return __builtin_sub_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f16 (unsigned short a, unsigned short b) > +{ > + return __builtin_mul_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f17 (unsigned short a, unsigned short b) > +{ > + return __builtin_add_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f18 (unsigned short a, unsigned short b) > +{ > + return __builtin_sub_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f19 (short a, short b) > +{ > + return __builtin_mul_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f20 (short a, short b) > +{ > + return __builtin_add_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f21 (short a, short b) > +{ > + return __builtin_sub_overflow_p (a, b, (short) 0); > +} > + > +unsigned short > +f22 (unsigned short a, unsigned short b) > +{ > + return __builtin_mul_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f23 (unsigned short a, unsigned short b) > +{ > + return __builtin_add_overflow_p (a, b, (unsigned short) 0); > +} > + > +unsigned short > +f24 (unsigned short a, unsigned short b) > +{ > + return __builtin_sub_overflow_p (a, b, (unsigned short) 0); > +} > > Jakub >