On Sat, Jul 31, 2021 at 12:38 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Fri, Jul 30, 2021 at 6:27 AM Jakub Jelinek via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > On Fri, Jul 30, 2021 at 12:27:39PM +0200, Uros Bizjak wrote: > > > Please put some space here, e.g.: > > ... > > > Can you just name the relevant insn pattern and use > > > > > > emit_insn (gen_bsr_1)? > > > > Here is the updated patch. I'll bootstrap/regtest it tonight. > > > > 2021-07-30 Jakub Jelinek <ja...@redhat.com> > > > > PR target/78103 > > * config/i386/i386.md (bsr_rex64_1, bsr_1, bsr_zext_1): New > > define_insn patterns. > > (*bsr_rex64_2, *bsr_2): New define_insn_and_split patterns. > > Add combine splitters for constant - clz. > > (clz<mode>2): Use a temporary pseudo for bsr result. > > > > * gcc.target/i386/pr78103-1.c: New test. > > * gcc.target/i386/pr78103-2.c: New test. > > * gcc.target/i386/pr78103-3.c: New test. > > > > --- gcc/config/i386/i386.md.jj 2021-07-28 12:05:56.857977764 +0200 > > +++ gcc/config/i386/i386.md 2021-07-30 15:13:49.994946550 +0200 > > @@ -14761,6 +14761,18 @@ (define_insn "bsr_rex64" > > (set_attr "znver1_decode" "vector") > > (set_attr "mode" "DI")]) > > > > +(define_insn "bsr_rex64_1" > > + [(set (match_operand:DI 0 "register_operand" "=r") > > + (minus:DI (const_int 63) > > + (clz:DI (match_operand:DI 1 "nonimmediate_operand" > > "rm")))) > > + (clobber (reg:CC FLAGS_REG))] > > + "!TARGET_LZCNT && TARGET_64BIT" > > + "bsr{q}\t{%1, %0|%0, %1}" > > + [(set_attr "type" "alu1") > > + (set_attr "prefix_0f" "1") > > + (set_attr "znver1_decode" "vector") > > + (set_attr "mode" "DI")]) > > + > > (define_insn "bsr" > > [(set (reg:CCZ FLAGS_REG) > > (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") > > @@ -14775,17 +14787,204 @@ (define_insn "bsr" > > (set_attr "znver1_decode" "vector") > > (set_attr "mode" "SI")]) > > > > +(define_insn "bsr_1" > > + [(set (match_operand:SI 0 "register_operand" "=r") > > + (minus:SI (const_int 31) > > + 
(clz:SI (match_operand:SI 1 "nonimmediate_operand" > > "rm")))) > > + (clobber (reg:CC FLAGS_REG))] > > + "!TARGET_LZCNT" > > + "bsr{l}\t{%1, %0|%0, %1}" > > + [(set_attr "type" "alu1") > > + (set_attr "prefix_0f" "1") > > + (set_attr "znver1_decode" "vector") > > + (set_attr "mode" "SI")]) > > + > > +(define_insn "bsr_zext_1" > > + [(set (match_operand:DI 0 "register_operand" "=r") > > + (zero_extend:DI > > + (minus:SI > > + (const_int 31) > > + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))) > > + (clobber (reg:CC FLAGS_REG))] > > + "!TARGET_LZCNT && TARGET_64BIT" > > + "bsr{l}\t{%1, %k0|%k0, %1}" > > + [(set_attr "type" "alu1") > > + (set_attr "prefix_0f" "1") > > + (set_attr "znver1_decode" "vector") > > + (set_attr "mode" "SI")]) > > + > > +; As bsr is undefined behavior on zero and for other input > > +; values it is in range 0 to 63, we can optimize away sign-extends. > > +(define_insn_and_split "*bsr_rex64_2" > > + [(set (match_operand:DI 0 "register_operand") > > + (xor:DI > > + (sign_extend:DI > > + (minus:SI > > + (const_int 63) > > + (subreg:SI (clz:DI (match_operand:DI 1 > > "nonimmediate_operand")) > > + 0))) > > + (const_int 63))) > > + (clobber (reg:CC FLAGS_REG))] > > + "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(parallel [(set (reg:CCZ FLAGS_REG) > > + (compare:CCZ (match_dup 1) (const_int 0))) > > + (set (match_dup 2) > > + (minus:DI (const_int 63) (clz:DI (match_dup 1))))]) > > + (parallel [(set (match_dup 0) > > + (zero_extend:DI (xor:SI (match_dup 3) (const_int 63)))) > > + (clobber (reg:CC FLAGS_REG))])] > > +{ > > + operands[2] = gen_reg_rtx (DImode); > > + operands[3] = lowpart_subreg (SImode, operands[2], DImode); > > +}) > > + > > +(define_insn_and_split "*bsr_2" > > + [(set (match_operand:DI 0 "register_operand") > > + (sign_extend:DI > > + (xor:SI > > + (minus:SI > > + (const_int 31) > > + (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) > > + (const_int 31)))) > > + 
(clobber (reg:CC FLAGS_REG))] > > + "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(parallel [(set (reg:CCZ FLAGS_REG) > > + (compare:CCZ (match_dup 1) (const_int 0))) > > + (set (match_dup 2) > > + (minus:SI (const_int 31) (clz:SI (match_dup 1))))]) > > + (parallel [(set (match_dup 0) > > + (zero_extend:DI (xor:SI (match_dup 2) (const_int 31)))) > > + (clobber (reg:CC FLAGS_REG))])] > > + "operands[2] = gen_reg_rtx (SImode);") > > + > > +; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x). > > +; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be > > +; in [0, 63] or [0, 31] range. > > +(define_split > > + [(set (match_operand:SI 0 "register_operand") > > + (minus:SI > > + (match_operand:SI 2 "const_int_operand") > > + (xor:SI > > + (minus:SI (const_int 63) > > + (subreg:SI > > + (clz:DI (match_operand:DI 1 "nonimmediate_operand")) > > + 0)) > > + (const_int 63))))] > > + "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()" > > + [(set (match_dup 3) > > + (minus:DI (const_int 63) (clz:DI (match_dup 1)))) > > + (set (match_dup 0) > > + (plus:SI (match_dup 5) (match_dup 4)))] > > +{ > > + operands[3] = gen_reg_rtx (DImode); > > + operands[5] = lowpart_subreg (SImode, operands[3], DImode); > > + if (INTVAL (operands[2]) == 63) > > + { > > + emit_insn (gen_bsr_rex64_1 (operands[3], operands[1])); > > + emit_move_insn (operands[0], operands[5]); > > + DONE; > > + } > > + operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode); > > +}) > > + > > +(define_split > > + [(set (match_operand:SI 0 "register_operand") > > + (minus:SI > > + (match_operand:SI 2 "const_int_operand") > > + (xor:SI > > + (minus:SI (const_int 31) > > + (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) > > + (const_int 31))))] > > + "!TARGET_LZCNT && ix86_pre_reload_split ()" > > + [(set (match_dup 3) > > + (minus:SI (const_int 31) (clz:SI (match_dup 1)))) > > + (set (match_dup 0) > > + (plus:SI 
(match_dup 3) (match_dup 4)))] > > +{ > > + if (INTVAL (operands[2]) == 31) > > + { > > + emit_insn (gen_bsr_1 (operands[0], operands[1])); > > + DONE; > > + } > > + operands[3] = gen_reg_rtx (SImode); > > + operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode); > > +}) > > + > > +(define_split > > + [(set (match_operand:DI 0 "register_operand") > > + (minus:DI > > + (match_operand:DI 2 "const_int_operand") > > + (xor:DI > > + (sign_extend:DI > > + (minus:SI (const_int 63) > > + (subreg:SI > > + (clz:DI (match_operand:DI 1 > > "nonimmediate_operand")) > > + 0))) > > + (const_int 63))))] > > + "!TARGET_LZCNT > > + && TARGET_64BIT > > + && ix86_pre_reload_split () > > + && ((unsigned HOST_WIDE_INT) > > + trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode) > > + == UINTVAL (operands[2]) - 63)" > > + [(set (match_dup 3) > > + (minus:DI (const_int 63) (clz:DI (match_dup 1)))) > > + (set (match_dup 0) > > + (plus:DI (match_dup 3) (match_dup 4)))] > > +{ > > + if (INTVAL (operands[2]) == 63) > > + { > > + emit_insn (gen_bsr_rex64_1 (operands[0], operands[1])); > > + DONE; > > + } > > + operands[3] = gen_reg_rtx (DImode); > > + operands[4] = GEN_INT (UINTVAL (operands[2]) - 63); > > +}) > > + > > +(define_split > > + [(set (match_operand:DI 0 "register_operand") > > + (minus:DI > > + (match_operand:DI 2 "const_int_operand") > > + (sign_extend:DI > > + (xor:SI > > + (minus:SI (const_int 31) > > + (clz:SI (match_operand:SI 1 > > "nonimmediate_operand"))) > > + (const_int 31)))))] > > + "!TARGET_LZCNT > > + && TARGET_64BIT > > + && ix86_pre_reload_split () > > + && ((unsigned HOST_WIDE_INT) > > + trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode) > > + == UINTVAL (operands[2]) - 31)" > > + [(set (match_dup 3) > > + (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1))))) > > + (set (match_dup 0) > > + (plus:DI (match_dup 3) (match_dup 4)))] > > +{ > > + if (INTVAL (operands[2]) == 31) > > + { > > + emit_insn (gen_bsr_zext_1 (operands[0], 
operands[1])); > > + DONE; > > + } > > + operands[3] = gen_reg_rtx (DImode); > > + operands[4] = GEN_INT (UINTVAL (operands[2]) - 31); > > +}) > > + > > (define_expand "clz<mode>2" > > [(parallel > > [(set (reg:CCZ FLAGS_REG) > > (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") > > (const_int 0))) > > - (set (match_operand:SWI48 0 "register_operand") > > - (minus:SWI48 > > - (match_dup 2) > > - (clz:SWI48 (match_dup 1))))]) > > + (set (match_dup 3) (minus:SWI48 > > + (match_dup 2) > > + (clz:SWI48 (match_dup 1))))]) > > (parallel > > - [(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2))) > > + [(set (match_operand:SWI48 0 "register_operand") > > + (xor:SWI48 (match_dup 3) (match_dup 2))) > > (clobber (reg:CC FLAGS_REG))])] > > "" > > { > > @@ -14795,6 +14994,7 @@ (define_expand "clz<mode>2" > > DONE; > > } > > operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); > > + operands[3] = gen_reg_rtx (<MODE>mode); > > }) > > > > (define_insn_and_split "clz<mode>2_lzcnt" > > --- gcc/testsuite/gcc.target/i386/pr78103-1.c.jj 2021-07-30 > > 15:07:26.104139537 +0200 > > +++ gcc/testsuite/gcc.target/i386/pr78103-1.c 2021-07-30 > > 15:07:26.104139537 +0200 > > @@ -0,0 +1,28 @@ > > +/* PR target/78103 */ > > +/* { dg-do compile { target { ! 
ia32 } } } */ > > +/* { dg-options "-O2 -mno-lzcnt" } */ > > +/* { dg-final { scan-assembler-not {\mcltq\M} } } */ > > + > > +long long > > +foo (long long x) > > +{ > > + return __builtin_clzll (x); > > +} > > + > > +long long > > +bar (long long x) > > +{ > > + return (unsigned int) __builtin_clzll (x); > > +} > > + > > +long long > > +baz (int x) > > +{ > > + return __builtin_clz (x); > > +} > > + > > +long long > > +qux (int x) > > +{ > > + return (unsigned int) __builtin_clz (x); > > +} > > --- gcc/testsuite/gcc.target/i386/pr78103-2.c.jj 2021-07-30 > > 15:07:26.104139537 +0200 > > +++ gcc/testsuite/gcc.target/i386/pr78103-2.c 2021-07-30 > > 15:07:26.104139537 +0200 > > @@ -0,0 +1,33 @@ > > +/* PR target/78103 */ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -mno-lzcnt" } */ > > +/* { dg-final { scan-assembler-not {\mmovl\M} } } */ > > +/* { dg-final { scan-assembler-not {\mxor[lq]\M} } } */ > > +/* { dg-final { scan-assembler-not {\msubl\M} } } */ > > +/* { dg-final { scan-assembler {\m(leal|addl)\M} } } */ > ^^^^^^^^ It > should also allow incl, like "incl %eax" for -m32.
Like this:

diff --git a/gcc/testsuite/gcc.target/i386/pr78103-2.c b/gcc/testsuite/gcc.target/i386/pr78103-2.c
index b3523382926..30f7f98f60a 100644
--- a/gcc/testsuite/gcc.target/i386/pr78103-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr78103-2.c
@@ -4,7 +4,7 @@
 /* { dg-final { scan-assembler-not {\mmovl\M} } } */
 /* { dg-final { scan-assembler-not {\mxor[lq]\M} } } */
 /* { dg-final { scan-assembler-not {\msubl\M} } } */
-/* { dg-final { scan-assembler {\m(leal|addl)\M} } } */
+/* { dg-final { scan-assembler {\m(leal|addl|incl)\M} } } */
 
 unsigned int
 foo (unsigned int x)

-- 
H.J.