On Sat, Jul 31, 2021 at 12:38 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Fri, Jul 30, 2021 at 6:27 AM Jakub Jelinek via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Fri, Jul 30, 2021 at 12:27:39PM +0200, Uros Bizjak wrote:
> > > Please put some space here, e.g.:
> > ...
> > > Can you just name the relevant insn pattern and use
> > >
> > > emit_insn (gen_bsr_1)?
> >
> > Here is the updated patch.  I'll bootstrap/regtest it tonight.
> >
> > 2021-07-30  Jakub Jelinek  <ja...@redhat.com>
> >
> >         PR target/78103
> >         * config/i386/i386.md (bsr_rex64_1, bsr_1, bsr_zext_1): New
> >         define_insn patterns.
> >         (*bsr_rex64_2, *bsr_2): New define_insn_and_split patterns.
> >         Add combine splitters for constant - clz.
> >         (clz<mode>2): Use a temporary pseudo for bsr result.
> >
> >         * gcc.target/i386/pr78103-1.c: New test.
> >         * gcc.target/i386/pr78103-2.c: New test.
> >         * gcc.target/i386/pr78103-3.c: New test.
> >
> > --- gcc/config/i386/i386.md.jj  2021-07-28 12:05:56.857977764 +0200
> > +++ gcc/config/i386/i386.md     2021-07-30 15:13:49.994946550 +0200
> > @@ -14761,6 +14761,18 @@ (define_insn "bsr_rex64"
> >     (set_attr "znver1_decode" "vector")
> >     (set_attr "mode" "DI")])
> >
> > +(define_insn "bsr_rex64_1"
> > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > +       (minus:DI (const_int 63)
> > +                 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
> > +   (clobber (reg:CC FLAGS_REG))]
> > +  "!TARGET_LZCNT && TARGET_64BIT"
> > +  "bsr{q}\t{%1, %0|%0, %1}"
> > +  [(set_attr "type" "alu1")
> > +   (set_attr "prefix_0f" "1")
> > +   (set_attr "znver1_decode" "vector")
> > +   (set_attr "mode" "DI")])
> > +
> >  (define_insn "bsr"
> >    [(set (reg:CCZ FLAGS_REG)
> >         (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
> > @@ -14775,17 +14787,204 @@ (define_insn "bsr"
> >     (set_attr "znver1_decode" "vector")
> >     (set_attr "mode" "SI")])
> >
> > +(define_insn "bsr_1"
> > +  [(set (match_operand:SI 0 "register_operand" "=r")
> > +       (minus:SI (const_int 31)
> > +                 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
> > +   (clobber (reg:CC FLAGS_REG))]
> > +  "!TARGET_LZCNT"
> > +  "bsr{l}\t{%1, %0|%0, %1}"
> > +  [(set_attr "type" "alu1")
> > +   (set_attr "prefix_0f" "1")
> > +   (set_attr "znver1_decode" "vector")
> > +   (set_attr "mode" "SI")])
> > +
> > +(define_insn "bsr_zext_1"
> > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > +       (zero_extend:DI
> > +         (minus:SI
> > +           (const_int 31)
> > +           (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
> > +   (clobber (reg:CC FLAGS_REG))]
> > +  "!TARGET_LZCNT && TARGET_64BIT"
> > +  "bsr{l}\t{%1, %k0|%k0, %1}"
> > +  [(set_attr "type" "alu1")
> > +   (set_attr "prefix_0f" "1")
> > +   (set_attr "znver1_decode" "vector")
> > +   (set_attr "mode" "SI")])
> > +
> > +; As bsr is undefined behavior on zero and for other input
> > +; values it is in range 0 to 63, we can optimize away sign-extends.
> > +(define_insn_and_split "*bsr_rex64_2"
> > +  [(set (match_operand:DI 0 "register_operand")
> > +       (xor:DI
> > +         (sign_extend:DI
> > +           (minus:SI
> > +             (const_int 63)
> > +             (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
> > +                        0)))
> > +         (const_int 63)))
> > +    (clobber (reg:CC FLAGS_REG))]
> > +  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(parallel [(set (reg:CCZ FLAGS_REG)
> > +                  (compare:CCZ (match_dup 1) (const_int 0)))
> > +             (set (match_dup 2)
> > +                  (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
> > +   (parallel [(set (match_dup 0)
> > +                  (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
> > +             (clobber (reg:CC FLAGS_REG))])]
> > +{
> > +  operands[2] = gen_reg_rtx (DImode);
> > +  operands[3] = lowpart_subreg (SImode, operands[2], DImode);
> > +})
> > +
> > +(define_insn_and_split "*bsr_2"
> > +  [(set (match_operand:DI 0 "register_operand")
> > +       (sign_extend:DI
> > +         (xor:SI
> > +           (minus:SI
> > +             (const_int 31)
> > +             (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
> > +           (const_int 31))))
> > +   (clobber (reg:CC FLAGS_REG))]
> > +  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(parallel [(set (reg:CCZ FLAGS_REG)
> > +                  (compare:CCZ (match_dup 1) (const_int 0)))
> > +             (set (match_dup 2)
> > +                  (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
> > +   (parallel [(set (match_dup 0)
> > +                  (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
> > +             (clobber (reg:CC FLAGS_REG))])]
> > +  "operands[2] = gen_reg_rtx (SImode);")
> > +
> > +; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
> > +; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
> > +; in [0, 63] or [0, 31] range.
> > +(define_split
> > +  [(set (match_operand:SI 0 "register_operand")
> > +       (minus:SI
> > +         (match_operand:SI 2 "const_int_operand")
> > +         (xor:SI
> > +           (minus:SI (const_int 63)
> > +                     (subreg:SI
> > +                       (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
> > +                       0))
> > +           (const_int 63))))]
> > +  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
> > +  [(set (match_dup 3)
> > +       (minus:DI (const_int 63) (clz:DI (match_dup 1))))
> > +   (set (match_dup 0)
> > +       (plus:SI (match_dup 5) (match_dup 4)))]
> > +{
> > +  operands[3] = gen_reg_rtx (DImode);
> > +  operands[5] = lowpart_subreg (SImode, operands[3], DImode);
> > +  if (INTVAL (operands[2]) == 63)
> > +    {
> > +      emit_insn (gen_bsr_rex64_1 (operands[3], operands[1]));
> > +      emit_move_insn (operands[0], operands[5]);
> > +      DONE;
> > +    }
> > +  operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
> > +})
> > +
> > +(define_split
> > +  [(set (match_operand:SI 0 "register_operand")
> > +       (minus:SI
> > +         (match_operand:SI 2 "const_int_operand")
> > +         (xor:SI
> > +           (minus:SI (const_int 31)
> > +                     (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
> > +           (const_int 31))))]
> > +  "!TARGET_LZCNT && ix86_pre_reload_split ()"
> > +  [(set (match_dup 3)
> > +       (minus:SI (const_int 31) (clz:SI (match_dup 1))))
> > +   (set (match_dup 0)
> > +       (plus:SI (match_dup 3) (match_dup 4)))]
> > +{
> > +  if (INTVAL (operands[2]) == 31)
> > +    {
> > +      emit_insn (gen_bsr_1 (operands[0], operands[1]));
> > +      DONE;
> > +    }
> > +  operands[3] = gen_reg_rtx (SImode);
> > +  operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
> > +})
> > +
> > +(define_split
> > +  [(set (match_operand:DI 0 "register_operand")
> > +       (minus:DI
> > +         (match_operand:DI 2 "const_int_operand")
> > +         (xor:DI
> > +           (sign_extend:DI
> > +             (minus:SI (const_int 63)
> > +                       (subreg:SI
> > +                         (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
> > +                         0)))
> > +           (const_int 63))))]
> > +  "!TARGET_LZCNT
> > +   && TARGET_64BIT
> > +   && ix86_pre_reload_split ()
> > +   && ((unsigned HOST_WIDE_INT)
> > +       trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
> > +       == UINTVAL (operands[2]) - 63)"
> > +  [(set (match_dup 3)
> > +       (minus:DI (const_int 63) (clz:DI (match_dup 1))))
> > +   (set (match_dup 0)
> > +       (plus:DI (match_dup 3) (match_dup 4)))]
> > +{
> > +  if (INTVAL (operands[2]) == 63)
> > +    {
> > +      emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
> > +      DONE;
> > +    }
> > +  operands[3] = gen_reg_rtx (DImode);
> > +  operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
> > +})
> > +
> > +(define_split
> > +  [(set (match_operand:DI 0 "register_operand")
> > +       (minus:DI
> > +         (match_operand:DI 2 "const_int_operand")
> > +         (sign_extend:DI
> > +           (xor:SI
> > +             (minus:SI (const_int 31)
> > +                       (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
> > +             (const_int 31)))))]
> > +  "!TARGET_LZCNT
> > +   && TARGET_64BIT
> > +   && ix86_pre_reload_split ()
> > +   && ((unsigned HOST_WIDE_INT)
> > +       trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
> > +       == UINTVAL (operands[2]) - 31)"
> > +  [(set (match_dup 3)
> > +       (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
> > +   (set (match_dup 0)
> > +       (plus:DI (match_dup 3) (match_dup 4)))]
> > +{
> > +  if (INTVAL (operands[2]) == 31)
> > +    {
> > +      emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
> > +      DONE;
> > +    }
> > +  operands[3] = gen_reg_rtx (DImode);
> > +  operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
> > +})
> > +
> >  (define_expand "clz<mode>2"
> >    [(parallel
> >       [(set (reg:CCZ FLAGS_REG)
> >         (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
> >                      (const_int 0)))
> > -      (set (match_operand:SWI48 0 "register_operand")
> > -          (minus:SWI48
> > -            (match_dup 2)
> > -            (clz:SWI48 (match_dup 1))))])
> > +      (set (match_dup 3) (minus:SWI48
> > +                          (match_dup 2)
> > +                          (clz:SWI48 (match_dup 1))))])
> >     (parallel
> > -     [(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2)))
> > +     [(set (match_operand:SWI48 0 "register_operand")
> > +          (xor:SWI48 (match_dup 3) (match_dup 2)))
> >        (clobber (reg:CC FLAGS_REG))])]
> >    ""
> >  {
> > @@ -14795,6 +14994,7 @@ (define_expand "clz<mode>2"
> >        DONE;
> >      }
> >    operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
> > +  operands[3] = gen_reg_rtx (<MODE>mode);
> >  })
> >
> >  (define_insn_and_split "clz<mode>2_lzcnt"
> > --- gcc/testsuite/gcc.target/i386/pr78103-1.c.jj        2021-07-30 15:07:26.104139537 +0200
> > +++ gcc/testsuite/gcc.target/i386/pr78103-1.c   2021-07-30 15:07:26.104139537 +0200
> > @@ -0,0 +1,28 @@
> > +/* PR target/78103 */
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mno-lzcnt" } */
> > +/* { dg-final { scan-assembler-not {\mcltq\M} } } */
> > +
> > +long long
> > +foo (long long x)
> > +{
> > +  return __builtin_clzll (x);
> > +}
> > +
> > +long long
> > +bar (long long x)
> > +{
> > +  return (unsigned int) __builtin_clzll (x);
> > +}
> > +
> > +long long
> > +baz (int x)
> > +{
> > +  return __builtin_clz (x);
> > +}
> > +
> > +long long
> > +qux (int x)
> > +{
> > +  return (unsigned int) __builtin_clz (x);
> > +}
> > --- gcc/testsuite/gcc.target/i386/pr78103-2.c.jj        2021-07-30 15:07:26.104139537 +0200
> > +++ gcc/testsuite/gcc.target/i386/pr78103-2.c   2021-07-30 15:07:26.104139537 +0200
> > @@ -0,0 +1,33 @@
> > +/* PR target/78103 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mno-lzcnt" } */
> > +/* { dg-final { scan-assembler-not {\mmovl\M} } } */
> > +/* { dg-final { scan-assembler-not {\mxor[lq]\M} } } */
> > +/* { dg-final { scan-assembler-not {\msubl\M} } } */
> > +/* { dg-final { scan-assembler {\m(leal|addl)\M} } } */
>                                                            ^^^^^^^^
> It should also allow incl, like "incl %eax" for -m32.

Something like this:

diff --git a/gcc/testsuite/gcc.target/i386/pr78103-2.c b/gcc/testsuite/gcc.target/i386/pr78103-2.c
index b3523382926..30f7f98f60a 100644
--- a/gcc/testsuite/gcc.target/i386/pr78103-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr78103-2.c
@@ -4,7 +4,7 @@
 /* { dg-final { scan-assembler-not {\mmovl\M} } } */
 /* { dg-final { scan-assembler-not {\mxor[lq]\M} } } */
 /* { dg-final { scan-assembler-not {\msubl\M} } } */
-/* { dg-final { scan-assembler {\m(leal|addl)\M} } } */
+/* { dg-final { scan-assembler {\m(leal|addl|incl)\M} } } */

 unsigned int
 foo (unsigned int x)

-- 
H.J.

Reply via email to