On Thu, Jan 30, 2020 at 1:18 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > Like any other instruction with 32-bit GPR destination operand in 64-bit > mode, popcntl also clears the upper 32 bits of the register (and other bits > too, it can return only 0 to 32 inclusive). > > During combine, the zero or sign extensions of it show up as paradoxical > subreg of the popcount & 63, where 63 is the smallest power of two - 1 mask > that can represent all the 0 to 32 inclusive values. > > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for > trunk? > > 2020-01-30 Jakub Jelinek <ja...@redhat.com> > > PR target/91824 > * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split. > (*popcountsi2_zext_falsedep): New define_insn. > > * gcc.target/i386/pr91824-1.c: New test.
OK. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2020-01-29 09:35:05.786248027 +0100 > +++ gcc/config/i386/i386.md 2020-01-29 16:18:09.924717021 +0100 > @@ -14563,6 +14563,60 @@ (define_insn "*popcount<mode>2_falsedep" > (set_attr "type" "bitmanip") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "*popcountsi2_zext" > + [(set (match_operand:DI 0 "register_operand" "=r") > + (and:DI > + (subreg:DI > + (popcount:SI > + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) > + (const_int 63))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_POPCNT && TARGET_64BIT" > +{ > +#if TARGET_MACHO > + return "popcnt\t{%1, %k0|%k0, %1}"; > +#else > + return "popcnt{l}\t{%1, %k0|%k0, %1}"; > +#endif > +} > + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed > + && optimize_function_for_speed_p (cfun) > + && !reg_mentioned_p (operands[0], operands[1])" > + [(parallel > + [(set (match_dup 0) > + (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63))) > + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) > + (clobber (reg:CC FLAGS_REG))])] > + "ix86_expand_clear (operands[0]);" > + [(set_attr "prefix_rep" "1") > + (set_attr "type" "bitmanip") > + (set_attr "mode" "SI")]) > + > +; False dependency happens when destination is only updated by tzcnt, > +; lzcnt or popcnt. There is no false dependency when destination is > +; also used in source. 
> +(define_insn "*popcountsi2_zext_falsedep" > + [(set (match_operand:DI 0 "register_operand" "=r") > + (and:DI > + (subreg:DI > + (popcount:SI > + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) > + (const_int 63))) > + (unspec [(match_operand:DI 2 "register_operand" "0")] > + UNSPEC_INSN_FALSE_DEP) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_POPCNT && TARGET_64BIT" > +{ > +#if TARGET_MACHO > + return "popcnt\t{%1, %k0|%k0, %1}"; > +#else > + return "popcnt{l}\t{%1, %k0|%k0, %1}"; > +#endif > +} > + [(set_attr "prefix_rep" "1") > + (set_attr "type" "bitmanip") > + (set_attr "mode" "SI")]) > + > (define_insn_and_split "*popcounthi2_1" > [(set (match_operand:SI 0 "register_operand") > (popcount:SI > --- gcc/testsuite/gcc.target/i386/pr91824-1.c.jj 2020-01-29 > 16:23:13.290186089 +0100 > +++ gcc/testsuite/gcc.target/i386/pr91824-1.c 2020-01-29 16:23:32.095905212 > +0100 > @@ -0,0 +1,54 @@ > +/* PR target/91824 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2 -mpopcnt" } */ > +/* { dg-final { scan-assembler-not "cltq" } } */ > + > +unsigned int foo (void); > + > +unsigned long > +f1 (unsigned int x) > +{ > + return __builtin_popcount (x); > +} > + > +unsigned long > +f2 (unsigned int x) > +{ > + return (unsigned) __builtin_popcount (x); > +} > + > +unsigned long > +f3 (unsigned int x) > +{ > + return __builtin_popcount (x) & 63ULL; > +} > + > +unsigned long > +f4 (unsigned int x) > +{ > + return __builtin_popcount (x) & 1023ULL; > +} > + > +unsigned long > +f5 (void) > +{ > + return __builtin_popcount (foo ()); > +} > + > +unsigned long > +f6 (void) > +{ > + return (unsigned) __builtin_popcount (foo ()); > +} > + > +unsigned long > +f7 (void) > +{ > + return __builtin_popcount (foo ()) & 63ULL; > +} > + > +unsigned long > +f8 (void) > +{ > + return __builtin_popcount (foo ()) & 1023ULL; > +} > > Jakub >