Hi! The ffs expanders on several targets (at least x86, ia64 and aarch64) emit a conditional move or similar code to handle the case where the argument is 0, which makes the code longer. If we know from VRP that the argument is never zero, we can (provided the target also has a ctz expander) just use ctz + 1 instead; ctz is undefined at zero, so its expander doesn't need to deal with that case.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2020-05-06 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/94956 * match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into __builtin_ctz* + 1 if ctz_optab has an expander. * gcc.target/i386/pr94956.c: New test. --- gcc/match.pd.jj 2020-05-06 15:03:51.618058839 +0200 +++ gcc/match.pd 2020-05-06 15:48:23.658858289 +0200 @@ -5990,6 +5990,16 @@ (define_operator_list COND_TERNARY (convert (IFN_POPCOUNT:type @0))))) #endif +/* __builtin_ffs needs to deal on many targets with the possible zero + argument. If we know the argument is always non-zero, __builtin_ctz + 1 + should lead to better code. */ +(simplify + (FFS tree_expr_nonzero_p@0) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (optab_handler (ctz_optab, TYPE_MODE (TREE_TYPE (@0))) + != CODE_FOR_nothing)) + (plus (CTZ:type @0) { build_one_cst (type); }))) + /* Simplify: a = a1 op a2 --- gcc/testsuite/gcc.target/i386/pr94956.c.jj 2020-05-06 16:35:47.085876237 +0200 +++ gcc/testsuite/gcc.target/i386/pr94956.c 2020-05-06 16:39:52.927140038 +0200 @@ -0,0 +1,28 @@ +/* PR tree-optimization/94956 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "\tcmovne\t" } } */ +/* { dg-final { scan-assembler-not "\tsete\t" } } */ + +int +foo (unsigned x) +{ + if (x == 0) __builtin_unreachable (); + return __builtin_ffs (x) - 1; +} + +int +bar (unsigned long x) +{ + if (x == 0) __builtin_unreachable (); + return __builtin_ffsl (x) - 1; +} + +#ifdef __x86_64__ +int +baz (unsigned long long x) +{ + if (x == 0) __builtin_unreachable (); + return __builtin_ffsll (x) - 1; +} +#endif Jakub