On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for the ctz sequences than we do today.
C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the same fixed value (only dependent on TARGET_* options). I originally tried to have the generic code handle this; that would be too much surgery for stage 3 though. Bootstrapped and tested on powerpc64-linux {-m32,-m64}; also tested manually with {-m32,-m64} -mcpu=power{4,7,9}. Is this okay for trunk? Segher 2016-12-09 Segher Boessenkool <seg...@kernel.crashing.org> PR target/78683 * config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. (CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. Handle TARGET_POPCNTD the same as TARGET_CTZ. * config/rs6000/rs6000.md (ctz<mode>2): Reimplement. (ffs<mode>2): Reimplement. --- gcc/config/rs6000/rs6000.h | 11 ++++---- gcc/config/rs6000/rs6000.md | 62 +++++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 5d56927..fe314bf 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2199,14 +2199,15 @@ do { \ /* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((VALUE) = ((MODE) == SImode ? 32 : 64), 1) + ((VALUE) = GET_MODE_BITSIZE (MODE), 2) /* The CTZ patterns that are implemented in terms of CLZ return -1 for input of - zero. The hardware instructions added in Power9 return 32 or 64. */ + zero. The hardware instructions added in Power9 and the sequences using + popcount return 32 or 64. */ #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((!TARGET_CTZ) \ - ? ((VALUE) = -1, 1) \ - : ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)) + (TARGET_CTZ || TARGET_POPCNTD \ + ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2) \ + : ((VALUE) = -1, 2)) /* Specify the machine mode that pointers have. 
After generation of rtl, the compiler makes no further distinction diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4726d73..777b996 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2220,17 +2220,8 @@ (define_insn "clz<mode>2" [(set_attr "type" "cntlz")]) (define_expand "ctz<mode>2" - [(set (match_dup 2) - (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" ""))) - (set (match_dup 3) - (and:GPR (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (clz:GPR (match_dup 3))) - (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "") - (minus:GPR (match_dup 5) - (match_dup 4))) - (clobber (reg:GPR CA_REGNO))])] + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))] "" { if (TARGET_CTZ) @@ -2239,10 +2230,26 @@ (define_expand "ctz<mode>2" DONE; } - operands[2] = gen_reg_rtx (<MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); - operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + + if (TARGET_POPCNTD) + { + emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx)); + emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1])); + emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2)); + emit_insn (gen_popcntd<mode>2 (operands[0], tmp3)); + } + else + { + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3)); + } + + DONE; }) (define_insn "ctz<mode>2_hw" @@ -2253,23 +2260,18 @@ (define_insn "ctz<mode>2_hw" [(set_attr "type" "cntlz")]) (define_expand "ffs<mode>2" - [(set (match_dup 2) - (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" ""))) - (set (match_dup 3) - (and:GPR (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (clz:GPR (match_dup 3))) - 
(parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "") - (minus:GPR (match_dup 5) - (match_dup 4))) - (clobber (reg:GPR CA_REGNO))])] + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))] "" { - operands[2] = gen_reg_rtx (<MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); - operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3)); + DONE; }) -- 1.9.3