Hi! This patch extends the PR65220 patch: 1) there is really no reason to limit the divisor to 32 or 64, we can divide/modulo by pow2 constants up to 2G (0x7fffffff is then still representable in 32-bit signed immediate) 2) on the testcase RTL cprop unfortunately isn't performed, because the function contains only a single basic block, and the combiner when the constant is propagated into it simplifies it to the shift and and; so the patch adds another pattern similar to the previous one to handle this case
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-04-08 Jakub Jelinek <ja...@redhat.com> PR target/65693 * config/i386/i386.md (*udivmod<mode>4_pow2): Allow any pow2 integer in between 2 and 0x80000000U inclusive. (*udivmod<mode>4_pow2_1): New define_insn_and_split. * gcc.target/i386/pr65693.c: New test. --- gcc/config/i386/i386.md.jj 2015-04-03 15:32:30.000000000 +0200 +++ gcc/config/i386/i386.md 2015-04-08 17:30:10.615369134 +0200 @@ -7340,7 +7340,7 @@ (define_insn_and_split "*udivmod<mode>4_ (set (match_operand:SWI48 1 "register_operand" "=r") (umod:SWI48 (match_dup 2) (match_dup 3))) (clobber (reg:CC FLAGS_REG))] - "UINTVAL (operands[3]) - 2 < <MODE_SIZE> * BITS_PER_UNIT + "IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" "#" "&& 1" @@ -7357,6 +7357,27 @@ (define_insn_and_split "*udivmod<mode>4_ [(set_attr "type" "multi") (set_attr "mode" "<MODE>")]) +;; Similarly, but for the case when the combiner simplifies it. +(define_insn_and_split "*udivmod<mode>4_pow2_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (lshiftrt:SWI48 (match_operand:SWI48 2 "register_operand" "0") + (match_operand:SWI48 3 "const_int_operand" "n"))) + (set (match_operand:SWI48 1 "register_operand" "=r") + (and:SWI48 (match_dup 2) (match_operand:SWI48 4 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "IN_RANGE (INTVAL (operands[3]), 1, 31) + && UINTVAL (operands[4]) == (HOST_WIDE_INT_1U << INTVAL (operands[3])) - 1" + "#" + "&& 1" + [(set (match_dup 1) (match_dup 2)) + (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + (define_insn "*udivmod<mode>4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") --- gcc/testsuite/gcc.target/i386/pr65693.c.jj 2015-04-08 17:42:14.146727788 +0200 +++ gcc/testsuite/gcc.target/i386/pr65693.c 2015-04-08 17:41:22.000000000 +0200 @@ -0,0 +1,13 @@ +/* PR target/65693 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int a; + +void +foo (int (*fn) (int, int, int), unsigned int b) +{ + unsigned long *c = (unsigned long *) __builtin_alloca (b); + a = *c; + register int d asm ("edx") = fn (0, 0, d); +} Jakub