[Bug middle-end/20283] optimising muldiv() type operations

ajrobb at bigfoot dot com Sun, 31 Aug 2008 06:41:46 -0700


------- Comment #5 from ajrobb at bigfoot dot com  2008-08-31 13:40 -------
For the case where the overall scale is between 1 and 2 (exclusive), it is
straightforward to implement a solution with a single multiply and no shifts:


/* Demostrate fast multiply and divide by rational number, r, where 1 < r < 2
 * Both methods work over entire domain: 0 < x < (1<<32)/r.
 */
#include <stdint.h>
#define Y 47
#define Z 40

uint32_t slow(uint32_t x) {
  return x * (uint64_t) Y / Z;
}

uint32_t fast(uint32_t x) {
  static const uint64_t fact = ((((uint64_t)(Y-Z)) << 32) + (Z-1)) / Z;
  return x + (uint32_t)((x * fact) >> 32);
}

This compiles to the following with gcc 4.2.1 on i586-suse-linux

gcc -O3 -fomit-frame-pointer

        .file   "vat.c"
.globl __udivdi3
        .text
        .p2align 4,,15
.globl slow
        .type   slow, @function
slow:
        subl    $28, %esp
        movl    $47, %eax
        mull    32(%esp)
        movl    $40, 8(%esp)
        movl    $0, 12(%esp)
        movl    %eax, (%esp)
        movl    %edx, 4(%esp)
        call    __udivdi3
        addl    $28, %esp
        ret
        .size   slow, .-slow
        .p2align 4,,15
.globl fast
        .type   fast, @function
fast:
        subl    $12, %esp
        movl    $751619277, %ecx
        movl    %ebx, (%esp)
        movl    16(%esp), %ebx
        movl    %edi, 8(%esp)
        movl    %esi, 4(%esp)
        movl    %ebx, %eax
        mull    %ecx
        movl    %edx, %edi
        movl    %eax, %esi
        movl    %edi, %esi
        xorl    %edi, %edi
        movl    8(%esp), %edi
        leal    (%ebx,%esi), %eax
        movl    (%esp), %ebx
        movl    4(%esp), %esi
        addl    $12, %esp
        ret
        .size   fast, .-fast
# additional hand-coded assembler equivalent to fast above:
        .p2align 4,,15
.globl faster
        .type   faster, @function
faster:
        movl    $751619277, %eax
        mull    4(%esp)
        movl    4(%esp), %eax
        addl    %edx, %eax
        ret
        .size   faster, .-faster
        .ident  "GCC: (GNU) 4.2.1 (SUSE Linux)"
        .section        .note.GNU-stack,"",@progbits


-- 

ajrobb at bigfoot dot com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Version|3.3.4                       |4.2.1


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=20283

[Bug middle-end/20283] optimising muldiv() type operations

Reply via email to