https://bugs.llvm.org/show_bug.cgi?id=48292

            Bug ID: 48292
           Summary: [aarch64] Inefficient code in __builtin_mul_overflow
                    with unsigned < 32 bits
           Product: libraries
           Version: 10.0
          Hardware: Other
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: AArch64
          Assignee: unassignedb...@nondot.org
          Reporter: husseyde...@gmail.com
                CC: arnaud.degrandmai...@arm.com,
                    llvm-bugs@lists.llvm.org, smithp...@googlemail.com,
                    ties.st...@arm.com

Given the following C code:


#include <stdbool.h>
#include <stdint.h>

bool umul16_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
    return __builtin_mul_overflow(a,b,result);
}
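
For context, here is a small self-contained driver (my own sketch; the
function is repeated so the file compiles on its own, and the test
values are just illustrative) showing the builtin's semantics: the low
16 bits of the product are stored and overflow is reported whenever the
full product does not fit in 16 bits.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

bool umul16_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
    return __builtin_mul_overflow(a, b, result);
}

int main(void)
{
    uint16_t r;
    bool ovf;

    /* 300 * 300 = 90000 does not fit in 16 bits: overflow is reported,
       r holds the truncated low half (90000 & 0xffff == 24464). */
    ovf = umul16_overflow(300, 300, &r);
    printf("overflow=%d result=%u\n", (int)ovf, (unsigned)r);

    /* 200 * 300 = 60000 fits in 16 bits: no overflow. */
    ovf = umul16_overflow(200, 300, &r);
    printf("overflow=%d result=%u\n", (int)ovf, (unsigned)r);

    return 0;
}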

aarch64 clang 10.0.1 -O2:

umul16_overflow:
        // Cast args
        and     w9, w1, #0xffff
        and     w10, w0, #0xffff
        // 16-bit to 32-bit long multiply
        mul     w9, w10, w9
        // Store upper 16 bits in w10
        lsr     w10, w9, #16
        // Store zero in x8??????????
        mov     x8, xzr
        // Check if upper 16 bits (w10) are non-zero
        cmp     w10, #0
        cset    w10, ne
        // Check if zero (x8) is non-zero??????????
        // w8 = 0
        cmp     x8, #0
        cset    w8, ne
        // Or the result against zero??????????
        orr     w0, w10, w8
        // Store
        strh    w9, [x2]
        ret

I have no idea what x8 is being used for here; it will always be zero. Here is
the same code with the redundant instructions removed:

umul16_overflow:
        // Cast args
        and     w8, w1, #0xffff
        and     w9, w0, #0xffff
        // 16-bit to 32-bit long multiply
        mul     w8, w9, w8
        // Store upper 16 bits in w9
        lsr     w9, w8, #16
        // Check if the upper 16 bits (w9) are non-zero
        cmp     w9, #0
        cset    w0, ne
        // Store
        strh    w8, [x2]
        ret

Note that we could simplify this even further by folding the shift into the
compare (cmp with a shifted-register operand), but that appears to be a
separate issue:

umul16_overflow:
        // Cast args
        and     w8, w1, #0xffff
        and     w9, w0, #0xffff
        // 16-bit to 32-bit long multiply
        mul     w8, w9, w8
        // Check if the upper 16 bits are non-zero
        cmp     wzr, w8, lsr #16
        cset    w0, ne
        // Store
        strh    w8, [x2]
        ret
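
For what it's worth, all of the sequences above compute the equivalent of the
following C (my reading of the expansion, not compiler output; the _ref name
is just illustrative): widen to 32 bits, multiply, store the truncated low
half, and report overflow iff the high half is non-zero.

#include <stdbool.h>
#include <stdint.h>

bool umul16_overflow_ref(uint16_t a, uint16_t b, uint16_t *result)
{
    uint32_t prod = (uint32_t)a * (uint32_t)b;  /* 16x16 -> 32-bit multiply */
    *result = (uint16_t)prod;                   /* truncating store (strh)   */
    return (prod >> 16) != 0;                   /* overflow iff high half set */
}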
