>>> @@ -174,20 +174,12 @@ char *put_dec_trunc8(char *buf, unsigned r)
>>>     unsigned q;
>>>     /* Copy of previous function's body with added early returns */
>>> -   q      = (r * (uint64_t)0x1999999a) >> 32;
>>> -   *buf++ = (r - 10 * q) + '0'; /* 2 */
>>> -   if (q == 0)
>>> -           return buf;
>>> -   r      = (q * (uint64_t)0x1999999a) >> 32;
>>> -   *buf++ = (q - 10 * r) + '0'; /* 3 */
>>> -   if (r == 0)
>>> -           return buf;
>>> -   q      = (r * (uint64_t)0x1999999a) >> 32;
>>> -   *buf++ = (r - 10 * q) + '0'; /* 4 */
>>> -   if (q == 0)
>>> -           return buf;
>>> -   r      = (q * (uint64_t)0x1999999a) >> 32;
>>> -   *buf++ = (q - 10 * r) + '0'; /* 5 */
>>> +   while (r >= 10000) {
>>> +           q = r + '0';
>>> +           r  = (r * (uint64_t)0x1999999a) >> 32;
>>> +           *buf++ = q - 10*r;
>>> +   }

All right, I now see what the loop is doing (I couldn't grasp it
yesterday) and, except for r=0, it looks legit.

On Mon, Sep 24 2012, George Spelvin wrote:
> Truthfully, it would have made *more* sense to swap q and r globally,
> so the loop had a more sensible q=quotient/r=remainder assignment,
> but I wanted to show that the unmodified tail was in fact unmodified.

The original has it a bit awkwardly because it just copies code from
put_dec_full9() with the first iteration skipped.

> The big saving from using a loop is that it avoids unnecessary
> 32x32->64-bit multiplies, falling through to the 16x16->32-bit
> code as early as possible.  Given that most numbers are small,
> this seemed like a significant win.

Ah, makes sense.

I guess the following should work, even though it's not so pretty:

/*
 * put_dec_trunc8 - write the decimal digits of r into buf, least
 * significant digit first, without any padding zeros.
 *
 * @buf: destination; must have room for every digit of r
 * @r:   value to convert; r == 0 produces the single character '0'
 *
 * Returns a pointer one past the last character written.  The output is
 * not NUL-terminated.
 *
 * Division by 10 is done with reciprocal multiplies.  The widest one,
 * (r * 0x1999999a) >> 32, is guaranteed exact for r < 2^30, which covers
 * the (presumably intended, going by the name) 8-digit range.
 */
static noinline_for_stack
char *put_dec_trunc8(char *buf, unsigned r)
{
        unsigned q;

        /*
         * Use the 32x32->64 multiply only while r has more than four
         * digits.  0x1999999a == ceil(2^32 / 10).
         *
         * The test must be ">= 10000": the tail below emits at most four
         * digits, so r == 10000 has to lose a digit here first.  Every
         * value divided in this loop is >= 10000, hence r >= 1000 when
         * the loop exits and no zero check is needed afterwards.
         */
        while (r >= 10000) {
                q = r + '0';
                r = (r * (uint64_t)0x1999999a) >> 32;
                *buf++ = q - 10 * r;
        }

        /* r <= 9999: 0x199a == ceil(2^16 / 10), exact for r < 16384 */
        q      = (r * 0x199a) >> 16;
        *buf++ = (r - 10 * q) + '0';
        if (q == 0)
                return buf;

        /* q <= 999: 0xcd == ceil(2^11 / 10), exact for q < 1024 */
        r      = (q * 0xcd) >> 11;
        *buf++ = (q - 10 * r) + '0';
        if (r == 0)
                return buf;

        /* r <= 99 */
        q      = (r * 0xcd) >> 11;
        *buf++ = (r - 10 * q) + '0';
        if (q == 0)
                return buf;

        /* q <= 9: the most significant digit */
        *buf++ = q + '0';
        return buf;
}

-- 
Best regards,                                         _     _
.o. | Liege of Serenely Enlightened Majesty of      o' \,=./ `o
..o | Computer Science,  Michał “mina86” Nazarewicz    (o o)
ooo +----<email/xmpp: m...@google.com>--------------ooO--(_)--Ooo--

Attachment: pgpNHMfZB0BEV.pgp
Description: PGP signature

Reply via email to