The developers of the TICC time interval counter are having a problem printing 64-bit integers: https://github.com/TAPR/TICC/blob/master/TO-DO.txt
I went to work figuring out how to do that conversion, and after some false starts, came up with some quite efficient arbitrary-precision printing code. It's small, fast, and works for any number of bytes. (Implementation currently limited to 255 bytes.) The code, in its current state, is appended; it's been tested extensively on x86 and more lightly on simulavr. Any comments from experienced AVR programmers is definitely appreciated. Like the current __ultoa_invert, it formats the number backward in a caller-provided buffer. It's (currently) slightly larger, but slightly faster than __ultoa_invert. Timing in cycles for a simulated atmega1280: Input genprint __ultoa_invert 0 160 0xff 316 480 0xffff 584 792 0xffffff 1005 1260 0xffffffff 1434 1572 0xffffffffff 2024 48 ones 2626 56 ones 3286 64 ones 4103 I could just pass this over to the TICC project, but is there any interest in me making the necessary overhauls to vfprintf to incorporate this? I'd have to change vfprintf to pass around a pointer and length internally rather than copying the value. The printing code requires the input in a modifiable little-endian buffer, but that's how varargs works on AVR anyway. (Adding the hex/octal and negative number handling is quite simple.) If I get really sneaky, I could eliminate the output buffer by having it push the formatted number on the stack and call down to an output routine that supplies the necessary prefix padding once the length is known. But that might not be doable in C unless there's a way to force allocation of a stack frame. (If anyone cares, I have some other code for 16- and 32-bit numbers based on the ideas at http://homepage.divms.uiowa.edu/~jones/bcd/decimal.html. It converts 2 digits at a time, using a 100-byte binary-to-BCD lookup table, but is a lot larger, extending it to 64 bits is a mess, and doing without a hardware multiplier bloats it with shift-and-add sequences.) #if __AVR_ARCH__ typedef _Bool bool; typedef unsigned char uint8_t; typedef unsigned short uint16_t; #define false (bool)0 #define true (bool)1 #else #include <stdbool.h> #include <stdint.h> #endif /* * Print an arbitrary buffer of bytes. This uses only byte operations * and only multiplies by the constant 0x33, so is suitable for an * 8-bit microcontroller. * * The basic operation involves three steps: * 1) Sum the bytes mod 255 to find the remainder mod 5. Combine * this with the lsbit to compute the least significant digit. * 2) Subtract this from the number * 3) Then do a 1-bit shift and multiply by 0xcc..cccd to accomplish a * divide by 10. * * Because of the simple pattern of that multiplier (it's -0x33333...), * it can be managed in O(n) time and O(1) additional space. */ /* Reduce a byte mod 5. */ static uint8_t __attribute__((const)) mod5(uint8_t x) { #if __AVR_ARCH__ uint8_t tmp; /* Reduce x mod 15. We add the high halves, to get a carry. */ asm( "mov __tmp_reg__,%0" "\n swap __tmp_reg__" "\n cbr %0,15" "\n add %0,__tmp_reg__" "\n cbr %0,15" "\n swap %0" /* Swap and add carry bit to low */ "\n adc %0,__zero_reg__" /* End-around carry */ : "+d" (x) :: "r0"); #else x = (x >> 4) + (x & 0xf); /* 1 <= x <= 30 */ x = (x >> 4) + (x & 0xf); /* 1 <= x <= 15 */ #endif /* 1 <= x <= 15; now for final reduction mod 5 */ if (x >= 10) x -= 10; /* 0 <= x <= 9 */ if (x >= 5) x -= 5; /* 0 <= x <= 4 */ return x; } /* * Convert the arbitrary-precision number in bin[0..len-1] from * little-endian binary to to decimal. The length can be up to 127 bytes * (but the algorithm can go forever). * The digits are written little-endian, so they must be reversed * for output. A pointer to the end of the formatted number is returned. * (The caller is responsible for ensuring the output is big enough.) */ char * genprint(char *out, uint8_t *bin, uint8_t len) { uint8_t digit; /* This shouldn't happen, but don't crash */ if (!len) return out; /* * Pre-pass: strip trailing zero bytes, but not the last. * Also leave "bin" pointing to the end of the input. */ bin += len; while (!*--bin) { if (--len) continue; len++; break; } bin++; /* Leave bin pointing just past end of input */ do { uint16_t accum; uint8_t i = len; bool lsbit = false; /* * Pass 1, msb-to-lsb: divide bin[] by 2, and return the * value mod 10 (the new value mod 5, combined with the * previous lsbit). */ digit = 255; /* Could also be zero */ #if __AVR_ARCH__ /* * The one implementation hassle is the need for two * carry bits. One for the shift, and the other for * the end-around carries modulo 255. * * Unfamiliar AVR inline asm feature: given a memory * constraint, %2 prints X, Y or Z. %E2 and %F2 are the * low and high registers of the pointer, respectively. * So if %2 = Z, then %E2 = r30 and %F2 = r31. */ asm("" "\n1: ld __tmp_reg__,-%4" "\n lsr %1" /* lsbit to carry bit */ "\n ror __tmp_reg__" "\n st %4,__tmp_reg__" "\n rol %1" /* carry bit to lsbit */ "\n add %0,__tmp_reg__" "\n adc %0,__zero_reg__" /* End-around carry */ "\n dec %2" "\n brne 1b" "\n.ifnc %A3,%E4" "\n .error \"GCC constraints not working as expected.\"" "\n.endif" "\n.ifnc %B3,%F4" "\n .error \"GCC constraints not working as expected.\"" "\n.endif" : "+&r" (digit), "+&r" (lsbit), "+&r" (i), "+e" (bin) : "m" (*bin) : "r0"); #else do { uint8_t byte = *--bin; uint16_t t = digit; t += *bin = lsbit << 7 | byte >> 1; lsbit = byte & 1; digit = (t >> 8) + (t & 0xff); } while (--i); #endif digit = mod5(digit); *out++ = digit + digit + lsbit + '0'; /* * Pass 2, lsb-to-msb: the exciting part! * * We have to subtract the digit, then multiply by * 0xCCCC...CD to achieve a divide by 5. We actually * multiply by 0x333...33, and negate. To do the negate, * we don't complement and add 1, but rather subtract * 1 and then complement. That works because then the * subtraction can be combined with the accumulation * for the multiply. * * For each byte, we multiply it by 0x33, and add that * to the "value to be stored in all higher bytes" * accumulator. The accumulator needs to be 16 bits, but * we can prevent overflow by adding the msbyte to the * lsbyte after each iteration. * * We can also use this to do the necessary subtracts. * To subtract 1, we just initialize the accumulator * to 0xff. To subtract the digit without having a * separate carry-propagation pass through bin[], we * further subtract 0x33 times the digit. * (Since digit <= 4, this fits into 1 byte.) */ if (digit > 4) __builtin_unreachable(); accum = 255 - (uint8_t)(digit * 0x33); i = len; do { accum += 0x33 * *bin; *bin++ = digit = ~(uint8_t)accum; accum = (accum >> 8) + (accum & 0xff); } while (--i); /* Decrease len as high bytes become zero */ } while (digit || (--bin, --len)); return out; } #if __AVR_ARCH__ #define READ_PORT 0x38 #define WRITE_PORT 0x39 #define EXIT_PORT 0x3a static uint8_t inline __attribute__((always_inline)) inb(uint8_t port) { uint8_t x; asm("in %0,%1" : "=r" (x) : "I" (port)); return x; } static void inline __attribute__((always_inline)) outb(uint8_t port, uint8_t x) { asm("out %0,%1" :: "I" (port), "r" (x)); } static void putch(char c) { outb(WRITE_PORT, c); } static void revwrite(char const *p, uint8_t len) { while (len--) putch(*--p); } static void revput(char *buf, uint8_t *bin, uint8_t len) { char const *p = genprint(buf, bin, len); revwrite(p, p-buf); putch('\n'); } static void putst(char const __flash *p) { char c; while ((c = *p++) != 0) putch(c); putch('\n'); } static void __attribute__((noreturn)) my_exit(uint8_t status) { outb(EXIT_PORT, status); __builtin_unreachable(); } int main(void) { unsigned i; char buf[21]; union { unsigned i; uint8_t b[2]; } icopy; uint8_t bin[8]; static char const __flash hello[] = "Hello, world!"; putst(hello); for (i = 0; i < 8; i++) { int j; for (j = 0; j < 8; j++) bin[j] = 0xff; revput(buf, bin, i+1); } for (i = 0; i < 100; i++) { icopy.i = i; revput(buf, icopy.b, 2); } my_exit(0); } #else static bool __attribute__((pure)) revcmp(char const *a, char const *b, unsigned len) { while (len--) if (*a++ != b[len]) return false; return true; } #include <stdio.h> static bool test(uint64_t x) { char buf1[21], buf2[21]; int len = sprintf(buf1, "%llu", x); union { uint64_t x; uint8_t buf[8]; } u = { x }; char *p = genprint(buf2, u.buf, 8); *p = '\0'; //printf("Result: %u -> '%s' vs '%s'\n", x, buf1, buf2); if (p - buf2 == len && revcmp(buf1, buf2, len)) return true; printf("Mismatch: %llu (%#llx) -> '%s' vs '%s'\n", x, x, buf1, buf2); return false; } int main(void) { unsigned i; for (i = 0; i < 10000000; i++) { //printf("Testing %u\n", i); if (!test(i)) return 1; if (!test(~i)) return 1; if (!test(~(uint64_t)i)) return 1; } printf("All succeeded, up to %u\n", i); return 0; } #endif /* !__AVR_ARCH__ */ _______________________________________________ AVR-libc-dev mailing list AVR-libc-dev@nongnu.org https://lists.nongnu.org/mailman/listinfo/avr-libc-dev