Vào Th 2, 4 thg 11, 2024 vào lúc 03:33 Otto Moerbeek <o...@drijf.net> đã viết: > > On Sun, Nov 03, 2024 at 08:55:18AM +0100, Otto Moerbeek wrote: > > > On Sun, Nov 03, 2024 at 08:44:26AM +0100, Sebastien Marie wrote: > > > > > Otto Moerbeek <o...@drijf.net> writes: > > > > > > > On Sun, Nov 03, 2024 at 08:13:20AM +0100, Otto Moerbeek wrote: > > > > > > > >> On Sun, Nov 03, 2024 at 09:38:47AM +0700, hahahahacker2009 wrote: > > > >> > > > >> > Hello, > > > >> > OpenBSD bc(1) is unable to calculate very big number, > > > >> > for example, (1024*1024)^(1024*1024) run for an hour and still cannot > > > >> > give me the result. > > > >> > It just use about 11M of memory and 100% CPU. > > > >> > Gavin Howard's bc port do it in 2 minutes > > > >> > GNU bc on Linux do it in 5 minutes. > > > >> > > > >> bc uses dc which does a simple exponentation computation, which is > > > >> basically doing repeated multiplications. I'm sure there are smarter > > > >> methods, it's not just implemented that way. > > > > > > > > Oh, I looked and I did it a bit smarter when I wrote that code 20 > > > > years back, but still, I think it can be improved. > > > > > > > > > > The problem seems more on numnber printing than on number exponentation. > > > > > > $ /usr/bin/time -lp dc -e '1024 1024 * 1024 1024 * ^' > > > real 46.39 > > > user 45.66 > > > sys 0.00 > > > 15244 maximum resident set size > > > 0 average shared memory size > > > 0 average unshared data size > > > 0 average unshared stack size > > > 3509 minor page faults > > > 1 major page faults > > > 0 swaps > > > 0 block input operations > > > 0 block output operations > > > 0 messages sent > > > 0 messages received > > > 0 signals received > > > 0 voluntary context switches > > > 3334 involuntary context switches > > > > > > The exponentation took ~50 seconds (dc(1) doesn't print the number on > > > the stack by default). > > > -- > > > Sebastien Marie > > > > Heh, thanks for that insight. I might take a look some day. > > > > -Otto > > > > That day came sooner than expected. > > The root cause of the speed difference is that my dc/bc compute in > binary, while the others compute in base 10. So they basically do not > need to do a conversion for outputting in base 10. > > By avoiding a lot of memory allocations and using specialized > conversion routines for base 10 and base 16 things speed up nicely for > me for those bases (from about 1.5 hour to 5m20s for base 10, and to > 28s for base 16 (there the conversion is nearly free). > > Don't forget to rebuild bc as as well after applying the diff for dc. > > -Otto > > Index: inout.c > =================================================================== > RCS file: /home/cvs/src/usr.bin/dc/inout.c,v > diff -u -p -r1.23 inout.c > --- inout.c 8 Mar 2023 04:43:10 -0000 1.23 > +++ inout.c 3 Nov 2024 20:19:28 -0000 > @@ -38,7 +38,7 @@ static void src_freestring(struct source > static void flushwrap(FILE *); > static void putcharwrap(FILE *, int); > static void printwrap(FILE *, const char *); > -static char *get_digit(u_long, int, u_int); > +static void get_digit(u_long, int, u_int, char *, size_t); > > static struct vtable stream_vtable = { > src_getcharstream, > @@ -264,20 +264,17 @@ read_string(struct source *src) > return p; > } > > -static char * > -get_digit(u_long num, int digits, u_int base) > +static void > +get_digit(u_long num, int digits, u_int base, char *buf, size_t sz) > { > - char *p; > - > if (base <= 16) { > - p = bmalloc(2); > - p[0] = num >= 10 ? num + 'A' - 10 : num + '0'; > - p[1] = '\0'; > + buf[0] = num >= 10 ? num + 'A' - 10 : num + '0'; > + buf[1] = '\0'; > } else { > - if (asprintf(&p, "%0*lu", digits, num) == -1) > - err(1, NULL); > + int ret = snprintf(buf, sz, "%0*lu", digits, num); > + if (ret < 0 || (size_t)ret >= sz) > + err(1, "truncation %d %zu", ret, sz); > } > - return p; > } > > void > @@ -285,11 +282,10 @@ printnumber(FILE *f, const struct number > { > struct number *int_part, *fract_part; > int digits; > - char buf[11]; > - size_t sz; > + char buf[12], *str, *p; > + size_t allocated; > int i; > - struct stack stack; > - char *p; > + BN_ULONG *mem; > > charcount = 0; > lastchar = -1; > @@ -307,24 +303,49 @@ printnumber(FILE *f, const struct number > } > split_number(b, int_part->number, fract_part->number); > > - i = 0; > - stack_init(&stack); > - while (!BN_is_zero(int_part->number)) { > - BN_ULONG rem = BN_div_word(int_part->number, base); > - stack_pushstring(&stack, get_digit(rem, digits, base)); > - i++; > - } > - sz = i; > - if (BN_is_negative(b->number)) > - putcharwrap(f, '-'); > - for (i = 0; i < sz; i++) { > - p = stack_popstring(&stack); > - if (base > 16) > - putcharwrap(f, ' '); > - printwrap(f, p); > - free(p); > + if (base == 10 && !BN_is_zero(int_part->number)) { > + str = BN_bn2dec(int_part->number); > + bn_checkp(str); > + p = str; > + while (*p) > + putcharwrap(f, *p++); > + free(str); > + } else if (base == 16 && !BN_is_zero(int_part->number)) { > + str = BN_bn2hex(int_part->number); > + bn_checkp(str); > + p = str; > + if (*p == '-') > + putcharwrap(f, *p++); > + /* skip leading zero's */ > + while (*p == '0') > + p++; > + while (*p) > + putcharwrap(f, *p++); > + free(str); > + } else { > + i = 0; > + allocated = 1; > + mem = breallocarray(NULL, allocated, sizeof(BN_ULONG)); > + while (!BN_is_zero(int_part->number)) { > + if (i >= allocated) { > + allocated *= 2; > + mem = breallocarray(mem, allocated, > + sizeof(BN_ULONG)); > + } > + mem[i++] = BN_div_word(int_part->number, base); > + } > + if (BN_is_negative(b->number)) > + putcharwrap(f, '-'); > + for (i = i - 1; i >= 0; i--) { > + get_digit(mem[i], digits, base, buf, > + sizeof(buf)); > + if (base > 16) > + putcharwrap(f, ' '); > + printwrap(f, buf); > + } > + free(mem); > } > - stack_clear(&stack); > + > if (b->scale > 0) { > struct number *num_base; > BIGNUM *mult, *stop; > @@ -352,13 +373,12 @@ printnumber(FILE *f, const struct number > bmachine_scale()); > split_number(fract_part, int_part->number, NULL); > rem = BN_get_word(int_part->number); > - p = get_digit(rem, digits, base); > + get_digit(rem, digits, base, buf, sizeof(buf)); > int_part->scale = 0; > normalize(int_part, fract_part->scale); > bn_check(BN_sub(fract_part->number, > fract_part->number, > int_part->number)); > - printwrap(f, p); > - free(p); > + printwrap(f, buf); > bn_check(BN_mul_word(mult, base)); > } > free_number(num_base); >
It is quicker than the last time, it can print the result in 30 minutes. I will send the result of time -lp bc in 30 minutes