On Sun, Nov 03, 2024 at 08:55:18AM +0100, Otto Moerbeek wrote:

> On Sun, Nov 03, 2024 at 08:44:26AM +0100, Sebastien Marie wrote:
> 
> > Otto Moerbeek <o...@drijf.net> writes:
> > 
> > > On Sun, Nov 03, 2024 at 08:13:20AM +0100, Otto Moerbeek wrote:
> > >
> > >> On Sun, Nov 03, 2024 at 09:38:47AM +0700, hahahahacker2009 wrote:
> > >> 
> > >> > Hello,
> > >> > OpenBSD bc(1) is unable to calculate very big number,
> > >> > for example, (1024*1024)^(1024*1024) run for an hour and still cannot
> > >> > give me the result.
> > >> > It just use about 11M of memory and 100% CPU.
> > >> > Gavin Howard's bc port do it in 2 minutes
> > >> > GNU bc on Linux do it in 5 minutes.
> > >> 
> > >> bc uses dc which does a simple exponentation computation, which is
> > >> basically doing repeated multiplications. I'm sure there are smarter
> > >> methods, it's not just implemented that way.
> > >
> > > Oh, I looked and I did it a bit smarter when I wrote that code 20
> > > years back, but still, I think it can be improved.
> > >
> > 
> > The problem seems more on numnber printing than on number exponentation.
> > 
> > $ /usr/bin/time -lp dc -e '1024 1024 * 1024 1024 * ^'
> > real        46.39
> > user        45.66
> > sys          0.00
> >      15244  maximum resident set size
> >          0  average shared memory size
> >          0  average unshared data size
> >          0  average unshared stack size
> >       3509  minor page faults
> >          1  major page faults
> >          0  swaps
> >          0  block input operations
> >          0  block output operations
> >          0  messages sent
> >          0  messages received
> >          0  signals received
> >          0  voluntary context switches
> >       3334  involuntary context switches
> > 
> > The exponentation took ~50 seconds (dc(1) doesn't print the number on
> > the stack by default).
> > -- 
> > Sebastien Marie
> 
> Heh, thanks for that insight. I might take a look some day.
> 
>       -Otto
> 

That day came sooner than expected.

The root cause of the speed difference is that my dc/bc compute in
binary, while the others compute in base 10. So they basically do not
need to do a conversion for outputting in base 10.

By avoiding a lot of memory allocations and using specialized
conversion routines for base 10 and base 16 things speed up nicely for
me for those bases (from about 1.5 hour to 5m20s for base 10, and to
28s for base 16 (there the conversion is nearly free). 

Don't forget to rebuild bc as as well after applying the diff for dc.

        -Otto

Index: inout.c
===================================================================
RCS file: /home/cvs/src/usr.bin/dc/inout.c,v
diff -u -p -r1.23 inout.c
--- inout.c     8 Mar 2023 04:43:10 -0000       1.23
+++ inout.c     3 Nov 2024 20:19:28 -0000
@@ -38,7 +38,7 @@ static void   src_freestring(struct source
 static void    flushwrap(FILE *);
 static void    putcharwrap(FILE *, int);
 static void    printwrap(FILE *, const char *);
-static char    *get_digit(u_long, int, u_int);
+static void    get_digit(u_long, int, u_int, char *, size_t);
 
 static struct vtable stream_vtable = {
        src_getcharstream,
@@ -264,20 +264,17 @@ read_string(struct source *src)
        return p;
 }
 
-static char *
-get_digit(u_long num, int digits, u_int base)
+static void
+get_digit(u_long num, int digits, u_int base, char *buf, size_t sz)
 {
-       char *p;
-
        if (base <= 16) {
-               p = bmalloc(2);
-               p[0] = num >= 10 ? num + 'A' - 10 : num + '0';
-               p[1] = '\0';
+               buf[0] = num >= 10 ? num + 'A' - 10 : num + '0';
+               buf[1] = '\0';
        } else {
-               if (asprintf(&p, "%0*lu", digits, num) == -1)
-                       err(1, NULL);
+               int ret = snprintf(buf, sz, "%0*lu", digits, num);
+               if (ret < 0 || (size_t)ret >= sz)
+                       err(1, "truncation %d %zu", ret, sz);
        }
-       return p;
 }
 
 void
@@ -285,11 +282,10 @@ printnumber(FILE *f, const struct number
 {
        struct number   *int_part, *fract_part;
        int             digits;
-       char            buf[11];
-       size_t          sz;
+       char            buf[12], *str, *p;
+       size_t          allocated;
        int             i;
-       struct stack    stack;
-       char            *p;
+       BN_ULONG        *mem;
 
        charcount = 0;
        lastchar = -1;
@@ -307,24 +303,49 @@ printnumber(FILE *f, const struct number
        }
        split_number(b, int_part->number, fract_part->number);
 
-       i = 0;
-       stack_init(&stack);
-       while (!BN_is_zero(int_part->number)) {
-               BN_ULONG rem = BN_div_word(int_part->number, base);
-               stack_pushstring(&stack, get_digit(rem, digits, base));
-               i++;
-       }
-       sz = i;
-       if (BN_is_negative(b->number))
-               putcharwrap(f, '-');
-       for (i = 0; i < sz; i++) {
-               p = stack_popstring(&stack);
-               if (base > 16)
-                       putcharwrap(f, ' ');
-               printwrap(f, p);
-               free(p);
+       if (base == 10 && !BN_is_zero(int_part->number)) {
+               str = BN_bn2dec(int_part->number);
+               bn_checkp(str);
+               p = str;
+               while (*p)
+                       putcharwrap(f, *p++);
+               free(str);
+       } else if (base == 16 && !BN_is_zero(int_part->number)) {
+               str = BN_bn2hex(int_part->number);
+               bn_checkp(str);
+               p = str;
+               if (*p == '-')
+                       putcharwrap(f, *p++);
+               /* skip leading zero's */
+               while (*p == '0')
+                       p++;
+               while (*p)
+                       putcharwrap(f, *p++);
+               free(str);
+       } else {
+               i = 0;
+               allocated = 1;
+               mem = breallocarray(NULL, allocated, sizeof(BN_ULONG));
+               while (!BN_is_zero(int_part->number)) {
+                       if (i >= allocated) {
+                               allocated *= 2;
+                               mem = breallocarray(mem, allocated,
+                                   sizeof(BN_ULONG));
+                       }
+                       mem[i++] = BN_div_word(int_part->number, base);
+               }
+               if (BN_is_negative(b->number))
+                       putcharwrap(f, '-');
+               for (i = i - 1; i >= 0; i--) {
+                       get_digit(mem[i], digits, base, buf,
+                           sizeof(buf));
+                       if (base > 16)
+                               putcharwrap(f, ' ');
+                       printwrap(f, buf);
+               }
+               free(mem);
        }
-       stack_clear(&stack);
+
        if (b->scale > 0) {
                struct number   *num_base;
                BIGNUM          *mult, *stop;
@@ -352,13 +373,12 @@ printnumber(FILE *f, const struct number
                            bmachine_scale());
                        split_number(fract_part, int_part->number, NULL);
                        rem = BN_get_word(int_part->number);
-                       p = get_digit(rem, digits, base);
+                       get_digit(rem, digits, base, buf, sizeof(buf));
                        int_part->scale = 0;
                        normalize(int_part, fract_part->scale);
                        bn_check(BN_sub(fract_part->number, fract_part->number,
                            int_part->number));
-                       printwrap(f, p);
-                       free(p);
+                       printwrap(f, buf);
                        bn_check(BN_mul_word(mult, base));
                }
                free_number(num_base);

Reply via email to