commit b2c44d8c961090c2773f3a98d12fcafc7f5c5b2b
Author:     Mattias Andrée <[email protected]>
AuthorDate: Mon Mar 14 17:56:37 2016 +0100
Commit:     Mattias Andrée <[email protected]>
CommitDate: Mon Mar 14 17:56:37 2016 +0100

    Mostly optimisations
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/Makefile b/Makefile
index 80d3aa7..c340bf6 100644
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,6 @@ FUN =\
        zrsh\
        zsave\
        zset\
-       zseti\
        zsets\
        zsetu\
        zsetup\
@@ -61,7 +60,8 @@ INLINE_FUN =\
        zabs\
        zneg\
        zlsb\
-       zbits
+       zbits\
+       zseti
 
 OBJ = $(FUN:=.o) allocator.o
 MAN3 = $(FUN:=.3) $(INLINE_FUN:=.3)
diff --git a/TODO b/TODO
index fbc847f..a71124e 100644
--- a/TODO
+++ b/TODO
@@ -1,2 +1,5 @@
 GMP has mpz_divexact(q,n,d), we should have zdiv_exact(q,n,d).
 It uses optimised division algorithm that requires that d|n.
+
+Add zsets_radix
+Add zstr_radix
diff --git a/config.mk b/config.mk
index a1be136..cad12d8 100644
--- a/config.mk
+++ b/config.mk
@@ -1,4 +1,4 @@
-VERSION = 0.0
+VERSION = 1.1
 
 PREFIX = /usr/local
 EXECPREFIX = $(PREFIX)
diff --git a/man/zcmpi.3 b/man/zcmpi.3
index eb786f8..22acf41 100644
--- a/man/zcmpi.3
+++ b/man/zcmpi.3
@@ -5,7 +5,7 @@ zcmpi - Compare a big integer and a signed integer
 .nf
 #include <zahl.h>
 
-int zcmpi(z_t \fIa\fP, signed long long int \fIb\fP);
+int zcmpi(z_t \fIa\fP, int64_t \fIb\fP);
 .fi
 .SH DESCRIPTION
 .B zcmpi
diff --git a/man/zcmpu.3 b/man/zcmpu.3
index b4703a9..63cbf09 100644
--- a/man/zcmpu.3
+++ b/man/zcmpu.3
@@ -5,7 +5,7 @@ zcmpu - Compare a big integer and an unsigned integer
 .nf
 #include <zahl.h>
 
-int zcmpu(z_t \fIa\fP, unsigned long long int \fIb\fP);
+int zcmpu(z_t \fIa\fP, uint64_t \fIb\fP);
 .fi
 .SH DESCRIPTION
 .B zcmpu
diff --git a/man/zseti.3 b/man/zseti.3
index 40d4aef..c2c589f 100644
--- a/man/zseti.3
+++ b/man/zseti.3
@@ -5,7 +5,7 @@ zseti - Set the value of a big integer to a signed integer.
 .nf
 #include <zahl.h>
 
-void zseti(z_t \fIa\fP, signed long long int \fIb\fP);
+void zseti(z_t \fIa\fP, int64_t \fIb\fP);
 .fi
 .SH DESCRIPTION
 .B zseti
diff --git a/man/zsetu.3 b/man/zsetu.3
index a0598a7..62b8202 100644
--- a/man/zsetu.3
+++ b/man/zsetu.3
@@ -5,7 +5,7 @@ zsetu - Set the value of a big integer to an unsigned integer.
 .nf
 #include <zahl.h>
 
-void zsetu(z_t \fIa\fP, unsigned long long int \fIb\fP);
+void zsetu(z_t \fIa\fP, uint64_t \fIb\fP);
 .fi
 .SH DESCRIPTION
 .B zseti
diff --git a/src/allocator.c b/src/allocator.c
index 44f7985..7737046 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -28,7 +28,7 @@ libzahl_realloc(z_t a, size_t need)
                a->chars = new;
        } else {
                a->chars = realloc(a->chars, need * sizeof(zahl_char_t));
-               if (!a->chars) {
+               if (unlikely(!a->chars)) {
                        if (!errno) /* sigh... */
                                errno = ENOMEM;
                        libzahl_failure(errno);
diff --git a/src/internals.h b/src/internals.h
index c1153ea..c859ce1 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -4,7 +4,11 @@
 #include <string.h>
 #include <stdlib.h>
 #include <errno.h>
-#include <limits.h>
+
+/* clang pretends to be GCC... */
+#if defined(__GNUC__) && defined(__clang__)
+# undef __GNUC__
+#endif
 
 #define BITS_PER_CHAR                64
 #define LB_BITS_PER_CHAR             6
@@ -14,6 +18,32 @@
 #define CEILING_BITS_TO_CHARS(bits)  (((bits) + (BITS_PER_CHAR - 1)) >> 
LB_BITS_PER_CHAR)
 #define BITS_IN_LAST_CHAR(bits)      ((bits) & (BITS_PER_CHAR - 1))
 
+#if defined(__GNUC__)
+# define O0     __attribute__((optimize("O0")))
+# define O1     __attribute__((optimize("O1")))
+# define O2     __attribute__((optimize("O2")))
+# define O3     __attribute__((optimize("O3")))
+# define Ofast  __attribute__((optimize("Ofast")))
+# define Os     __attribute__((optimize("Os")))
+# define Oz     __attribute__((optimize("Os")))
+#elif defined(__clang__)
+# define O0     __attribute__((optnone))
+# define O1     /* Don't know how. */
+# define O2     /* Don't know how. */
+# define O3     /* Don't know how. */
+# define Ofast  /* Don't know how. */
+# define Os     /* Don't know how. */
+# define Oz     /* Don't know how. */
+#else
+# define O0     /* Don't know how. */
+# define O1     /* Don't know how. */
+# define O2     /* Don't know how. */
+# define O3     /* Don't know how. */
+# define Ofast  /* Don't know how. */
+# define Os     /* Don't know how. */
+# define Oz     /* Don't know how. */
+#endif
+
 #define LIST_TEMPS\
        X(libzahl_tmp_cmp)\
        X(libzahl_tmp_str_num)\
@@ -77,13 +107,14 @@ extern size_t libzahl_pool_alloc[sizeof(size_t) * 8];
 #define TRIM(a)                      for (; (a)->used && !(a)->chars[(a)->used 
- 1]; (a)->used--)
 #define TRIM_NONZERO(a)              for (; !(a)->chars[(a)->used - 1]; 
(a)->used--)
 #define TRIM_AND_ZERO(a)             do { TRIM(a); if (!(a)->used) 
SET_SIGNUM(a, 0); } while (0)
+#define TRIM_AND_SIGN(a, s)          do { TRIM(a); SET_SIGNUM(a, (a)->used ? 
(s) : 0); } while (0)
 #define znegative(a)                 (zsignum(a) < 0)
 #define znegative1(a, b)             ((zsignum(a) | zsignum(b)) < 0)
 #define znegative2(a, b)             ((zsignum(a) & zsignum(b)) < 0)
 #define zpositive(a)                 (zsignum(a) > 0)
 #define zpositive1(a, b)             (zpositive(a) + zpositive(b) > 0)
 #define zpositive2(a, b)             (zsignum(a) + zsignum(b) == 2)
-#define zzero1(a, b)                 (zzero(a) + zzero(b) > 0)
+#define zzero1(a, b)                 (zzero(a) || zzero(b))
 #define zzero2(a, b)                 (!(zsignum(a) | zsignum(b)))
 #define zmemmove(d, s, n)            memmove((d), (s), (n) * 
sizeof(zahl_char_t))
 
@@ -138,3 +169,19 @@ libzahl_msb_nz_llu(unsigned long long int x)
        return r;
 }
 #endif
+
+#if defined(__GNUC__) || defined(__clang__)
+# if INT64_MAX == LONG_MAX
+#  define libzahl_add_overflow(rp, a, b)  __builtin_uaddl_overflow(a, b, rp)
+# else
+#  define libzahl_add_overflow(rp, a, b)  __builtin_uaddll_overflow(a, b, rp)
+# endif
+#else
+static inline int
+libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b)
+{
+       int carry = ZAHL_CHAR_MAX - a < b;
+       *rp = a + b;
+       return carry;
+}
+#endif
diff --git a/src/zadd.c b/src/zadd.c
index 9891b00..2d42684 100644
--- a/src/zadd.c
+++ b/src/zadd.c
@@ -2,12 +2,29 @@
 #include "internals.h"
 
 
-void
+static inline void
+zadd_impl(z_t a, z_t b, size_t n)
+{
+       zahl_char_t carry = 0, tcarry;
+       size_t i;
+
+       for (i = 0; i < n; i++) {
+               tcarry = libzahl_add_overflow(a->chars + i, a->chars[i], 
b->chars[i]);
+               carry = tcarry | libzahl_add_overflow(a->chars + i, 
a->chars[i], carry);
+       }
+       while (carry) {
+               carry = libzahl_add_overflow(a->chars + i, a->chars[i], 1);
+               i++;
+       }
+
+       if (a->used < i)
+               a->used = i;
+}
+
+inline void
 zadd_unsigned(z_t a, z_t b, z_t c)
 {
-       size_t i, size, n;
-       uint32_t carry[] = {0, 0};
-       zahl_char_t *addend;
+       size_t size, n;
 
        if (unlikely(zzero(b))) {
                zabs(a, c);
@@ -28,42 +45,26 @@ zadd_unsigned(z_t a, z_t b, z_t c)
                        n = c->used;
                        zmemset(a->chars + a->used, 0, n - a->used);
                }
-               addend = c->chars;
+               zadd_impl(a, c, n);
        } else if (unlikely(a == c)) {
                if (a->used < b->used) {
                        n = b->used;
                        zmemset(a->chars + a->used, 0, n - a->used);
                }
-               addend = b->chars;
-       } else if (b->used > c->used) {
+               zadd_impl(a, b, n);
+       } else if (likely(b->used > c->used)) {
                zmemcpy(a->chars, b->chars, b->used);
                a->used = b->used;
-               addend = c->chars;
+               zadd_impl(a, c, n);
        } else {
                zmemcpy(a->chars, c->chars, c->used);
                a->used = c->used;
-               addend = b->chars;
+               zadd_impl(a, b, n);
        }
 
-       for (i = 0; i < n; i++) {
-               if (carry[i & 1])
-                       carry[~i & 1] = (ZAHL_CHAR_MAX - a->chars[i] <= 
addend[i]);
-               else
-                       carry[~i & 1] = (ZAHL_CHAR_MAX - a->chars[i] < 
addend[i]);
-               a->chars[i] += addend[i] + carry[i & 1];
-       }
-
-       while (carry[i & 1]) {
-               carry[~i & 1] = a->chars[i] == ZAHL_CHAR_MAX;
-               a->chars[i++] += 1;
-       }
-
-       if (a->used < i)
-               a->used = i;
        SET_SIGNUM(a, 1);
 }
 
-
 void
 zadd(z_t a, z_t b, z_t c)
 {
@@ -71,19 +72,15 @@ zadd(z_t a, z_t b, z_t c)
                SET(a, c);
        } else if (unlikely(zzero(c))) {
                SET(a, b);
-       } else if (unlikely(b == c)) {
-               zlsh(a, b, 1);
-       } else if (unlikely(znegative1(b, c))) {
-               if (znegative(b)) {
-                       if (znegative(c)) {
-                               zadd_unsigned(a, b, c);
-                               SET_SIGNUM(a, -zsignum(a));
-                       } else {
-                               zsub_unsigned(a, c, b);
-                       }
+       } else if (unlikely(znegative(b))) {
+               if (znegative(c)) {
+                       zadd_unsigned(a, b, c);
+                       SET_SIGNUM(a, -zsignum(a));
                } else {
-                       zsub_unsigned(a, b, c);
+                       zsub_unsigned(a, c, b);
                }
+       } else if (unlikely(znegative(c))) {
+               zsub_unsigned(a, b, c);
        } else {
                zadd_unsigned(a, b, c);
        }
diff --git a/src/zand.c b/src/zand.c
index 119f66c..a8f53a6 100644
--- a/src/zand.c
+++ b/src/zand.c
@@ -2,36 +2,37 @@
 #include "internals.h"
 
 
+static inline O2 void
+zand_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+{
+       size_t i;
+       for (i = 0; i < n; i++)
+               a[i] &= b[i];
+}
+
 void
 zand(z_t a, z_t b, z_t c)
 {
-       size_t n;
-
-       if (unlikely(zzero1(b, c))) {
+       /* Yes, you are reading this right. It's an optimisation. */
+       if (unlikely(zzero(b))) {
+               SET_SIGNUM(a, 0);
+               return;
+       } else if (unlikely(zzero(c))) {
                SET_SIGNUM(a, 0);
                return;
        }
 
-       n = MIN(b->used, c->used);
-       while (n--)
-               if (b->chars[n] & c->chars[n])
-                       goto found_highest;
-       SET_SIGNUM(a, 0);
-       return;
+       a->used = MIN(b->used, c->used);
 
-found_highest:
-       a->used = ++n;
        if (a == b) {
-               while (n--)
-                       a->chars[n] &= c->chars[n];
+               zand_impl(a->chars, c->chars, a->used);
        } else if (unlikely(a == c)) {
-               while (n--)
-                       a->chars[n] &= b->chars[n];
+               zand_impl(a->chars, b->chars, a->used);
        } else {
                ENSURE_SIZE(a, a->used);
                zmemcpy(a->chars, c->chars, a->used);
-               while (n--)
-                       a->chars[n] &= b->chars[n];
+               zand_impl(a->chars, b->chars, a->used);
        }
-       SET_SIGNUM(a, zpositive1(b, c) * 2 - 1);
+
+       TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1);
 }
diff --git a/src/zcmpi.c b/src/zcmpi.c
index 54abcd9..71391b1 100644
--- a/src/zcmpi.c
+++ b/src/zcmpi.c
@@ -3,7 +3,7 @@
 
 
 int
-zcmpi(z_t a, long long int b)
+zcmpi(z_t a, int64_t b)
 {
        if (unlikely(!b))
                return zsignum(a);
diff --git a/src/zcmpu.c b/src/zcmpu.c
index 40e33b6..8bba692 100644
--- a/src/zcmpu.c
+++ b/src/zcmpu.c
@@ -3,7 +3,7 @@
 
 
 int
-zcmpu(z_t a, unsigned long long int b)
+zcmpu(z_t a, uint64_t b)
 {
        if (unlikely(!b))
                return zsignum(a);
diff --git a/src/zor.c b/src/zor.c
index 4e7a2dc..25b69ea 100644
--- a/src/zor.c
+++ b/src/zor.c
@@ -2,16 +2,21 @@
 #include "internals.h"
 
 
+static inline O2 void
+zor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+{
+       size_t i;
+       for (i = 0; i < n; i++)
+               a[i] |= b[i];
+}
+
 void
 zor(z_t a, z_t b, z_t c)
 {
        size_t n, m;
 
        if (unlikely(zzero(b))) {
-               if (zzero(c))
-                       SET_SIGNUM(a, 0);
-               else
-                       SET(a, c);
+               SET(a, c);
                return;
        } else if (unlikely(zzero(c))) {
                SET(a, b);
@@ -24,23 +29,19 @@ zor(z_t a, z_t b, z_t c)
        ENSURE_SIZE(a, m);
 
        if (a == b) {
-               if (b->used < c->used)
+               if (a->used < c->used)
                        zmemcpy(a->chars + n, c->chars + n, m - n);
-               while (n--)
-                       a->chars[n] |= c->chars[n];
+               zor_impl(a->chars, c->chars, n);
        } else if (unlikely(a == c)) {
-               if (c->used < b->used)
+               if (a->used < b->used)
                        zmemcpy(a->chars + n, b->chars + n, m - n);
-               while (n--)
-                       a->chars[n] |= b->chars[n];
-       } else if (m == b->used) {
+               zor_impl(a->chars, b->chars, n);
+       } else  if (m == b->used) {
                zmemcpy(a->chars, b->chars, m);
-               while (n--)
-                       a->chars[n] |= c->chars[n];
+               zor_impl(a->chars, c->chars, n);
        } else {
                zmemcpy(a->chars, c->chars, m);
-               while (n--)
-                       a->chars[n] |= b->chars[n];
+               zor_impl(a->chars, b->chars, n);
        }
 
        a->used = m;
diff --git a/src/zrand.c b/src/zrand.c
index 64fa7ed..1bb1a90 100644
--- a/src/zrand.c
+++ b/src/zrand.c
@@ -26,7 +26,7 @@ zrand_get_random_bits(z_t r, size_t bits, int fd)
 
        for (n = chars * sizeof(zahl_char_t); n;) {
                read_just = read(fd, buf + read_total, n);
-               if (read_just < 0)
+               if (unlikely(read_just < 0))
                        libzahl_failure(errno);
                read_total += (size_t)read_just;
                n -= (size_t)read_just;
@@ -38,7 +38,7 @@ zrand_get_random_bits(z_t r, size_t bits, int fd)
 
        r->chars[chars - 1] &= mask;
        for (n = chars; n--;) {
-               if (r->chars[n]) {
+               if (likely(r->chars[n])) {
                        r->used = n + 1;
                        SET_SIGNUM(r, 1);
                        return;
@@ -71,7 +71,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n)
        }
 
        fd = open(pathname, O_RDONLY);
-       if (fd < 0)
+       if (unlikely(fd < 0))
                libzahl_failure(errno);
 
        switch (dist) {
@@ -91,7 +91,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n)
                bits = zbits(n);
                do
                        zrand_get_random_bits(r, bits, fd);
-               while (zcmpmag(r, n) > 0);
+               while (unlikely(zcmpmag(r, n) > 0));
                break;
 
        default:
diff --git a/src/zseti.c b/src/zseti.c
deleted file mode 100644
index e1116b6..0000000
--- a/src/zseti.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "internals.h"
-
-
-void
-zseti(z_t a, long long int b)
-{
-       if (unlikely(b >= 0)) {
-               zsetu(a, (unsigned long long int)b);
-       } else {
-               zsetu(a, (unsigned long long int)-b);
-               SET_SIGNUM(a, -1);
-       }
-}
diff --git a/src/zsets.c b/src/zsets.c
index bddc9ec..e1506f6 100644
--- a/src/zsets.c
+++ b/src/zsets.c
@@ -3,6 +3,10 @@
 
 #include <ctype.h>
 
+#ifdef __GNUC__
+# pragma GCC diagnostic ignored "-Wswitch-default"
+#endif
+
 
 int
 zsets(z_t a, const char *str)
@@ -44,7 +48,7 @@ zsets(z_t a, const char *str)
                }
        }
 
-       if (neg)
+       if (unlikely(neg))
                SET_SIGNUM(a, -zsignum(a));
        return 0;
 }
diff --git a/src/zsetu.c b/src/zsetu.c
index 538ea37..42e8cec 100644
--- a/src/zsetu.c
+++ b/src/zsetu.c
@@ -1,17 +1,15 @@
 /* See LICENSE file for copyright and license details. */
 #include "internals.h"
 
-#define SIZE_MULTIPLE(fit, in)  ((sizeof(fit) + sizeof(in) - 1) / sizeof(in))
-
 
 void
-zsetu(z_t a, unsigned long long int b)
+zsetu(z_t a, uint64_t b)
 {
        if (!b) {
                SET_SIGNUM(a, 0);
                return;
        }
-       ENSURE_SIZE(a, SIZE_MULTIPLE(b, *(a->chars)));
+       ENSURE_SIZE(a, 1);
        SET_SIGNUM(a, 1);
        a->chars[0] = (zahl_char_t)b;
        a->used = 1;
diff --git a/src/zstr.c b/src/zstr.c
index dfcdf23..c82ec89 100644
--- a/src/zstr.c
+++ b/src/zstr.c
@@ -19,8 +19,8 @@ zstr(z_t a, char *b)
        size_t n, len, neg;
        char overridden = 0;
 
-       if (zzero(a)) {
-               if (!b && !(b = malloc(2)))
+       if (unlikely(zzero(a))) {
+               if (unlikely(!b) && unlikely(!(b = malloc(2))))
                        libzahl_failure(errno);
                b[0] = '0';
                b[1] = 0;
@@ -29,7 +29,7 @@ zstr(z_t a, char *b)
 
        n = zstr_length(a, 10);
 
-       if (!b && !(b = malloc(n + 1)))
+       if (unlikely(!b) && unlikely(!(b = malloc(n + 1))))
                libzahl_failure(errno);
 
        neg = znegative(a);
@@ -41,7 +41,7 @@ zstr(z_t a, char *b)
 
        for (;;) {
                zdivmod(num, rem, num, libzahl_const_1e19);
-               if (!zzero(num)) {
+               if (likely(!zzero(num))) {
                        sprintf(b + n, "%019llu", zzero(rem) ? 0ULL : (unsigned 
long long)(rem->chars[0]));
                        b[n + 19] = overridden;
                        overridden = b[n];
diff --git a/src/zsub.c b/src/zsub.c
index e769e32..c8057b5 100644
--- a/src/zsub.c
+++ b/src/zsub.c
@@ -2,13 +2,34 @@
 #include "internals.h"
 
 
-void
+static inline void
+zsub_impl(z_t a, z_t b, size_t n)
+{
+       zahl_char_t carry = 0, tcarry;
+       size_t i;
+
+       for (i = 0; i < n; i++) {
+               tcarry = carry ? (a->chars[i] <= b->chars[i]) : (a->chars[i] < 
b->chars[i]);
+               a->chars[i] -= b->chars[i];
+               a->chars[i] -= carry;
+               carry = tcarry;
+       }
+
+       if (carry) {
+               while (!a->chars[i])
+                       a->chars[i++] = ZAHL_CHAR_MAX;
+               if (a->chars[i] == 1)
+                       a->used--;
+               else
+                       a->chars[i] -= 1;
+       }
+}
+
+inline void
 zsub_unsigned(z_t a, z_t b, z_t c)
 {
-       zahl_char_t carry[] = {0, 0};
-       zahl_char_t *s;
-       size_t i, n;
        int magcmp;
+       size_t n;
 
        if (unlikely(zzero(b))) {
                zabs(a, c);
@@ -20,64 +41,51 @@ zsub_unsigned(z_t a, z_t b, z_t c)
        }
 
        magcmp = zcmpmag(b, c);
-       if (magcmp <= 0) {
-               if (magcmp == 0) {
+       if (unlikely(magcmp <= 0)) {
+               if (unlikely(magcmp == 0)) {
                        SET_SIGNUM(a, 0);
                        return;
                }
                n = MIN(b->used, c->used);
                if (a == b) {
                        zset(libzahl_tmp_sub, b);
-                       s = libzahl_tmp_sub->chars;
+                       SET(a, c);
+                       zsub_impl(a, libzahl_tmp_sub, n);
                } else {
-                       s = b->chars;
+                       SET(a, c);
+                       zsub_impl(a, b, n);
                }
-               SET(a, c);
        } else {
                n = MIN(b->used, c->used);
-               if (a == c) {
+               if (unlikely(a == c)) {
                        zset(libzahl_tmp_sub, c);
-                       s = libzahl_tmp_sub->chars;
+                       SET(a, b);
+                       zsub_impl(a, libzahl_tmp_sub, n);
                } else {
-                       s = c->chars;
+                       SET(a, b);
+                       zsub_impl(a, c, n);
                }
-               SET(a, b);
-       }
-
-       for (i = 0; i < n; i++) {
-               carry[~i & 1] = carry[i & 1] ? (a->chars[i] <= s[i]) : 
(a->chars[i] < s[i]);
-               a->chars[i] -= s[i];
-               a->chars[i] -= carry[i & 1];
        }
 
-       if (carry[i & 1]) {
-               while (!a->chars[i])
-                       a->chars[i++] = ZAHL_CHAR_MAX;
-               a->chars[i] -= 1;
-       }
        SET_SIGNUM(a, magcmp);
 }
 
 void
 zsub(z_t a, z_t b, z_t c)
 {
-       if (unlikely(b == c)) {
-               SET_SIGNUM(a, 0);
-       } else if (unlikely(zzero(b))) {
+       if (unlikely(zzero(b))) {
                zneg(a, c);
        } else if (unlikely(zzero(c))) {
                SET(a, b);
-       } else if (unlikely(znegative1(b, c))) {
-               if (znegative(b)) {
-                       if (znegative(c)) {
-                               zsub_unsigned(a, c, b);
-                       } else {
-                               zadd_unsigned(a, b, c);
-                               SET_SIGNUM(a, -zsignum(a));
-                       }
+       } else if (unlikely(znegative(b))) {
+               if (znegative(c)) {
+                       zsub_unsigned(a, c, b);
                } else {
                        zadd_unsigned(a, b, c);
+                       SET_SIGNUM(a, -zsignum(a));
                }
+       } else if (znegative(c)) {
+               zadd_unsigned(a, b, c);
        } else {
                zsub_unsigned(a, b, c);
        }
diff --git a/src/ztrunc.c b/src/ztrunc.c
index 91d2a92..e1a330c 100644
--- a/src/ztrunc.c
+++ b/src/ztrunc.c
@@ -29,7 +29,5 @@ ztrunc(z_t a, z_t b, size_t bits)
                a->chars[a->used - 1] &= mask;
        }
 
-       TRIM(a);
-       if (!a->used)
-               SET_SIGNUM(a, 0);
+       TRIM_AND_ZERO(a);
 }
diff --git a/src/zxor.c b/src/zxor.c
index 876591a..f973774 100644
--- a/src/zxor.c
+++ b/src/zxor.c
@@ -2,16 +2,21 @@
 #include "internals.h"
 
 
+static inline void
+zxor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n)
+{
+       size_t i;
+       for (i = 0; i < n; i++)
+               a[i] ^= b[i];
+}
+
 void
 zxor(z_t a, z_t b, z_t c)
 {
        size_t n, m;
 
        if (unlikely(zzero(b))) {
-               if (zzero(c))
-                       SET_SIGNUM(a, 0);
-               else
-                       SET(a, c);
+               SET(a, c);
                return;
        } else if (unlikely(zzero(c))) {
                SET(a, b);
@@ -24,29 +29,21 @@ zxor(z_t a, z_t b, z_t c)
        ENSURE_SIZE(a, m);
 
        if (a == b) {
-               if (b->used < c->used)
+               if (a->used < c->used)
                        zmemcpy(a->chars + n, c->chars + n, m - n);
-               while (n--)
-                       a->chars[n] ^= c->chars[n];
+               zxor_impl(a->chars, c->chars, n);
        } else if (unlikely(a == c)) {
-               if (c->used < b->used)
+               if (a->used < b->used)
                        zmemcpy(a->chars + n, b->chars + n, m - n);
-               while (n--)
-                       a->chars[n] ^= b->chars[n];
+               zxor_impl(a->chars, b->chars, n);
        } else if (m == b->used) {
                zmemcpy(a->chars, b->chars, m);
-               while (n--)
-                       a->chars[n] ^= c->chars[n];
+               zxor_impl(a->chars, c->chars, n);
        } else {
                zmemcpy(a->chars, c->chars, m);
-               while (n--)
-                       a->chars[n] ^= b->chars[n];
+               zxor_impl(a->chars, b->chars, n);
        }
 
        a->used = m;
-       TRIM(a);
-       if (a->used)
-               SET_SIGNUM(a, 1 - 2 * ((zsignum(b) ^ zsignum(c)) < 0));
-       else
-               SET_SIGNUM(a, 0);
+       TRIM_AND_SIGN(a, 1 - 2 * ((zsignum(b) ^ zsignum(c)) < 0));
 }
diff --git a/test.c b/test.c
index c4f146c..5517d9e 100644
--- a/test.c
+++ b/test.c
@@ -304,7 +304,7 @@ main(void)
        zneg(b, _1);
        zneg(c, _2);
 
-       assert((zadd_unsigned(a, b, c), zcmp(a, _3)), == 0);
+       assert((zadd_unsigned(a, _1, _2), zcmp(a, _3)), == 0);
        assert((zadd_unsigned(a, b, c), zcmp(a, _3)), == 0);
        assert((zadd_unsigned(a, b, _2), zcmp(a, _3)), == 0);
        assert((zadd_unsigned(a, _1, c), zcmp(a, _3)), == 0);
diff --git a/zahl.h b/zahl.h
index d051bc1..37711b2 100644
--- a/zahl.h
+++ b/zahl.h
@@ -4,12 +4,23 @@
 /* Caution: Do not use libzahl for cryptographic applications, use a 
specialised library. */
 
 #ifndef ZAHL_H
-#define ZAHL_H
+#define ZAHL_H 1
 
 
 #include <stddef.h>
 #include <setjmp.h>
 #include <stdint.h>
+#include <limits.h>
+
+
+
+#ifndef ZAHL_INLINE
+# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#  define ZAHL_INLINE  static inline
+# else
+#  define ZAHL_INLINE  static
+# endif
+#endif
 
 
 
@@ -19,22 +30,38 @@ typedef uint64_t zahl_char_t;
 /* This structure should be considered opaque. */
 typedef struct {
         int sign;
+#if INT_MAX != LONG_MAX
+       int padding__;
+#endif
         size_t used;
         size_t alloced;
         zahl_char_t *chars;
 } z_t[1];
 
 
-enum zprimality { NONPRIME = 0, PROBABLY_PRIME, PRIME };
-enum zranddev { FAST_RANDOM = 0, SECURE_RANDOM };
-enum zranddist { QUASIUNIFORM = 0, UNIFORM };
+
+enum zprimality {
+       NONPRIME = 0,                   /* The number is definitely composite. 
*/
+       PROBABLY_PRIME,                 /* The number is probably prime. */
+       PRIME                           /* The number is definitely prime. */
+};
+
+enum zranddev {
+       FAST_RANDOM = 0,                /* Random numbers are generated 
directly from /dev/urandom. */
+       SECURE_RANDOM                   /* Random numbers are generated 
directly from /dev/random. */
+};
+
+enum zranddist {
+       QUASIUNIFORM = 0,               /* Almost uniformly random, per the 
usual recommendation. */
+       UNIFORM                         /* Actually uniformly random. */
+};
 
 enum zerror {
-       ZERROR_ERRNO_SET = 0,          /* Please refer to errno. */
-       ZERROR_0_POW_0,                /* Indeterminate form: 0:th power of 0. 
(Translatable to EDOM.) */
-       ZERROR_0_DIV_0,                /* Indeterminate form: 0 divided by 0. 
(Translatable to EDOM.) */
-       ZERROR_DIV_0,                  /* Undefined result: Division by 0. 
(Translatable to EDOM.) */
-       ZERROR_NEGATIVE                /* Argument must be non-negative. 
(Translatable to EDOM or EINVAL.) */
+       ZERROR_ERRNO_SET = 0,           /* Please refer to errno. */
+       ZERROR_0_POW_0,                 /* Indeterminate form: 0:th power of 0. 
(Translatable to EDOM.) */
+       ZERROR_0_DIV_0,                 /* Indeterminate form: 0 divided by 0. 
(Translatable to EDOM.) */
+       ZERROR_DIV_0,                   /* Undefined result: Division by 0. 
(Translatable to EDOM.) */
+       ZERROR_NEGATIVE                 /* Argument must be non-negative. 
(Translatable to EDOM or EINVAL.) */
 };
 
 
@@ -44,75 +71,83 @@ enum zerror {
 
 /* Library initialisation and destruction. */
 
-void zsetup(jmp_buf);                  /* Prepare libzahl for use. */
-void zunsetup(void);                   /* Free resources used by libzahl */
+void zsetup(jmp_buf);                   /* Prepare libzahl for use. */
+void zunsetup(void);                    /* Free resources used by libzahl */
 
 
 /* Memory functions. */
 
-void zfree(z_t);                       /* Free resources in a. */
-size_t zsave(z_t, void *);             /* Store a into b (if !!b), and return 
number of written bytes. */
-size_t zload(z_t, const void *);       /* Restore a from b, and return number 
of read bytes. */
+ZAHL_INLINE void zinit(z_t);            /* Prepare a for use. */
+ZAHL_INLINE void zswap(z_t, z_t);       /* (a, b) := (b, a) */
+void zfree(z_t);                        /* Free resources in a. */
+size_t zsave(z_t, void *);              /* Store a into b (if !!b), and return 
number of written bytes. */
+size_t zload(z_t, const void *);        /* Restore a from b, and return number 
of read bytes. */
 
 
 /* Assignment functions. */
 
-/* a := b */
-void zset(z_t, z_t);
-void zseti(z_t, long long int);
-void zsetu(z_t, unsigned long long int);
-
+void zset(z_t, z_t);                    /* a := b */
+void zsetu(z_t, uint64_t);              /* a := b */
+ZAHL_INLINE void zseti(z_t, int64_t);   /* a := b */
 
 /* Comparison functions. */
 
-/* signum (a - b) */
-int zcmp(z_t, z_t);
-int zcmpi(z_t, long long int);
-int zcmpu(z_t, unsigned long long int);
-
-int zcmpmag(z_t, z_t);                 /* signum (|a| - |b|) */
+int zcmp(z_t, z_t);                     /* signum (a - b) */
+int zcmpu(z_t, uint64_t);               /* signum (a - b) */
+int zcmpi(z_t, int64_t);                /* signum (a - b) */
+int zcmpmag(z_t, z_t);                  /* signum (|a| - |b|) */
 
 
 /* Arithmetic functions. */
 
-void zadd(z_t, z_t, z_t);              /* a := b + c */
-void zsub(z_t, z_t, z_t);              /* a := b - c */
-void zmul(z_t, z_t, z_t);              /* a := b * c */
-void zmodmul(z_t, z_t, z_t, z_t);      /* a := (b * c) % d */
-void zdiv(z_t, z_t, z_t);              /* a := b / c */
-void zdivmod(z_t, z_t, z_t, z_t);      /* a := c / d, b = c % d */
-void zmod(z_t, z_t, z_t);              /* a := b % c */
-void zsqr(z_t, z_t);                   /* a := b² */
-void zmodsqr(z_t, z_t, z_t);           /* a := b² % c */
-void zpow(z_t, z_t, z_t);              /* a := b ↑ c */
-void zmodpow(z_t, z_t, z_t, z_t);      /* a := (b ↑ c) % d */
+ZAHL_INLINE void zabs(z_t, z_t);        /* a := |b| */
+ZAHL_INLINE void zneg(z_t, z_t);        /* a := -b */
+void zadd(z_t, z_t, z_t);               /* a := b + c */
+void zsub(z_t, z_t, z_t);               /* a := b - c */
+void zmul(z_t, z_t, z_t);               /* a := b * c */
+void zmodmul(z_t, z_t, z_t, z_t);       /* a := (b * c) % d */
+void zdiv(z_t, z_t, z_t);               /* a := b / c */
+void zdivmod(z_t, z_t, z_t, z_t);       /* a := c / d, b = c % d */
+void zmod(z_t, z_t, z_t);               /* a := b % c */
+void zsqr(z_t, z_t);                    /* a := b² */
+void zmodsqr(z_t, z_t, z_t);            /* a := b² % c */
+void zpow(z_t, z_t, z_t);               /* a := b ↑ c */
+void zmodpow(z_t, z_t, z_t, z_t);       /* a := (b ↑ c) % d */
 void zpowu(z_t, z_t, unsigned long long int);
 void zmodpowu(z_t, z_t, unsigned long long int, z_t);
 
 /* These are used internally and may be removed in a future version. */
-void zadd_unsigned(z_t, z_t, z_t);     /* a := |b| + |c| */
-void zsub_unsigned(z_t, z_t, z_t);     /* a := |b| - |c| */
+void zadd_unsigned(z_t, z_t, z_t);      /* a := |b| + |c| */
+void zsub_unsigned(z_t, z_t, z_t);      /* a := |b| - |c| */
 
 
 /* Bitwise operations. */
 
-void zand(z_t, z_t, z_t);              /* a := b & c */
-void zor(z_t, z_t, z_t);               /* a := b | c */
-void zxor(z_t, z_t, z_t);              /* a := b ^ c */
-void znot(z_t, z_t);                   /* a := ~b */
-void zlsh(z_t, z_t, size_t);           /* a := b << c */
-void zrsh(z_t, z_t, size_t);           /* a := b >> c */
-void ztrunc(z_t, z_t, size_t);         /* a := b & ((1 << c) - 1) */
-int zbtest(z_t, size_t);               /* (a >> b) & 1 */
-void zsplit(z_t, z_t, z_t, size_t);    /* a := c >> d, b := c - (a << d) */
-
-/* If d > 0: a := b | (1 << c), f d = 0: a := b & ~(1 << c), if d < 0: a := b 
^ (1 << c) */
+void zand(z_t, z_t, z_t);               /* a := b & c */
+void zor(z_t, z_t, z_t);                /* a := b | c */
+void zxor(z_t, z_t, z_t);               /* a := b ^ c */
+void znot(z_t, z_t);                    /* a := ~b */
+void zlsh(z_t, z_t, size_t);            /* a := b << c */
+void zrsh(z_t, z_t, size_t);            /* a := b >> c */
+void ztrunc(z_t, z_t, size_t);          /* a := b & ((1 << c) - 1) */
+int zbtest(z_t, size_t);                /* (a >> b) & 1 */
+void zsplit(z_t, z_t, z_t, size_t);     /* a := c >> d, b := c - (a << d) */
+ZAHL_INLINE size_t zlsb(z_t);           /* Index of first set bit, SIZE_MAX if 
none are set. */
+ZAHL_INLINE size_t zbits(z_t);          /* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
+
+/* If d > 0: a := b | (1 << c), if d = 0: a := b & ~(1 << c), if d < 0: a := b 
^ (1 << c) */
 void zbset(z_t, z_t, size_t, int);
 
 
 /* Number theory. */
 
-void zgcd(z_t, z_t, z_t);              /* a := gcd(b, c) */
+ZAHL_INLINE int zeven(z_t);             /* Is a even? */
+ZAHL_INLINE int zodd(z_t);              /* Is a odd? */
+ZAHL_INLINE int zeven_nonzero(z_t);     /* Is a even? Assumes a ≠ 0. */
+ZAHL_INLINE int zodd_nonzero(z_t);      /* Is a odd? Assumes a ≠ 0. */
+ZAHL_INLINE int zzero(z_t);             /* Is a zero? */
+ZAHL_INLINE int zsignum(z_t);           /* a/|a|, 0 if a is zero. */
+void zgcd(z_t, z_t, z_t);               /* a := gcd(b, c) */
 
 /* NONPRIME if b ∉ ℙ, PROBABLY_PRIME, if b ∈ ℙ with (1 − 4↑−c) 
certainty, 2 if PRIME ∈ ℙ.
  * If NONPRIME is returned the witness of b's compositeness is stored in a. */
@@ -127,8 +162,8 @@ void zrand(z_t, enum zranddev, enum zranddist, z_t);
 
 /* String conversion. */
 
-char *zstr(z_t, char *);               /* Write a in decimal onto b. */
-int zsets(z_t, const char *);          /* a := b */
+char *zstr(z_t, char *);                /* Write a in decimal onto b. */
+int zsets(z_t, const char *);           /* a := b */
 
 /* Length of a in radix b. */
 size_t zstr_length(z_t, unsigned long long int);
@@ -136,85 +171,96 @@ size_t zstr_length(z_t, unsigned long long int);
 
 /* Error handling functions. */
 
-enum zerror zerror(const char **);     /* Return the current error code, and 
unless a is 0, a description in *a. */
-void zperror(const char *);            /* Identical to perror(3p) except it 
supports libzahl errors. */
+enum zerror zerror(const char **);      /* Return the current error code, and 
unless !a, a description in *a. */
+void zperror(const char *);             /* Identical to perror(3p) except it 
supports libzahl errors. */
 
 
-/* Inline functions. */
 
-static inline void zinit(z_t a)        { a->alloced = 0; a->chars = 0; }       
     /* Prepare a for use. */
-static inline void zswap(z_t a, z_t b) { z_t t; *t = *a; *a = *b; *b = *t; }   
     /* (a, b) := (b, a) */
-static inline int zeven(z_t a)         { return !a->sign || !(a->chars[0] & 
1); }   /* Is a even? */
-static inline int zodd(z_t a)          { return a->sign && (a->chars[0] & 1); 
}     /* Is a odd? */
-static inline int zeven_nonzero(z_t a) { return !(a->chars[0] & 1); }          
     /* Is a even? Assumes a ≠ 0. */
-static inline int zodd_nonzero(z_t a)  { return (a->chars[0] & 1); }           
     /* Is a odd? Assumes a ≠ 0. */
-static inline int zzero(z_t a)         { return !a->sign; }                    
     /* Is a zero? */
-static inline int zsignum(z_t a)       { return a->sign; }                     
     /* a/|a|, 0 if a is zero. */
-static inline void zabs(z_t a, z_t b)  { if (a != b) zset(a, b); a->sign = 
!!a->sign; }  /* a := |b| */
-static inline void zneg(z_t a, z_t b)  { if (a != b) zset(a, b); a->sign = 
-a->sign; }   /* a := -b */
+/* ------------------------------- Implementations of inline functions. 
------------------------------- */
 
 
-/* Bitwise inline functions. */
+#if defined(__GNUC__) || defined(__clang__)
+# define ZAHL_UNLIKELY(expr)  __builtin_expect(!!(expr), 0)
+# define ZAHL_LIKELY(expr)    __builtin_expect(!!(expr), 1)
+#else
+# define ZAHL_UNLIKELY(expr)  (expr)
+# define ZAHL_LIKELY(expr)    (expr)
+#endif
 
 
-/* Index of first set bit, SIZE_MAX if none are set. */
-#if defined(__GNUC__) || defined(__clang__)
-static inline size_t
+ZAHL_INLINE void zinit(z_t a)         { a->alloced = 0; a->chars = 0; }
+ZAHL_INLINE void zswap(z_t a, z_t b)  { z_t t; *t = *a; *a = *b; *b = *t; }
+ZAHL_INLINE int zeven(z_t a)          { return !a->sign || !(a->chars[0] & 1); 
}
+ZAHL_INLINE int zodd(z_t a)           { return a->sign && (a->chars[0] & 1); }
+ZAHL_INLINE int zeven_nonzero(z_t a)  { return !(a->chars[0] & 1); }
+ZAHL_INLINE int zodd_nonzero(z_t a)   { return (a->chars[0] & 1); }
+ZAHL_INLINE int zzero(z_t a)          { return !a->sign; }
+ZAHL_INLINE int zsignum(z_t a)        { return a->sign; }
+ZAHL_INLINE void zabs(z_t a, z_t b)   { if (a != b) zset(a, b); a->sign = 
!!a->sign; }
+ZAHL_INLINE void zneg(z_t a, z_t b)   { if (a != b) zset(a, b); a->sign = 
-a->sign; }
+
+
+ZAHL_INLINE void
+zseti(z_t a, int64_t b)
+{
+       if (ZAHL_UNLIKELY(b >= 0)) {
+               zsetu(a, (uint64_t)b);
+       } else {
+               zsetu(a, (uint64_t)-b);
+               a->sign = -1;
+       }
+}
+
+
+ZAHL_INLINE size_t
 zlsb(z_t a)
 {
+#if defined(__GNUC__) || defined(__clang__)
        size_t i = 0;
-       if (__builtin_expect(zzero(a), 0))
+       if (ZAHL_UNLIKELY(zzero(a)))
                return SIZE_MAX;
        for (; !a->chars[i]; i++);
        i *= 8 * sizeof(zahl_char_t);
        i += (size_t)__builtin_ctzll(a->chars[i]);
        return i;
-}
 #else
-static inline size_t
-zlsb(z_t a)
-{
        size_t i = 0;
        zahl_char_t x;
-       if (zzero(a))
+       if (ZAHL_UNLIKELY(zzero(a)))
                return SIZE_MAX;
        for (; !a->chars[i]; i++);
        i *= 8 * sizeof(zahl_char_t);
        x = ~(a->chars[i]);
        for (; x & 1; x >>= 1, i++);
        return i;
-}
 #endif
+}
 
 
-/* ⌊log₂ |a|⌋ + 1, 1 if a = 0 */
-#if defined(__GNUC__) || defined(__clang__)
-static inline size_t
+ZAHL_INLINE size_t
 zbits(z_t a)
 {
+#if defined(__GNUC__) || defined(__clang__)
        size_t rc;
-       if (__builtin_expect(zzero(a), 0))
+       if (ZAHL_UNLIKELY(zzero(a)))
                return 1;
        while (!a->chars[a->used - 1])  a->used--; /* TODO should not be 
necessary */
        rc = a->used * 8 * sizeof(zahl_char_t);
        rc -= (size_t)__builtin_clzll(a->chars[a->used - 1]);
        return rc;
-}
 #else
-static inline size_t
-zbits(z_t a)
-{
        size_t rc;
        zahl_char_t x;
-       if (zzero(a))
+       if (ZAHL_UNLIKELY(zzero(a)))
                return 1;
        while (!a->chars[a->used - 1])  a->used--; /* TODO should not be 
necessary */
        x = a->chars[a->used - 1];
        rc = (a->used - 1) * 8 * sizeof(zahl_char_t);
        for (; x; x >>= 1, rc++);
        return rc;
-}
 #endif
+}
+
 
 
 #endif

Reply via email to