The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=854d066251a9e1731993416b1934e2002a29d395

commit 854d066251a9e1731993416b1934e2002a29d395
Author:     John Baldwin <j...@freebsd.org>
AuthorDate: 2022-10-28 20:36:13 +0000
Commit:     John Baldwin <j...@freebsd.org>
CommitDate: 2022-10-28 20:36:13 +0000

    wg: Trim compat shims for versions older than current stable/13.
    
    Reviewed by:    kevans, markj, emaste
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D36913
---
 sys/dev/wg/compat.h    |  101 ---
 sys/dev/wg/crypto.h    |   67 +-
 sys/dev/wg/wg_crypto.c | 1645 ++----------------------------------------------
 3 files changed, 52 insertions(+), 1761 deletions(-)

diff --git a/sys/dev/wg/compat.h b/sys/dev/wg/compat.h
index 101a771579d9..29f6ea92a50e 100644
--- a/sys/dev/wg/compat.h
+++ b/sys/dev/wg/compat.h
@@ -9,110 +9,9 @@
 
 #include <sys/param.h>
 
-#if (__FreeBSD_version < 1400036 && __FreeBSD_version >= 1400000) || __FreeBSD_version < 1300519
-#define COMPAT_NEED_CHACHA20POLY1305_MBUF
-#endif
-
-#if __FreeBSD_version < 1400048
-#define COMPAT_NEED_CHACHA20POLY1305
-#endif
-
-#if __FreeBSD_version < 1400049
-#define COMPAT_NEED_CURVE25519
-#endif
-
-#if __FreeBSD_version < 0x7fffffff /* TODO: update this when implemented */
 #define COMPAT_NEED_BLAKE2S
-#endif
 
 #if __FreeBSD_version < 1400059
 #include <sys/sockbuf.h>
 #define sbcreatecontrol(a, b, c, d, e) sbcreatecontrol(a, b, c, d)
 #endif
-
-#if __FreeBSD_version < 1300507
-#include <sys/smp.h>
-#include <sys/gtaskqueue.h>
-
-struct taskqgroup_cpu {
-       LIST_HEAD(, grouptask)  tgc_tasks;
-       struct gtaskqueue       *tgc_taskq;
-       int     tgc_cnt;
-       int     tgc_cpu;
-};
-
-struct taskqgroup {
-       struct taskqgroup_cpu tqg_queue[MAXCPU];
-       /* Other members trimmed from compat. */
-};
-
-static inline void taskqgroup_drain_all(struct taskqgroup *tqg)
-{
-       struct gtaskqueue *q;
-
-       for (int i = 0; i < mp_ncpus; i++) {
-               q = tqg->tqg_queue[i].tgc_taskq;
-               if (q == NULL)
-                       continue;
-               gtaskqueue_drain_all(q);
-       }
-}
-#endif
-
-#if __FreeBSD_version < 1300000
-#define VIMAGE
-
-#include <sys/types.h>
-#include <sys/limits.h>
-#include <sys/endian.h>
-#include <sys/socket.h>
-#include <sys/libkern.h>
-#include <sys/malloc.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/socketvar.h>
-#include <sys/protosw.h>
-#include <net/vnet.h>
-#include <net/if.h>
-#include <net/if_var.h>
-#include <vm/uma.h>
-
-#define taskqgroup_attach(a, b, c, d, e, f) taskqgroup_attach((a), (b), (c), -1, (f))
-#define taskqgroup_attach_cpu(a, b, c, d, e, f, g) taskqgroup_attach_cpu((a), (b), (c), (d), -1, (g))
-
-#undef NET_EPOCH_ENTER
-#define NET_EPOCH_ENTER(et) NET_EPOCH_ENTER_ET(et)
-#undef NET_EPOCH_EXIT
-#define NET_EPOCH_EXIT(et) NET_EPOCH_EXIT_ET(et)
-#define NET_EPOCH_CALL(f, c) epoch_call(net_epoch_preempt, (c), (f))
-#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
-
-#undef atomic_load_ptr
-#define atomic_load_ptr(p) (*(volatile __typeof(*p) *)(p))
-
-#endif
-
-#if __FreeBSD_version < 1202000
-static inline uint32_t arc4random_uniform(uint32_t bound)
-{
-       uint32_t ret, max_mod_bound;
-
-       if (bound < 2)
-               return 0;
-
-       max_mod_bound = (1 + ~bound) % bound;
-
-       do {
-               ret = arc4random();
-       } while (ret < max_mod_bound);
-
-       return ret % bound;
-}
-
-typedef void callout_func_t(void *);
-
-#ifndef CSUM_SND_TAG
-#define CSUM_SND_TAG 0x80000000
-#endif
-
-#endif
diff --git a/sys/dev/wg/crypto.h b/sys/dev/wg/crypto.h
index 2115039321b1..ff7b39354749 100644
--- a/sys/dev/wg/crypto.h
+++ b/sys/dev/wg/crypto.h
@@ -8,6 +8,9 @@
 #define _WG_CRYPTO
 
 #include <sys/param.h>
+#include <sys/endian.h>
+#include <crypto/chacha20_poly1305.h>
+#include <crypto/curve25519.h>
 
 struct mbuf;
 
@@ -20,36 +23,6 @@ enum chacha20poly1305_lengths {
        CHACHA20POLY1305_AUTHTAG_SIZE = 16
 };
 
-#ifdef COMPAT_NEED_CHACHA20POLY1305
-void
-chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-                        const uint8_t *ad, const size_t ad_len,
-                        const uint64_t nonce,
-                        const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-bool
-chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-                        const uint8_t *ad, const size_t ad_len,
-                        const uint64_t nonce,
-                        const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-void
-xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
-                         const size_t src_len, const uint8_t *ad,
-                         const size_t ad_len,
-                         const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-                         const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-bool
-xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
-                         const size_t src_len,  const uint8_t *ad,
-                         const size_t ad_len,
-                         const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-                         const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-#else
-#include <sys/endian.h>
-#include <crypto/chacha20_poly1305.h>
-
 static inline void
 chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
                         const uint8_t *ad, const size_t ad_len,
@@ -95,7 +68,6 @@ xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
 {
        return (xchacha20_poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key));
 }
-#endif
 
 int
 chacha20poly1305_encrypt_mbuf(struct mbuf *, const uint64_t nonce,
@@ -146,37 +118,4 @@ static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
 }
 #endif
 
-#ifdef COMPAT_NEED_CURVE25519
-enum curve25519_lengths {
-        CURVE25519_KEY_SIZE = 32
-};
-
-bool curve25519(uint8_t mypublic[static CURVE25519_KEY_SIZE],
-               const uint8_t secret[static CURVE25519_KEY_SIZE],
-               const uint8_t basepoint[static CURVE25519_KEY_SIZE]);
-
-static inline bool
-curve25519_generate_public(uint8_t pub[static CURVE25519_KEY_SIZE],
-                          const uint8_t secret[static CURVE25519_KEY_SIZE])
-{
-       static const uint8_t basepoint[CURVE25519_KEY_SIZE] = { 9 };
-
-       return curve25519(pub, secret, basepoint);
-}
-
-static inline void curve25519_clamp_secret(uint8_t secret[static CURVE25519_KEY_SIZE])
-{
-        secret[0] &= 248;
-        secret[31] = (secret[31] & 127) | 64;
-}
-
-static inline void curve25519_generate_secret(uint8_t secret[CURVE25519_KEY_SIZE])
-{
-       arc4random_buf(secret, CURVE25519_KEY_SIZE);
-       curve25519_clamp_secret(secret);
-}
-#else
-#include <crypto/curve25519.h>
-#endif
-
 #endif
diff --git a/sys/dev/wg/wg_crypto.c b/sys/dev/wg/wg_crypto.c
index 29d9487d647f..53441ef25b40 100644
--- a/sys/dev/wg/wg_crypto.c
+++ b/sys/dev/wg/wg_crypto.c
@@ -12,776 +12,38 @@
 
 #include "crypto.h"
 
-#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
 static crypto_session_t chacha20_poly1305_sid;
-#endif
 
+#ifdef COMPAT_NEED_BLAKE2S
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
-#ifndef noinline
-#define noinline __attribute__((noinline))
-#endif
-#ifndef __aligned
-#define __aligned(x) __attribute__((aligned(x)))
-#endif
 #ifndef DIV_ROUND_UP
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #endif
 
 #define le32_to_cpup(a) le32toh(*(a))
-#define le64_to_cpup(a) le64toh(*(a))
 #define cpu_to_le32(a) htole32(a)
-#define cpu_to_le64(a) htole64(a)
 
-static inline __unused uint32_t get_unaligned_le32(const uint8_t *a)
-{
-       uint32_t l;
-       __builtin_memcpy(&l, a, sizeof(l));
-       return le32_to_cpup(&l);
-}
-static inline __unused uint64_t get_unaligned_le64(const uint8_t *a)
-{
-       uint64_t l;
-       __builtin_memcpy(&l, a, sizeof(l));
-       return le64_to_cpup(&l);
-}
-static inline __unused void put_unaligned_le32(uint32_t s, uint8_t *d)
-{
-       uint32_t l = cpu_to_le32(s);
-       __builtin_memcpy(d, &l, sizeof(l));
-}
-static inline __unused void cpu_to_le32_array(uint32_t *buf, unsigned int words)
+static inline void cpu_to_le32_array(uint32_t *buf, unsigned int words)
 {
         while (words--) {
                *buf = cpu_to_le32(*buf);
                ++buf;
        }
 }
-static inline __unused void le32_to_cpu_array(uint32_t *buf, unsigned int words)
+static inline void le32_to_cpu_array(uint32_t *buf, unsigned int words)
 {
         while (words--) {
                *buf = le32_to_cpup(buf);
                ++buf;
         }
 }
-static inline __unused uint32_t rol32(uint32_t word, unsigned int shift)
-{
-        return (word << (shift & 31)) | (word >> ((-shift) & 31));
-}
-static inline __unused uint32_t ror32(uint32_t word, unsigned int shift)
+static inline uint32_t ror32(uint32_t word, unsigned int shift)
 {
        return (word >> (shift & 31)) | (word << ((-shift) & 31));
 }
 
-#if defined(COMPAT_NEED_CHACHA20POLY1305) || defined(COMPAT_NEED_CHACHA20POLY1305_MBUF)
-static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len; ++i)
-               dst[i] = src1[i] ^ src2[i];
-}
-
-#define QUARTER_ROUND(x, a, b, c, d) ( \
-       x[a] += x[b], \
-       x[d] = rol32((x[d] ^ x[a]), 16), \
-       x[c] += x[d], \
-       x[b] = rol32((x[b] ^ x[c]), 12), \
-       x[a] += x[b], \
-       x[d] = rol32((x[d] ^ x[a]), 8), \
-       x[c] += x[d], \
-       x[b] = rol32((x[b] ^ x[c]), 7) \
-)
-
-#define C(i, j) (i * 4 + j)
-
-#define DOUBLE_ROUND(x) ( \
-       /* Column Round */ \
-       QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
-       QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
-       QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
-       QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
-       /* Diagonal Round */ \
-       QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
-       QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
-       QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
-       QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
-)
-
-#define TWENTY_ROUNDS(x) ( \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x), \
-       DOUBLE_ROUND(x) \
-)
-
-enum chacha20_lengths {
-       CHACHA20_NONCE_SIZE = 16,
-       CHACHA20_KEY_SIZE = 32,
-       CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t),
-       CHACHA20_BLOCK_SIZE = 64,
-       CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t),
-       HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
-       HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
-};
-
-enum chacha20_constants { /* expand 32-byte k */
-       CHACHA20_CONSTANT_EXPA = 0x61707865U,
-       CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
-       CHACHA20_CONSTANT_2_BY = 0x79622d32U,
-       CHACHA20_CONSTANT_TE_K = 0x6b206574U
-};
-
-struct chacha20_ctx {
-       union {
-               uint32_t state[16];
-               struct {
-                       uint32_t constant[4];
-                       uint32_t key[8];
-                       uint32_t counter[4];
-               };
-       };
-};
-
-static void chacha20_init(struct chacha20_ctx *ctx,
-                         const uint8_t key[CHACHA20_KEY_SIZE],
-                         const uint64_t nonce)
-{
-       ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
-       ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
-       ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
-       ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
-       ctx->key[0] = get_unaligned_le32(key + 0);
-       ctx->key[1] = get_unaligned_le32(key + 4);
-       ctx->key[2] = get_unaligned_le32(key + 8);
-       ctx->key[3] = get_unaligned_le32(key + 12);
-       ctx->key[4] = get_unaligned_le32(key + 16);
-       ctx->key[5] = get_unaligned_le32(key + 20);
-       ctx->key[6] = get_unaligned_le32(key + 24);
-       ctx->key[7] = get_unaligned_le32(key + 28);
-       ctx->counter[0] = 0;
-       ctx->counter[1] = 0;
-       ctx->counter[2] = nonce & 0xffffffffU;
-       ctx->counter[3] = nonce >> 32;
-}
-
-static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream)
-{
-       uint32_t x[CHACHA20_BLOCK_WORDS];
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(x); ++i)
-               x[i] = ctx->state[i];
-
-       TWENTY_ROUNDS(x);
-
-       for (i = 0; i < ARRAY_SIZE(x); ++i)
-               stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
-
-       ctx->counter[0] += 1;
-}
-
-static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in,
-                    uint32_t len)
-{
-       uint32_t buf[CHACHA20_BLOCK_WORDS];
-
-       while (len >= CHACHA20_BLOCK_SIZE) {
-               chacha20_block(ctx, buf);
-               xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE);
-               len -= CHACHA20_BLOCK_SIZE;
-               out += CHACHA20_BLOCK_SIZE;
-               in += CHACHA20_BLOCK_SIZE;
-       }
-       if (len) {
-               chacha20_block(ctx, buf);
-               xor_cpy(out, in, (uint8_t *)buf, len);
-       }
-}
-
-static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS],
-                     const uint8_t nonce[HCHACHA20_NONCE_SIZE],
-                     const uint8_t key[HCHACHA20_KEY_SIZE])
-{
-       uint32_t x[] = { CHACHA20_CONSTANT_EXPA,
-                   CHACHA20_CONSTANT_ND_3,
-                   CHACHA20_CONSTANT_2_BY,
-                   CHACHA20_CONSTANT_TE_K,
-                   get_unaligned_le32(key +  0),
-                   get_unaligned_le32(key +  4),
-                   get_unaligned_le32(key +  8),
-                   get_unaligned_le32(key + 12),
-                   get_unaligned_le32(key + 16),
-                   get_unaligned_le32(key + 20),
-                   get_unaligned_le32(key + 24),
-                   get_unaligned_le32(key + 28),
-                   get_unaligned_le32(nonce +  0),
-                   get_unaligned_le32(nonce +  4),
-                   get_unaligned_le32(nonce +  8),
-                   get_unaligned_le32(nonce + 12)
-       };
-
-       TWENTY_ROUNDS(x);
-
-       memcpy(derived_key + 0, x +  0, sizeof(uint32_t) * 4);
-       memcpy(derived_key + 4, x + 12, sizeof(uint32_t) * 4);
-}
-
-enum poly1305_lengths {
-       POLY1305_BLOCK_SIZE = 16,
-       POLY1305_KEY_SIZE = 32,
-       POLY1305_MAC_SIZE = 16
-};
-
-struct poly1305_internal {
-       uint32_t h[5];
-       uint32_t r[5];
-       uint32_t s[4];
-};
-
-struct poly1305_ctx {
-       struct poly1305_internal state;
-       uint32_t nonce[4];
-       uint8_t data[POLY1305_BLOCK_SIZE];
-       size_t num;
-};
-
-static void poly1305_init_core(struct poly1305_internal *st,
-                              const uint8_t key[16])
-{
-       /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-       st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
-       st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
-       st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
-       st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
-       st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
-
-       /* s = 5*r */
-       st->s[0] = st->r[1] * 5;
-       st->s[1] = st->r[2] * 5;
-       st->s[2] = st->r[3] * 5;
-       st->s[3] = st->r[4] * 5;
-
-       /* h = 0 */
-       st->h[0] = 0;
-       st->h[1] = 0;
-       st->h[2] = 0;
-       st->h[3] = 0;
-       st->h[4] = 0;
-}
-
-static void poly1305_blocks_core(struct poly1305_internal *st,
-                                const uint8_t *input, size_t len,
-                                const uint32_t padbit)
-{
-       const uint32_t hibit = padbit << 24;
-       uint32_t r0, r1, r2, r3, r4;
-       uint32_t s1, s2, s3, s4;
-       uint32_t h0, h1, h2, h3, h4;
-       uint64_t d0, d1, d2, d3, d4;
-       uint32_t c;
-
-       r0 = st->r[0];
-       r1 = st->r[1];
-       r2 = st->r[2];
-       r3 = st->r[3];
-       r4 = st->r[4];
-
-       s1 = st->s[0];
-       s2 = st->s[1];
-       s3 = st->s[2];
-       s4 = st->s[3];
-
-       h0 = st->h[0];
-       h1 = st->h[1];
-       h2 = st->h[2];
-       h3 = st->h[3];
-       h4 = st->h[4];
-
-       while (len >= POLY1305_BLOCK_SIZE) {
-               /* h += m[i] */
-               h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
-               h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
-               h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
-               h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
-               h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
-
-               /* h *= r */
-               d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) +
-                    ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) +
-                    ((uint64_t)h4 * s1);
-               d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
-                    ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) +
-                    ((uint64_t)h4 * s2);
-               d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
-                    ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) +
-                    ((uint64_t)h4 * s3);
-               d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
-                    ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
-                    ((uint64_t)h4 * s4);
-               d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
-                    ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
-                    ((uint64_t)h4 * r0);
-
-               /* (partial) h %= p */
-               c = (uint32_t)(d0 >> 26);
-               h0 = (uint32_t)d0 & 0x3ffffff;
-               d1 += c;
-               c = (uint32_t)(d1 >> 26);
-               h1 = (uint32_t)d1 & 0x3ffffff;
-               d2 += c;
-               c = (uint32_t)(d2 >> 26);
-               h2 = (uint32_t)d2 & 0x3ffffff;
-               d3 += c;
-               c = (uint32_t)(d3 >> 26);
-               h3 = (uint32_t)d3 & 0x3ffffff;
-               d4 += c;
-               c = (uint32_t)(d4 >> 26);
-               h4 = (uint32_t)d4 & 0x3ffffff;
-               h0 += c * 5;
-               c = (h0 >> 26);
-               h0 = h0 & 0x3ffffff;
-               h1 += c;
-
-               input += POLY1305_BLOCK_SIZE;
-               len -= POLY1305_BLOCK_SIZE;
-       }
-
-       st->h[0] = h0;
-       st->h[1] = h1;
-       st->h[2] = h2;
-       st->h[3] = h3;
-       st->h[4] = h4;
-}
-
-static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16],
-                              const uint32_t nonce[4])
-{
-       uint32_t h0, h1, h2, h3, h4, c;
-       uint32_t g0, g1, g2, g3, g4;
-       uint64_t f;
-       uint32_t mask;
-
-       /* fully carry h */
-       h0 = st->h[0];
-       h1 = st->h[1];
-       h2 = st->h[2];
-       h3 = st->h[3];
-       h4 = st->h[4];
-
-       c = h1 >> 26;
-       h1 = h1 & 0x3ffffff;
-       h2 += c;
-       c = h2 >> 26;
-       h2 = h2 & 0x3ffffff;
-       h3 += c;
-       c = h3 >> 26;
-       h3 = h3 & 0x3ffffff;
-       h4 += c;
-       c = h4 >> 26;
-       h4 = h4 & 0x3ffffff;
-       h0 += c * 5;
-       c = h0 >> 26;
-       h0 = h0 & 0x3ffffff;
-       h1 += c;
-
-       /* compute h + -p */
-       g0 = h0 + 5;
-       c = g0 >> 26;
-       g0 &= 0x3ffffff;
-       g1 = h1 + c;
-       c = g1 >> 26;
-       g1 &= 0x3ffffff;
-       g2 = h2 + c;
-       c = g2 >> 26;
-       g2 &= 0x3ffffff;
-       g3 = h3 + c;
-       c = g3 >> 26;
-       g3 &= 0x3ffffff;
-       g4 = h4 + c - (1UL << 26);
-
-       /* select h if h < p, or h + -p if h >= p */
-       mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
-       g0 &= mask;
-       g1 &= mask;
-       g2 &= mask;
-       g3 &= mask;
-       g4 &= mask;
-       mask = ~mask;
-
-       h0 = (h0 & mask) | g0;
-       h1 = (h1 & mask) | g1;
-       h2 = (h2 & mask) | g2;
-       h3 = (h3 & mask) | g3;
-       h4 = (h4 & mask) | g4;
-
-       /* h = h % (2^128) */
-       h0 = ((h0) | (h1 << 26)) & 0xffffffff;
-       h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
-       h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
-       h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
-
-       /* mac = (h + nonce) % (2^128) */
-       f = (uint64_t)h0 + nonce[0];
-       h0 = (uint32_t)f;
-       f = (uint64_t)h1 + nonce[1] + (f >> 32);
-       h1 = (uint32_t)f;
-       f = (uint64_t)h2 + nonce[2] + (f >> 32);
-       h2 = (uint32_t)f;
-       f = (uint64_t)h3 + nonce[3] + (f >> 32);
-       h3 = (uint32_t)f;
-
-       put_unaligned_le32(h0, &mac[0]);
-       put_unaligned_le32(h1, &mac[4]);
-       put_unaligned_le32(h2, &mac[8]);
-       put_unaligned_le32(h3, &mac[12]);
-}
-
-static void poly1305_init(struct poly1305_ctx *ctx,
-                         const uint8_t key[POLY1305_KEY_SIZE])
-{
-       ctx->nonce[0] = get_unaligned_le32(&key[16]);
-       ctx->nonce[1] = get_unaligned_le32(&key[20]);
-       ctx->nonce[2] = get_unaligned_le32(&key[24]);
-       ctx->nonce[3] = get_unaligned_le32(&key[28]);
-
-       poly1305_init_core(&ctx->state, key);
-
-       ctx->num = 0;
-}
-
-static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input,
-                           size_t len)
-{
-       const size_t num = ctx->num;
-       size_t rem;
-
-       if (num) {
-               rem = POLY1305_BLOCK_SIZE - num;
-               if (len < rem) {
-                       memcpy(ctx->data + num, input, len);
-                       ctx->num = num + len;
-                       return;
-               }
-               memcpy(ctx->data + num, input, rem);
-               poly1305_blocks_core(&ctx->state, ctx->data,
-                                    POLY1305_BLOCK_SIZE, 1);
-               input += rem;
-               len -= rem;
-       }
-
-       rem = len % POLY1305_BLOCK_SIZE;
-       len -= rem;
-
-       if (len >= POLY1305_BLOCK_SIZE) {
-               poly1305_blocks_core(&ctx->state, input, len, 1);
-               input += len;
-       }
-
-       if (rem)
-               memcpy(ctx->data, input, rem);
-
-       ctx->num = rem;
-}
-
-static void poly1305_final(struct poly1305_ctx *ctx,
-                          uint8_t mac[POLY1305_MAC_SIZE])
-{
-       size_t num = ctx->num;
-
-       if (num) {
-               ctx->data[num++] = 1;
-               while (num < POLY1305_BLOCK_SIZE)
-                       ctx->data[num++] = 0;
-               poly1305_blocks_core(&ctx->state, ctx->data,
-                                    POLY1305_BLOCK_SIZE, 0);
-       }
-
-       poly1305_emit_core(&ctx->state, mac, ctx->nonce);
-
-       explicit_bzero(ctx, sizeof(*ctx));
-}
-#endif
-
-#ifdef COMPAT_NEED_CHACHA20POLY1305
-static const uint8_t pad0[16] = { 0 };
-
-void
-chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-                        const uint8_t *ad, const size_t ad_len,
-                        const uint64_t nonce,
-                        const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       struct poly1305_ctx poly1305_state;
-       struct chacha20_ctx chacha20_state;
-       union {
-               uint8_t block0[POLY1305_KEY_SIZE];
-               uint64_t lens[2];
-       } b = { { 0 } };
-
-       chacha20_init(&chacha20_state, key, nonce);
-       chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-       poly1305_init(&poly1305_state, b.block0);
-
-       poly1305_update(&poly1305_state, ad, ad_len);
-       poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
-
-       chacha20(&chacha20_state, dst, src, src_len);
-
-       poly1305_update(&poly1305_state, dst, src_len);
-       poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
-
-       b.lens[0] = cpu_to_le64(ad_len);
-       b.lens[1] = cpu_to_le64(src_len);
-       poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-       poly1305_final(&poly1305_state, dst + src_len);
-
-       explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-       explicit_bzero(&b, sizeof(b));
-}
-
-bool
-chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-                        const uint8_t *ad, const size_t ad_len,
-                        const uint64_t nonce,
-                        const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       struct poly1305_ctx poly1305_state;
-       struct chacha20_ctx chacha20_state;
-       bool ret;
-       size_t dst_len;
-       union {
-               uint8_t block0[POLY1305_KEY_SIZE];
-               uint8_t mac[POLY1305_MAC_SIZE];
-               uint64_t lens[2];
-       } b = { { 0 } };
-
-       if (src_len < POLY1305_MAC_SIZE)
-               return false;
-
-       chacha20_init(&chacha20_state, key, nonce);
-       chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-       poly1305_init(&poly1305_state, b.block0);
-
-       poly1305_update(&poly1305_state, ad, ad_len);
-       poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
-
-       dst_len = src_len - POLY1305_MAC_SIZE;
-       poly1305_update(&poly1305_state, src, dst_len);
-       poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);
-
-       b.lens[0] = cpu_to_le64(ad_len);
-       b.lens[1] = cpu_to_le64(dst_len);
-       poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-       poly1305_final(&poly1305_state, b.mac);
-
-       ret = timingsafe_bcmp(b.mac, src + dst_len, POLY1305_MAC_SIZE) == 0;
-       if (ret)
-               chacha20(&chacha20_state, dst, src, dst_len);
-
-       explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-       explicit_bzero(&b, sizeof(b));
-
-       return ret;
-}
-
-void
-xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
-                         const size_t src_len, const uint8_t *ad,
-                         const size_t ad_len,
-                         const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-                         const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       uint32_t derived_key[CHACHA20_KEY_WORDS];
-
-       hchacha20(derived_key, nonce, key);
-       cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
-       chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
-                                get_unaligned_le64(nonce + 16),
-                                (uint8_t *)derived_key);
-       explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
-}
-
-bool
-xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
-                         const size_t src_len,  const uint8_t *ad,
-                         const size_t ad_len,
-                         const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-                         const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       bool ret;
-       uint32_t derived_key[CHACHA20_KEY_WORDS];
-
-       hchacha20(derived_key, nonce, key);
-       cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
-       ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
-                                      get_unaligned_le64(nonce + 16),
-                                      (uint8_t *)derived_key);
-       explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
-       return ret;
-}
-#endif
-
-#ifdef COMPAT_NEED_CHACHA20POLY1305_MBUF
-static inline int
-chacha20poly1305_crypt_mbuf(struct mbuf *m0, uint64_t nonce,
-                           const uint8_t key[CHACHA20POLY1305_KEY_SIZE], bool encrypt)
-{
-       struct poly1305_ctx poly1305_state;
-       struct chacha20_ctx chacha20_state;
-       uint8_t *buf, mbuf_mac[POLY1305_MAC_SIZE];
-       size_t len, leftover = 0;
-       struct mbuf *m;
-       int ret;
-       union {
-               uint32_t stream[CHACHA20_BLOCK_WORDS];
-               uint8_t block0[POLY1305_KEY_SIZE];
-               uint8_t mac[POLY1305_MAC_SIZE];
-               uint64_t lens[2];
-       } b = { { 0 } };
-
-       if (!encrypt) {
-               if (m0->m_pkthdr.len < POLY1305_MAC_SIZE)
-                       return EMSGSIZE;
-               m_copydata(m0, m0->m_pkthdr.len - POLY1305_MAC_SIZE, POLY1305_MAC_SIZE, mbuf_mac);
-               m_adj(m0, -POLY1305_MAC_SIZE);
-       }
-
-       chacha20_init(&chacha20_state, key, nonce);
-       chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-       poly1305_init(&poly1305_state, b.block0);
-
-       for (m = m0; m; m = m->m_next) {
-               len = m->m_len;
-               buf = m->m_data;
-
-               if (!encrypt)
-                       poly1305_update(&poly1305_state, m->m_data, m->m_len);
-
-               if (leftover != 0) {
-                       size_t l = min(len, leftover);
-                       xor_cpy(buf, buf, ((uint8_t *)b.stream) + (CHACHA20_BLOCK_SIZE - leftover), l);
-                       leftover -= l;
-                       buf += l;
-                       len -= l;
-               }
-
-               while (len >= CHACHA20_BLOCK_SIZE) {
-                       chacha20_block(&chacha20_state, b.stream);
-                       xor_cpy(buf, buf, (uint8_t *)b.stream, CHACHA20_BLOCK_SIZE);
-                       buf += CHACHA20_BLOCK_SIZE;
-                       len -= CHACHA20_BLOCK_SIZE;
-               }
-
-               if (len) {
-                       chacha20_block(&chacha20_state, b.stream);
-                       xor_cpy(buf, buf, (uint8_t *)b.stream, len);
-                       leftover = CHACHA20_BLOCK_SIZE - len;
-               }
-
-               if (encrypt)
-                       poly1305_update(&poly1305_state, m->m_data, m->m_len);
-       }
-       poly1305_update(&poly1305_state, pad0, (0x10 - m0->m_pkthdr.len) & 0xf);
-
-       b.lens[0] = 0;
-       b.lens[1] = cpu_to_le64(m0->m_pkthdr.len);
-       poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-       poly1305_final(&poly1305_state, b.mac);
-
-       if (encrypt)
-               ret = m_append(m0, POLY1305_MAC_SIZE, b.mac) ? 0 : ENOMEM;
-       else
-               ret = timingsafe_bcmp(b.mac, mbuf_mac, POLY1305_MAC_SIZE) == 0 ? 0 : EBADMSG;
-
-       explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-       explicit_bzero(&b, sizeof(b));
-
-       return ret;
-}
-
-int
-chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-                             const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       return chacha20poly1305_crypt_mbuf(m, nonce, key, true);
-}
-
-int
-chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-                             const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       return chacha20poly1305_crypt_mbuf(m, nonce, key, false);
-}
-#else
-static int
-crypto_callback(struct cryptop *crp)
-{
-       return (0);
-}
-
-int
-chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-                             const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-       static const char blank_tag[POLY1305_HASH_LEN];
-       struct cryptop crp;
-       int ret;
-
-       if (!m_append(m, POLY1305_HASH_LEN, blank_tag))
-               return (ENOMEM);
-       crypto_initreq(&crp, chacha20_poly1305_sid);
-       crp.crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST;
-       crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
-       crypto_use_mbuf(&crp, m);
-       crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
-       crp.crp_digest_start = crp.crp_payload_length;
-       le64enc(crp.crp_iv, nonce);
-       crp.crp_cipher_key = key;
-       crp.crp_callback = crypto_callback;
-       ret = crypto_dispatch(&crp);
-       crypto_destroyreq(&crp);
-       return (ret);
-}
-
*** 963 LINES SKIPPED ***

Reply via email to