On Mon, Oct 07, 2019 at 06:45:48PM +0200, Ard Biesheuvel wrote:
> +static int chacha_stream_xor(struct skcipher_request *req,
> + const struct chacha_ctx *ctx, const u8 *iv)
> +{
> + struct skcipher_walk walk;
> + u32 state[16];
> + int err;
> +
> + err = skcipher_walk_virt(&walk, req, false);
> +
> + chacha_init_generic(state, ctx->key, iv);
> +
> + while (walk.nbytes > 0) {
> + unsigned int nbytes = walk.nbytes;
> +
> + if (nbytes < walk.total)
> + nbytes = round_down(nbytes, walk.stride);
> +
> + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
> + nbytes, state, ctx->nrounds);
> + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
> + err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> + }
> +
> + return err;
> +}
> +
> +static int chacha_arm(struct skcipher_request *req)
> +{
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
> +
> + return chacha_stream_xor(req, ctx, req->iv);
> +}
> +
> +static int chacha_neon_stream_xor(struct skcipher_request *req,
> + const struct chacha_ctx *ctx, const u8 *iv)
> +{
> + struct skcipher_walk walk;
> + u32 state[16];
> + bool do_neon;
> + int err;
> +
> + err = skcipher_walk_virt(&walk, req, false);
> +
> + chacha_init_generic(state, ctx->key, iv);
> +
> + do_neon = (req->cryptlen > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
> + while (walk.nbytes > 0) {
> + unsigned int nbytes = walk.nbytes;
> +
> + if (nbytes < walk.total)
> + nbytes = round_down(nbytes, walk.stride);
> +
> + if (!do_neon) {
> + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
> + nbytes, state, ctx->nrounds);
> + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
> + } else {
> + kernel_neon_begin();
> + chacha_doneon(state, walk.dst.virt.addr,
> + walk.src.virt.addr, nbytes, ctx->nrounds);
> + kernel_neon_end();
> + }
> + err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> + }
> +
> + return err;
> +}
> +
> +static int chacha_neon(struct skcipher_request *req)
> +{
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
> +
> + return chacha_neon_stream_xor(req, ctx, req->iv);
> +}
> +
> +static int xchacha_arm(struct skcipher_request *req)
> +{
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
> + struct chacha_ctx subctx;
> + u32 state[16];
> + u8 real_iv[16];
> +
> + chacha_init_generic(state, ctx->key, req->iv);
> +
> + hchacha_block_arm(state, subctx.key, ctx->nrounds);
> + subctx.nrounds = ctx->nrounds;
> +
> + memcpy(&real_iv[0], req->iv + 24, 8);
> + memcpy(&real_iv[8], req->iv + 16, 8);
> + return chacha_stream_xor(req, &subctx, real_iv);
> +}
> +
> +static int xchacha_neon(struct skcipher_request *req)
> +{
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
> + struct chacha_ctx subctx;
> + u32 state[16];
> + u8 real_iv[16];
> +
> + chacha_init_generic(state, ctx->key, req->iv);
> +
> + if (!crypto_simd_usable()) {
> + hchacha_block_arm(state, subctx.key, ctx->nrounds);
> + } else {
> + kernel_neon_begin();
> + hchacha_block_neon(state, subctx.key, ctx->nrounds);
> + kernel_neon_end();
> + }
> + subctx.nrounds = ctx->nrounds;
> +
> + memcpy(&real_iv[0], req->iv + 24, 8);
> + memcpy(&real_iv[8], req->iv + 16, 8);
> + return chacha_neon_stream_xor(req, &subctx, real_iv);
> +}
There is some code duplication here: two implementations of stream_xor, and two
implementations of xchacha (hchacha + stream_xor). How about sharing a single
implementation of each and passing a 'neon' flag to select the code path,
something like the following?
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index dae69a63b640..1952cbda2168 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -32,6 +32,11 @@ asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+static inline bool neon_usable(void)
+{
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
@@ -95,7 +100,8 @@ void chacha_crypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
EXPORT_SYMBOL(chacha_crypt);
static int chacha_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv,
+ bool neon)
{
struct skcipher_walk walk;
u32 state[16];
@@ -105,49 +111,14 @@ static int chacha_stream_xor(struct skcipher_request *req,
chacha_init_generic(state, ctx->key, iv);
+ neon &= (req->cryptlen > CHACHA_BLOCK_SIZE);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
- chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, state, ctx->nrounds);
- state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_arm(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return chacha_stream_xor(req, ctx, req->iv);
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- bool do_neon;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- chacha_init_generic(state, ctx->key, iv);
-
- do_neon = (req->cryptlen > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (!static_branch_likely(&use_neon) || !do_neon) {
+ if (!neon) {
chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
nbytes, state, ctx->nrounds);
state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
@@ -163,33 +134,25 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
return err;
}
-static int chacha_neon(struct skcipher_request *req)
+static int do_chacha(struct skcipher_request *req, bool neon)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- return chacha_neon_stream_xor(req, ctx, req->iv);
+ return chacha_stream_xor(req, ctx, req->iv, neon);
}
-static int xchacha_arm(struct skcipher_request *req)
+static int chacha_arm(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- chacha_init_generic(state, ctx->key, req->iv);
-
- hchacha_block_arm(state, subctx.key, ctx->nrounds);
- subctx.nrounds = ctx->nrounds;
+ return do_chacha(req, false);
+}
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_stream_xor(req, &subctx, real_iv);
+static int chacha_neon(struct skcipher_request *req)
+{
+ return do_chacha(req, neon_usable());
}
-static int xchacha_neon(struct skcipher_request *req)
+static int do_xchacha(struct skcipher_request *req, bool neon)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -199,7 +162,7 @@ static int xchacha_neon(struct skcipher_request *req)
chacha_init_generic(state, ctx->key, req->iv);
- if (!static_branch_likely(&use_neon) || !crypto_simd_usable()) {
+ if (!neon) {
hchacha_block_arm(state, subctx.key, ctx->nrounds);
} else {
kernel_neon_begin();
@@ -210,7 +173,17 @@ static int xchacha_neon(struct skcipher_request *req)
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_neon_stream_xor(req, &subctx, real_iv);
+ return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+ return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ return do_xchacha(req, neon_usable());
}
static int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,