Instead of exposing the arm64-optimized SHA-1 code via arm64-specific crypto_shash algorithms, just implement the sha1_blocks() library function. This is much simpler, it makes the SHA-1 library functions arm64-optimized, and it fixes the longstanding issue where the arm64-optimized SHA-1 code was disabled by default. SHA-1 remains available through crypto_shash, but individual architectures no longer need to handle it.
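(For context, not part of this patch: callers now reach the optimized code through the SHA-1 library interface rather than through crypto_shash. A minimal sketch of such a caller, assuming the one-shot sha1() helper declared in <crypto/sha1.h>; example_digest() is a hypothetical name used only for illustration:

	#include <crypto/sha1.h>

	/* Illustration only: computes a SHA-1 digest via the library
	 * interface.  sha1() chunks the input into 64-byte blocks and
	 * feeds them to sha1_blocks(), the per-architecture hook that
	 * this patch makes arm64-optimized.
	 */
	static void example_digest(const u8 *data, size_t len)
	{
		u8 digest[SHA1_DIGEST_SIZE];

		sha1(data, len, digest);
	}

)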
Remove support for SHA-1 finalization from the assembly code, since the library does not yet support architecture-specific overrides of the finalization. (Support for that has been omitted for now, for simplicity and because it usually isn't performance-critical.)

To match sha1_blocks(), change the type of the nblocks parameter and the return value of __sha1_ce_transform() from int to size_t. Update the assembly code accordingly.

Signed-off-by: Eric Biggers <ebigg...@kernel.org>
---
 arch/arm64/configs/defconfig     |   1 -
 arch/arm64/crypto/Kconfig        |  11 --
 arch/arm64/crypto/Makefile       |   3 -
 arch/arm64/crypto/sha1-ce-glue.c | 118 ------------------
 lib/crypto/Kconfig               |   1 +
 lib/crypto/Makefile              |   1 +
 .../crypto/arm64}/sha1-ce-core.S |  40 ++----
 lib/crypto/arm64/sha1.h          |  39 ++++++
 8 files changed, 51 insertions(+), 163 deletions(-)
 delete mode 100644 arch/arm64/crypto/sha1-ce-glue.c
 rename {arch/arm64/crypto => lib/crypto/arm64}/sha1-ce-core.S (76%)
 create mode 100644 lib/crypto/arm64/sha1.h

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b612b78b3b091..31681206b49cf 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1741,11 +1741,10 @@ CONFIG_CRYPTO_BENCHMARK=m
 CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA3_ARM64=m
 CONFIG_CRYPTO_SM3_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_AES_ARM64_BS=m
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index a9ead99f72c28..3bb5b513d5ae2 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -23,21 +23,10 @@ config CRYPTO_NHPOLY1305_NEON
 	  NHPoly1305 hash function (Adiantum)
 
 	  Architecture: arm64 using:
 	  - NEON (Advanced SIMD) extensions
 
-config CRYPTO_SHA1_ARM64_CE
-	tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_SHA1
-	help
-	  SHA-1 secure hash algorithm (FIPS 180)
-
-	  Architecture: arm64 using:
-	  - ARMv8 Crypto Extensions
-
 config CRYPTO_SHA3_ARM64
 	tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SHA3
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 228101f125d50..a8b2cdbe202c1 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -3,13 +3,10 @@
 # linux/arch/arm64/crypto/Makefile
 #
 # Copyright (C) 2014 Linaro Ltd <ard.biesheu...@linaro.org>
 #
 
-obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
-sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
-
 obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
 sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
 
 obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
 sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
deleted file mode 100644
index 65b6980817e5b..0000000000000
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
- *
- * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheu...@linaro.org>
- */
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/sha1.h>
-#include <crypto/sha1_base.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheu...@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha1"); - -struct sha1_ce_state { - struct sha1_state sst; - u32 finalize; -}; - -extern const u32 sha1_ce_offsetof_count; -extern const u32 sha1_ce_offsetof_finalize; - -asmlinkage int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - int blocks); - -static void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - int blocks) -{ - while (blocks) { - int rem; - - kernel_neon_begin(); - rem = __sha1_ce_transform(container_of(sst, - struct sha1_ce_state, - sst), src, blocks); - kernel_neon_end(); - src += (blocks - rem) * SHA1_BLOCK_SIZE; - blocks = rem; - } -} - -const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); -const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); - -static int sha1_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - - sctx->finalize = 0; - return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); -} - -static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - bool finalized = false; - - /* - * Allow the asm code to perform the finalization if there is no - * partial data and the input is a round multiple of the block size. - */ - if (len >= SHA1_BLOCK_SIZE) { - unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); - - finalized = !remain; - sctx->finalize = finalized; - sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); - data += len - remain; - len = remain; - } - if (!finalized) { - sctx->finalize = 0; - sha1_base_do_finup(desc, data, len, sha1_ce_transform); - } - return sha1_base_finish(desc, out); -} - -static struct shash_alg alg = { - .init = sha1_base_init, - .update = sha1_ce_update, - .finup = sha1_ce_finup, - .descsize = sizeof(struct sha1_ce_state), - .statesize = SHA1_STATE_SIZE, - .digestsize = SHA1_DIGEST_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name = "sha1-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha1_ce_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit sha1_ce_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(SHA1, sha1_ce_mod_init); -module_exit(sha1_ce_mod_fini); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 5c1bfa02fa349..189bdae58c812 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -145,10 +145,11 @@ config CRYPTO_LIB_SHA1 config CRYPTO_LIB_SHA1_ARCH bool depends on CRYPTO_LIB_SHA1 && !UML default y if ARM + default y if ARM64 && KERNEL_MODE_NEON config CRYPTO_LIB_SHA256 tristate help Enable the SHA-256 library interface. 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index e10a84a6dda6a..11c8ac54bf7d1 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -76,10 +76,11 @@ CFLAGS_sha1.o += -I$(src)/$(SRCARCH)
 ifeq ($(CONFIG_ARM),y)
 libsha1-y += arm/sha1-armv4-large.o
 libsha1-$(CONFIG_KERNEL_MODE_NEON) += arm/sha1-armv7-neon.o \
 				      arm/sha1-ce-core.o
 endif
+libsha1-$(CONFIG_ARM64) += arm64/sha1-ce-core.o
 endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
 
 ################################################################################
 
 obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/lib/crypto/arm64/sha1-ce-core.S
similarity index 76%
rename from arch/arm64/crypto/sha1-ce-core.S
rename to lib/crypto/arm64/sha1-ce-core.S
index 9b1f2d82a6fea..21efbbafd7d62 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/lib/crypto/arm64/sha1-ce-core.S
@@ -60,12 +60,12 @@
 	movk		\tmp, :abs_g1:\val
 	dup		\k, \tmp
 	.endm
 
 	/*
-	 * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
-	 *			  int blocks)
+	 * size_t __sha1_ce_transform(struct sha1_block_state *state,
+	 *			      const u8 *data, size_t nblocks);
 	 */
 SYM_FUNC_START(__sha1_ce_transform)
 	/* load round constants */
 	loadrc		k0.4s, 0x5a827999, w6
 	loadrc		k1.4s, 0x6ed9eba1, w6
@@ -74,24 +74,20 @@ SYM_FUNC_START(__sha1_ce_transform)
 
 	/* load state */
 	ld1		{dgav.4s}, [x0]
 	ldr		dgb, [x0, #16]
 
-	/* load sha1_ce_state::finalize */
-	ldr_l		w4, sha1_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
-
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w2, w2, #1
+	sub		x2, x2, #1
 
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-1:	add		t0.4s, v8.4s, k0.4s
+	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0,  8,  9, 10, 11, dgb
 	add_update	c, od, k0,  9, 10, 11,  8
 	add_update	c, ev, k0, 10, 11,  8,  9
@@ -118,33 +114,17 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 
 	/* update state */
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbz		w2, 2f
-	cond_yield	3f, x5, x6
-	b		0b
+	/* return early if voluntary preemption is needed */
+	cond_yield	1f, x5, x6
 
-	/*
-	 * Final block: add padding and total bit count.
-	 * Skip if the input size was not a round multiple of the block size,
-	 * the padding is handled by the C code in that case.
-	 */
-2:	cbz		x4, 3f
-	ldr_l		w4, sha1_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
-	movi		v9.2d, #0
-	mov		x8, #0x80000000
-	movi		v10.2d, #0
-	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
-	fmov		d8, x8
-	mov		x4, #0
-	mov		v11.d[0], xzr
-	mov		v11.d[1], x7
-	b		1b
+	/* handled all input blocks? */
+	cbnz		x2, 0b
 
 	/* store new state */
-3:	st1		{dgav.4s}, [x0]
+1:	st1		{dgav.4s}, [x0]
 	str		dgb, [x0, #16]
-	mov		w0, w2
+	mov		x0, x2
 	ret
 SYM_FUNC_END(__sha1_ce_transform)
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
new file mode 100644
index 0000000000000..0a166f968f63e
--- /dev/null
+++ b/lib/crypto/arm64/sha1.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SHA-1 optimized for ARM64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
+
+asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
+				      const u8 *data, size_t nblocks);
+
+static void sha1_blocks(struct sha1_block_state *state,
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_ce) && may_use_simd()) {
+		do {
+			size_t rem;
+
+			kernel_neon_begin();
+			rem = __sha1_ce_transform(state, data, nblocks);
+			kernel_neon_end();
+			data += (nblocks - rem) * SHA1_BLOCK_SIZE;
+			nblocks = rem;
+		} while (nblocks);
+	} else {
+		sha1_blocks_generic(state, data, nblocks);
+	}
+}
+
+#define sha1_mod_init_arch sha1_mod_init_arch
+static inline void sha1_mod_init_arch(void)
+{
+	if (cpu_have_named_feature(SHA1))
+		static_branch_enable(&have_ce);
+}
-- 
2.50.1