From de3629baf5a33af1919dec7136d643b0662e85ef Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 01/16] parisc: Fix double SIGFPE crash Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... 
rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 34495446e051..71829cb7bc81 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -97,9 +97,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? 
-1 : 0; -- 2.51.0 From 92a09c47464d040866cf2b4cd052bc60555185fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 May 2025 13:55:04 -0700 Subject: [PATCH 02/16] Linux 6.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94be5dfb81fb..b29cc321ffd9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 74a43a2cf5e8a3eeab3b55a2e64b33281f5ac554 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:05 +0800 Subject: [PATCH 03/16] crypto: lib/sha256 - Move partial block handling out Extract the common partial block handling into a helper macro that can be reused by other library code. Also delete the unused sha256_base_do_finalize function. Signed-off-by: Herbert Xu --- include/crypto/internal/blockhash.h | 52 +++++++++++++++++++++++++++++ include/crypto/sha2.h | 9 +++-- include/crypto/sha256_base.h | 38 ++------------------- 3 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 include/crypto/internal/blockhash.h diff --git a/include/crypto/internal/blockhash.h b/include/crypto/internal/blockhash.h new file mode 100644 index 000000000000..52d9d4c82493 --- /dev/null +++ b/include/crypto/internal/blockhash.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Handle partial blocks for block hash. 
+ * + * Copyright (c) 2015 Linaro Ltd + * Copyright (c) 2025 Herbert Xu + */ + +#ifndef _CRYPTO_INTERNAL_BLOCKHASH_H +#define _CRYPTO_INTERNAL_BLOCKHASH_H + +#include +#include + +#define BLOCK_HASH_UPDATE_BASE(block_fn, state, src, nbytes, bs, dv, \ + buf, buflen) \ + ({ \ + typeof(block_fn) *_block_fn = &(block_fn); \ + typeof(state + 0) _state = (state); \ + unsigned int _buflen = (buflen); \ + size_t _nbytes = (nbytes); \ + unsigned int _bs = (bs); \ + const u8 *_src = (src); \ + u8 *_buf = (buf); \ + while ((_buflen + _nbytes) >= _bs) { \ + const u8 *data = _src; \ + size_t len = _nbytes; \ + size_t blocks; \ + int remain; \ + if (_buflen) { \ + remain = _bs - _buflen; \ + memcpy(_buf + _buflen, _src, remain); \ + data = _buf; \ + len = _bs; \ + } \ + remain = len % _bs; \ + blocks = (len - remain) / (dv); \ + (*_block_fn)(_state, data, blocks); \ + _src += len - remain - _buflen; \ + _nbytes -= len - remain - _buflen; \ + _buflen = 0; \ + } \ + memcpy(_buf + _buflen, _src, _nbytes); \ + _buflen += _nbytes; \ + }) + +#define BLOCK_HASH_UPDATE(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, 1, buf, buflen) +#define BLOCK_HASH_UPDATE_BLOCKS(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, bs, buf, buflen) + +#endif /* _CRYPTO_INTERNAL_BLOCKHASH_H */ diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index abbd882f7849..f873c2207b1e 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -71,8 +71,13 @@ struct crypto_sha256_state { }; struct sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; - u64 count; + union { + struct crypto_sha256_state ctx; + struct { + u32 state[SHA256_DIGEST_SIZE / 4]; + u64 count; + }; + }; u8 buf[SHA256_BLOCK_SIZE]; }; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 08cd5e41d4fd..9f284bed5a51 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -8,6 +8,7 
@@ #ifndef _CRYPTO_SHA256_BASE_H #define _CRYPTO_SHA256_BASE_H +#include #include #include #include @@ -40,35 +41,10 @@ static inline int lib_sha256_base_do_update(struct sha256_state *sctx, sha256_block_fn *block_fn) { unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - struct crypto_sha256_state *state = (void *)sctx; sctx->count += len; - - if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx->buf + partial, data, p); - data += p; - len -= p; - - block_fn(state, sctx->buf, 1); - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - if (blocks) { - block_fn(state, data, blocks); - data += blocks * SHA256_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buf + partial, data, len); - + BLOCK_HASH_UPDATE_BLOCKS(block_fn, &sctx->ctx, data, len, + SHA256_BLOCK_SIZE, sctx->buf, partial); return 0; } @@ -140,14 +116,6 @@ static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, return lib_sha256_base_do_finup(state, sctx->buf, partial, block_fn); } -static inline int sha256_base_do_finalize(struct shash_desc *desc, - sha256_block_fn *block_fn) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_do_finalize(sctx, block_fn); -} - static inline int __sha256_base_finish(u32 state[SHA256_DIGEST_SIZE / 4], u8 *out, unsigned int digest_size) { -- 2.51.0 From 9b84cb897803c484e15eb1885cd45a895ce1e436 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:07 +0800 Subject: [PATCH 04/16] crypto: lib/poly1305 - Add block-only interface Add a block-only interface for poly1305. Implement the generic code first. Also use the generic partial block helper. 
Signed-off-by: Herbert Xu --- include/crypto/internal/poly1305.h | 28 ++++++++++++++-- include/crypto/poly1305.h | 25 ++++++++++---- lib/crypto/poly1305.c | 54 +++++++++++++----------------- 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index e614594f88c1..c60315f47562 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -6,9 +6,8 @@ #ifndef _CRYPTO_INTERNAL_POLY1305_H #define _CRYPTO_INTERNAL_POLY1305_H -#include -#include #include +#include /* * Poly1305 core functions. These only accept whole blocks; the caller must @@ -31,4 +30,29 @@ void poly1305_core_blocks(struct poly1305_state *state, void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], void *dst); +void poly1305_block_init_arch(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_block_init_generic(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit); + +static inline void poly1305_blocks_generic(struct poly1305_block_state *state, + const u8 *src, unsigned int len, + u32 padbit) +{ + poly1305_core_blocks(&state->h, &state->core_r, src, + len / POLY1305_BLOCK_SIZE, padbit); +} + +void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); + +static inline void poly1305_emit_generic(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + poly1305_core_emit(state, nonce, digest); +} + #endif diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 6e21ec2d1dc2..027d74842cd5 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -7,7 +7,6 @@ #define _CRYPTO_POLY1305_H #include -#include #define POLY1305_BLOCK_SIZE 16 #define POLY1305_KEY_SIZE 32 @@ -38,6 
+37,17 @@ struct poly1305_state { }; }; +/* Combined state for block function. */ +struct poly1305_block_state { + /* accumulator */ + struct poly1305_state h; + /* key */ + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; +}; + struct poly1305_desc_ctx { /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; @@ -45,12 +55,15 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - /* accumulator */ - struct poly1305_state h; - /* key */ union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; + struct { + struct poly1305_state h; + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; + }; + struct poly1305_block_state state; }; }; diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index b633b043f0f6..9fec64a599c1 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -7,54 +7,45 @@ * Based on public domain code by Andrew Moon and Daniel J. Bernstein. 
*/ +#include #include #include #include +#include #include +void poly1305_block_init_generic(struct poly1305_block_state *desc, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_core_init(&desc->h); + poly1305_core_setkey(&desc->core_r, raw_key); +} +EXPORT_SYMBOL_GPL(poly1305_block_init_generic); + void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_core_setkey(&desc->core_r, key); desc->s[0] = get_unaligned_le32(key + 16); desc->s[1] = get_unaligned_le32(key + 20); desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); - poly1305_core_init(&desc->h); desc->buflen = 0; + poly1305_block_init_generic(&desc->state, key); } EXPORT_SYMBOL_GPL(poly1305_init_generic); +static inline void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, unsigned int len) +{ + poly1305_blocks_generic(state, src, len, 1); +} + void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { - unsigned int bytes; - - if (unlikely(desc->buflen)) { - bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen); - memcpy(desc->buf + desc->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - desc->buflen += bytes; - - if (desc->buflen == POLY1305_BLOCK_SIZE) { - poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, - 1, 1); - desc->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_core_blocks(&desc->h, &desc->core_r, src, - nbytes / POLY1305_BLOCK_SIZE, 1); - src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - desc->buflen = nbytes; - memcpy(desc->buf, src, nbytes); - } + desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, + src, nbytes, POLY1305_BLOCK_SIZE, + desc->buf, desc->buflen); } EXPORT_SYMBOL_GPL(poly1305_update_generic); @@ -64,10 +55,11 @@ void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) desc->buf[desc->buflen++] = 1; 
memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0); + poly1305_blocks_generic(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_core_emit(&desc->h, desc->s, dst); + poly1305_emit_generic(&desc->h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL_GPL(poly1305_final_generic); -- 2.51.0 From 773426f4771bdd82ac5c834bf4c1775315c73a46 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:09 +0800 Subject: [PATCH 05/16] crypto: arm/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- arch/arm/lib/crypto/poly1305-armv4.pl | 4 +- arch/arm/lib/crypto/poly1305-glue.c | 80 +++++++++++++++------------ 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/arch/arm/lib/crypto/poly1305-armv4.pl b/arch/arm/lib/crypto/poly1305-armv4.pl index 6d79498d3115..d57c6e2fc84a 100644 --- a/arch/arm/lib/crypto/poly1305-armv4.pl +++ b/arch/arm/lib/crypto/poly1305-armv4.pl @@ -43,9 +43,9 @@ $code.=<<___; #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_init_arm +# define poly1305_init poly1305_block_init_arch # define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arm +# define poly1305_emit poly1305_emit_arch .globl poly1305_blocks_neon #endif diff --git a/arch/arm/lib/crypto/poly1305-glue.c b/arch/arm/lib/crypto/poly1305-glue.c index 42d0ebde1ae1..3ee16048ec7c 100644 --- a/arch/arm/lib/crypto/poly1305-glue.c +++ b/arch/arm/lib/crypto/poly1305-glue.c @@ -7,20 +7,29 @@ #include #include -#include -#include -#include +#include #include #include +#include #include +#include #include -void poly1305_init_arm(void *state, const u8 *key); -void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); -void poly1305_blocks_neon(void *state, const u8 *src, 
u32 len, u32 hibit); -void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); - -void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); + +void __weak poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit) { } @@ -28,21 +37,39 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_arm(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + len = round_down(len, POLY1305_BLOCK_SIZE); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon)) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm(state, src, len, padbit); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int nbytes) { - bool do_neon = 
IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - crypto_simd_usable(); - if (unlikely(dctx->buflen)) { u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); @@ -52,30 +79,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arm(&dctx->h, dctx->buf, - POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && do_neon) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, todo, 1); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else { - poly1305_blocks_arm(&dctx->h, src, len, 1); - src += len; - } + poly1305_blocks_arch(&dctx->state, src, nbytes, 1); + src += round_down(nbytes, POLY1305_BLOCK_SIZE); nbytes %= POLY1305_BLOCK_SIZE; } @@ -92,10 +104,10 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_arm(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From a59e5468a921937cb7317892779c67046ad9f5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:11 +0800 Subject: [PATCH 06/16] crypto: arm64/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. 
Signed-off-by: Herbert Xu --- arch/arm64/lib/crypto/Makefile | 3 +- arch/arm64/lib/crypto/poly1305-glue.c | 71 ++++++++++++++++----------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/arch/arm64/lib/crypto/Makefile b/arch/arm64/lib/crypto/Makefile index ac624c3effda..6207088397a7 100644 --- a/arch/arm64/lib/crypto/Makefile +++ b/arch/arm64/lib/crypto/Makefile @@ -5,7 +5,8 @@ chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o poly1305-neon-y := poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch +AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index 906970dd5373..d66a820e32d5 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -7,32 +7,60 @@ #include #include -#include -#include -#include +#include #include #include +#include #include +#include #include -asmlinkage void poly1305_init_arm64(void *state, const u8 *key); -asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); 
+EXPORT_SYMBOL_GPL(poly1305_emit_arch); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_arm64(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + len = round_down(len, POLY1305_BLOCK_SIZE); + if (static_branch_likely(&have_neon)) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks(state, src, len, padbit); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int nbytes) { @@ -45,29 +73,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && crypto_simd_usable()) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, todo, 1); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else { - poly1305_blocks(&dctx->h, src, len, 1); - src += len; - } + poly1305_blocks_arch(&dctx->state, src, nbytes, 1); + src += round_down(nbytes, POLY1305_BLOCK_SIZE); nbytes %= 
POLY1305_BLOCK_SIZE; } @@ -84,10 +98,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From ffe5ca295d757d912b226239df17396c3cd8dbca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:14 +0800 Subject: [PATCH 07/16] crypto: mips/poly1305 - Add block-only interface Add block-only interface. Signed-off-by: Herbert Xu --- arch/mips/lib/crypto/poly1305-glue.c | 29 ++++++++++++++++++--------- arch/mips/lib/crypto/poly1305-mips.pl | 12 +++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c index 576e7a58e0b1..2fea4cacfe27 100644 --- a/arch/mips/lib/crypto/poly1305-glue.c +++ b/arch/mips/lib/crypto/poly1305-glue.c @@ -5,23 +5,33 @@ * Copyright (C) 2019 Linaro Ltd. 
*/ -#include +#include #include +#include #include +#include #include -asmlinkage void poly1305_init_mips(void *state, const u8 *key); -asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_mips(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); @@ -37,7 +47,7 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_mips(&dctx->h, dctx->buf, + poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } @@ -46,7 +56,7 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - poly1305_blocks_mips(&dctx->h, src, len, 1); + poly1305_blocks_arch(&dctx->state, src, len, 1); src += len; nbytes %= POLY1305_BLOCK_SIZE; } @@ -64,10 +74,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, 
POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_mips(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/mips/lib/crypto/poly1305-mips.pl b/arch/mips/lib/crypto/poly1305-mips.pl index b05bab884ed2..399f10c3e385 100644 --- a/arch/mips/lib/crypto/poly1305-mips.pl +++ b/arch/mips/lib/crypto/poly1305-mips.pl @@ -93,9 +93,9 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips +# define poly1305_init poly1305_block_init_arch +# define poly1305_blocks poly1305_blocks_arch +# define poly1305_emit poly1305_emit_arch #endif #if defined(__MIPSEB__) && !defined(MIPSEB) @@ -565,9 +565,9 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips +# define poly1305_init poly1305_block_init_arch +# define poly1305_blocks poly1305_blocks_arch +# define poly1305_emit poly1305_emit_arch #endif #if defined(__MIPSEB__) && !defined(MIPSEB) -- 2.51.0 From 14d31979145dbafaeb28a5bc4c90c4db918bb772 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:16 +0800 Subject: [PATCH 08/16] crypto: powerpc/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. 
Signed-off-by: Herbert Xu --- arch/powerpc/lib/crypto/poly1305-p10-glue.c | 84 ++++++++++++--------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/lib/crypto/poly1305-p10-glue.c b/arch/powerpc/lib/crypto/poly1305-p10-glue.c index 00617f4c58e6..708435beaba6 100644 --- a/arch/powerpc/lib/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/lib/crypto/poly1305-p10-glue.c @@ -4,19 +4,20 @@ * * Copyright 2023- IBM Corp. All rights reserved. */ +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include #include -#include -#include -asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen); -asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit); -asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst); +asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); +asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); @@ -32,22 +33,49 @@ static void vsx_end(void) preempt_enable(); } -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) +void poly1305_block_init_arch(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { if (!static_key_enabled(&have_p10)) - return poly1305_init_generic(dctx, key); + return poly1305_block_init_generic(dctx, raw_key); dctx->h = (struct poly1305_state){}; - dctx->core_r.key.r64[0] = get_unaligned_le64(key + 0); - dctx->core_r.key.r64[1] = get_unaligned_le64(key + 8); + dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); + dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); +} +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 
key[POLY1305_KEY_SIZE]) +{ dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_blocks_generic(state, src, len, padbit); + vsx_begin(); + if (len >= POLY1305_BLOCK_SIZE * 4) { + poly1305_p10le_4blocks(state, src, len); + src += len - (len % (POLY1305_BLOCK_SIZE * 4)); + len %= POLY1305_BLOCK_SIZE * 4; + } + while (len >= POLY1305_BLOCK_SIZE) { + poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); + len -= POLY1305_BLOCK_SIZE; + src += POLY1305_BLOCK_SIZE; + } + vsx_end(); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -64,28 +92,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, dctx->buflen += bytes; if (dctx->buflen < POLY1305_BLOCK_SIZE) return; - vsx_begin(); - poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); - vsx_end(); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) { - vsx_begin(); - poly1305_p10le_4blocks(&dctx->h, src, srclen); - vsx_end(); - src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4)); - srclen %= POLY1305_BLOCK_SIZE * 4; - } - while (srclen >= POLY1305_BLOCK_SIZE) { - vsx_begin(); - poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1); - vsx_end(); - srclen -= POLY1305_BLOCK_SIZE; - src += POLY1305_BLOCK_SIZE; - } + poly1305_blocks_arch(&dctx->state, src, srclen, 1); + src += srclen - (srclen % POLY1305_BLOCK_SIZE); + srclen %= 
POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { @@ -104,12 +119,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - vsx_begin(); - poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - vsx_end(); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_64(&dctx->h, &dctx->s, dst); + poly1305_emit_arch(&dctx->h, dst, dctx->s); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From 318c53ae02f2abf1542062543741068278780d09 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:18 +0800 Subject: [PATCH 09/16] crypto: x86/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- .../lib/crypto/poly1305-x86_64-cryptogams.pl | 33 +++-- arch/x86/lib/crypto/poly1305_glue.c | 121 +++++++----------- 2 files changed, 69 insertions(+), 85 deletions(-) diff --git a/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl index 409ec6955733..501827254fed 100644 --- a/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl @@ -118,6 +118,19 @@ sub declare_function() { } } +sub declare_typed_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= "SYM_TYPED_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + sub end_function() { my ($name) = @_; if($kernel) { @@ -128,7 +141,7 @@ sub end_function() { } $code.=<<___ if $kernel; -#include +#include ___ if ($avx) { @@ -236,14 +249,14 @@ ___ $code.=<<___ if (!$kernel); .extern OPENSSL_ia32cap_P -.globl poly1305_init_x86_64 -.hidden poly1305_init_x86_64 +.globl poly1305_block_init_arch +.hidden poly1305_block_init_arch .globl 
poly1305_blocks_x86_64 .hidden poly1305_blocks_x86_64 .globl poly1305_emit_x86_64 .hidden poly1305_emit_x86_64 ___ -&declare_function("poly1305_init_x86_64", 32, 3); +&declare_typed_function("poly1305_block_init_arch", 32, 3); $code.=<<___; xor %eax,%eax mov %rax,0($ctx) # initialize hash value @@ -298,7 +311,7 @@ $code.=<<___; .Lno_key: RET ___ -&end_function("poly1305_init_x86_64"); +&end_function("poly1305_block_init_arch"); &declare_function("poly1305_blocks_x86_64", 32, 4); $code.=<<___; @@ -4105,9 +4118,9 @@ avx_handler: .section .pdata .align 4 - .rva .LSEH_begin_poly1305_init_x86_64 - .rva .LSEH_end_poly1305_init_x86_64 - .rva .LSEH_info_poly1305_init_x86_64 + .rva .LSEH_begin_poly1305_block_init_arch + .rva .LSEH_end_poly1305_block_init_arch + .rva .LSEH_info_poly1305_block_init_arch .rva .LSEH_begin_poly1305_blocks_x86_64 .rva .LSEH_end_poly1305_blocks_x86_64 @@ -4155,10 +4168,10 @@ ___ $code.=<<___; .section .xdata .align 8 -.LSEH_info_poly1305_init_x86_64: +.LSEH_info_poly1305_block_init_arch: .byte 9,0,0,0 .rva se_handler - .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init_arch .LSEH_info_poly1305_blocks_x86_64: .byte 9,0,0,0 diff --git a/arch/x86/lib/crypto/poly1305_glue.c b/arch/x86/lib/crypto/poly1305_glue.c index cff35ca5822a..d98764ec3b47 100644 --- a/arch/x86/lib/crypto/poly1305_glue.c +++ b/arch/x86/lib/crypto/poly1305_glue.c @@ -3,34 +3,15 @@ * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
*/ -#include -#include +#include +#include +#include #include #include #include #include +#include #include -#include -#include - -asmlinkage void poly1305_init_x86_64(void *ctx, - const u8 key[POLY1305_BLOCK_SIZE]); -asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, - const size_t len, const u32 padbit); -asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, - const size_t len, const u32 padbit); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); struct poly1305_arch_internal { union { @@ -45,64 +26,50 @@ struct poly1305_arch_internal { struct { u32 r2, r1, r4, r3; } rn[9]; }; -/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit - * the unfortunate situation of using AVX and then having to go back to scalar - * -- because the user is silly and has called the update function from two - * separate contexts -- then we need to convert back to the original base before - * proceeding. It is possible to reason that the initial reduction below is - * sufficient given the implementation invariants. However, for an avoidance of - * doubt and because this is not performance critical, we do the full reduction - * anyway. 
Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py - */ -static void convert_to_base2_64(void *ctx) -{ - struct poly1305_arch_internal *state = ctx; - u32 cy; - - if (!state->is_base2_26) - return; - - cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; - cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; - cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; - cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; - state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; - state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); - state->hs[2] = state->h[4] >> 24; -#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) - cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); - state->hs[2] &= 3; - state->hs[0] += cy; - state->hs[1] += (cy = ULT(state->hs[0], cy)); - state->hs[2] += ULT(state->hs[1], cy); -#undef ULT - state->is_base2_26 = 0; -} +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); -static void poly1305_simd_init(void *ctx, const 
u8 key[POLY1305_BLOCK_SIZE]) -{ - poly1305_init_x86_64(ctx, key); -} +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); -static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, - const u32 padbit) +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp, + unsigned int len, u32 padbit) { - struct poly1305_arch_internal *state = ctx; + struct poly1305_arch_internal *ctx = + container_of(&state->h.h, struct poly1305_arch_internal, h); /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || SZ_4K % POLY1305_BLOCK_SIZE); - if (!static_branch_likely(&poly1305_use_avx) || - (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || - !crypto_simd_usable()) { - convert_to_base2_64(ctx); + if (!static_branch_likely(&poly1305_use_avx)) { poly1305_blocks_x86_64(ctx, inp, len, padbit); return; } do { - const size_t bytes = min_t(size_t, len, SZ_4K); + const unsigned int bytes = min(len, SZ_4K); kernel_fpu_begin(); if (static_branch_likely(&poly1305_use_avx512)) @@ -117,24 +84,26 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, inp += bytes; } while (len); } +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) +void poly1305_emit_arch(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { if (!static_branch_likely(&poly1305_use_avx)) poly1305_emit_x86_64(ctx, mac, nonce); else poly1305_emit_avx(ctx, mac, nonce); } +EXPORT_SYMBOL_GPL(poly1305_emit_arch); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_simd_init(&dctx->h, key); dctx->s[0] = get_unaligned_le32(&key[16]); dctx->s[1] = get_unaligned_le32(&key[20]); dctx->s[2] = 
get_unaligned_le32(&key[24]); dctx->s[3] = get_unaligned_le32(&key[28]); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); @@ -151,14 +120,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - poly1305_simd_blocks(&dctx->h, src, bytes, 1); + poly1305_blocks_arch(&dctx->state, src, bytes, 1); src += bytes; srclen -= bytes; } @@ -176,10 +146,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_simd_emit(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From a298765e28adaea199f722142c10dae7e24dedf8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:21 +0800 Subject: [PATCH 10/16] crypto: chacha20poly1305 - Use lib/crypto poly1305 Since the poly1305 algorithm is fixed, there is no point in going through the Crypto API for it. Use the lib/crypto poly1305 interface instead. For compatibility keep the poly1305 parameter in the algorithm name. 
Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 +- crypto/chacha20poly1305.c | 319 ++++++++------------------------------ 2 files changed, 65 insertions(+), 256 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 9878286d1d68..f87e2a26d2dd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -784,8 +784,8 @@ config CRYPTO_AEGIS128_SIMD config CRYPTO_CHACHA20POLY1305 tristate "ChaCha20-Poly1305" select CRYPTO_CHACHA20 - select CRYPTO_POLY1305 select CRYPTO_AEAD + select CRYPTO_LIB_POLY1305 select CRYPTO_MANAGER help ChaCha20 stream cipher and Poly1305 authenticator combined diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index d740849f1c19..b29f66ba1e2f 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -12,36 +12,23 @@ #include #include #include -#include #include +#include #include +#include struct chachapoly_instance_ctx { struct crypto_skcipher_spawn chacha; - struct crypto_ahash_spawn poly; unsigned int saltlen; }; struct chachapoly_ctx { struct crypto_skcipher *chacha; - struct crypto_ahash *poly; /* key bytes we use for the ChaCha20 IV */ unsigned int saltlen; u8 salt[] __counted_by(saltlen); }; -struct poly_req { - /* zero byte padding for AD/ciphertext, as needed */ - u8 pad[POLY1305_BLOCK_SIZE]; - /* tail data with AD/ciphertext lengths */ - struct { - __le64 assoclen; - __le64 cryptlen; - } tail; - struct scatterlist src[1]; - struct ahash_request req; /* must be last member */ -}; - struct chacha_req { u8 iv[CHACHA_IV_SIZE]; struct scatterlist src[1]; @@ -62,7 +49,6 @@ struct chachapoly_req_ctx { /* request flags, with MAY_SLEEP cleared if needed */ u32 flags; union { - struct poly_req poly; struct chacha_req chacha; } u; }; @@ -105,16 +91,6 @@ static int poly_verify_tag(struct aead_request *req) return 0; } -static int poly_copy_tag(struct aead_request *req) -{ - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - - scatterwalk_map_and_copy(rctx->tag, req->dst, - req->assoclen + 
rctx->cryptlen, - sizeof(rctx->tag), 1); - return 0; -} - static void chacha_decrypt_done(void *data, int err) { async_done_continue(data, err, poly_verify_tag); @@ -151,210 +127,76 @@ skip: return poly_verify_tag(req); } -static int poly_tail_continue(struct aead_request *req) -{ - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - - if (rctx->cryptlen == req->cryptlen) /* encrypting */ - return poly_copy_tag(req); - - return chacha_decrypt(req); -} - -static void poly_tail_done(void *data, int err) -{ - async_done_continue(data, err, poly_tail_continue); -} - -static int poly_tail(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; - - preq->tail.assoclen = cpu_to_le64(rctx->assoclen); - preq->tail.cryptlen = cpu_to_le64(rctx->cryptlen); - sg_init_one(preq->src, &preq->tail, sizeof(preq->tail)); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_tail_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, - rctx->tag, sizeof(preq->tail)); - - err = crypto_ahash_finup(&preq->req); - if (err) - return err; - - return poly_tail_continue(req); -} - -static void poly_cipherpad_done(void *data, int err) -{ - async_done_continue(data, err, poly_tail); -} - -static int poly_cipherpad(struct aead_request *req) +static int poly_hash(struct aead_request *req) { - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; + const void *zp = page_address(ZERO_PAGE(0)); + struct scatterlist *sg = req->src; + struct poly1305_desc_ctx desc; + struct scatter_walk walk; + struct { + union { + struct { + __le64 assoclen; + __le64 cryptlen; + }; + u8 u8[16]; + }; + } tail; unsigned int padlen; - int err; - - 
padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE; - memset(preq->pad, 0, sizeof(preq->pad)); - sg_init_one(preq->src, preq->pad, padlen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_cipherpad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen); + unsigned int total; - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_tail(req); -} - -static void poly_cipher_done(void *data, int err) -{ - async_done_continue(data, err, poly_cipherpad); -} - -static int poly_cipher(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - struct scatterlist *crypt = req->src; - int err; + if (sg != req->dst) + memcpy_sglist(req->dst, sg, req->assoclen); if (rctx->cryptlen == req->cryptlen) /* encrypting */ - crypt = req->dst; - - crypt = scatterwalk_ffwd(rctx->src, crypt, req->assoclen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_cipher_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, crypt, NULL, rctx->cryptlen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; + sg = req->dst; - return poly_cipherpad(req); -} + poly1305_init(&desc, rctx->key); + scatterwalk_start(&walk, sg); -static void poly_adpad_done(void *data, int err) -{ - async_done_continue(data, err, poly_cipher); -} + total = rctx->assoclen; + while (total) { + unsigned int n = scatterwalk_next(&walk, total); -static int poly_adpad(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - unsigned int padlen; - int err; + poly1305_update(&desc, walk.addr, n); + scatterwalk_done_src(&walk, n); + total -= n; + } padlen = 
-rctx->assoclen % POLY1305_BLOCK_SIZE; - memset(preq->pad, 0, sizeof(preq->pad)); - sg_init_one(preq->src, preq->pad, padlen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_adpad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_cipher(req); -} - -static void poly_ad_done(void *data, int err) -{ - async_done_continue(data, err, poly_adpad); -} - -static int poly_ad(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_ad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, req->src, NULL, rctx->assoclen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_adpad(req); -} - -static void poly_setkey_done(void *data, int err) -{ - async_done_continue(data, err, poly_ad); -} + poly1305_update(&desc, zp, padlen); -static int poly_setkey(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; + scatterwalk_skip(&walk, req->assoclen - rctx->assoclen); - sg_init_one(preq->src, rctx->key, sizeof(rctx->key)); + total = rctx->cryptlen; + while (total) { + unsigned int n = scatterwalk_next(&walk, total); - ahash_request_set_callback(&preq->req, rctx->flags, - poly_setkey_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, sizeof(rctx->key)); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_ad(req); -} - -static void poly_init_done(void *data, int err) -{ - 
async_done_continue(data, err, poly_setkey); -} + poly1305_update(&desc, walk.addr, n); + scatterwalk_done_src(&walk, n); + total -= n; + } -static int poly_init(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; + padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE; + poly1305_update(&desc, zp, padlen); - ahash_request_set_callback(&preq->req, rctx->flags, - poly_init_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); + tail.assoclen = cpu_to_le64(rctx->assoclen); + tail.cryptlen = cpu_to_le64(rctx->cryptlen); + poly1305_update(&desc, tail.u8, sizeof(tail)); + memzero_explicit(&tail, sizeof(tail)); + poly1305_final(&desc, rctx->tag); - err = crypto_ahash_init(&preq->req); - if (err) - return err; + if (rctx->cryptlen != req->cryptlen) + return chacha_decrypt(req); - return poly_setkey(req); + memcpy_to_scatterwalk(&walk, rctx->tag, sizeof(rctx->tag)); + return 0; } static void poly_genkey_done(void *data, int err) { - async_done_continue(data, err, poly_init); + async_done_continue(data, err, poly_hash); } static int poly_genkey(struct aead_request *req) @@ -388,7 +230,7 @@ static int poly_genkey(struct aead_request *req) if (err) return err; - return poly_init(req); + return poly_hash(req); } static void chacha_encrypt_done(void *data, int err) @@ -437,14 +279,7 @@ static int chachapoly_encrypt(struct aead_request *req) /* encrypt call chain: * - chacha_encrypt/done() * - poly_genkey/done() - * - poly_init/done() - * - poly_setkey/done() - * - poly_ad/done() - * - poly_adpad/done() - * - poly_cipher/done() - * - poly_cipherpad/done() - * - poly_tail/done/continue() - * - poly_copy_tag() + * - poly_hash() */ return chacha_encrypt(req); } @@ -458,13 +293,7 @@ static int chachapoly_decrypt(struct aead_request *req) /* decrypt call chain: * - poly_genkey/done() - * - poly_init/done() - * - 
poly_setkey/done() - * - poly_ad/done() - * - poly_adpad/done() - * - poly_cipher/done() - * - poly_cipherpad/done() - * - poly_tail/done/continue() + * - poly_hash() * - chacha_decrypt/done() * - poly_verify_tag() */ @@ -503,21 +332,13 @@ static int chachapoly_init(struct crypto_aead *tfm) struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst); struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_skcipher *chacha; - struct crypto_ahash *poly; unsigned long align; - poly = crypto_spawn_ahash(&ictx->poly); - if (IS_ERR(poly)) - return PTR_ERR(poly); - chacha = crypto_spawn_skcipher(&ictx->chacha); - if (IS_ERR(chacha)) { - crypto_free_ahash(poly); + if (IS_ERR(chacha)) return PTR_ERR(chacha); - } ctx->chacha = chacha; - ctx->poly = poly; ctx->saltlen = ictx->saltlen; align = crypto_aead_alignmask(tfm); @@ -525,12 +346,9 @@ static int chachapoly_init(struct crypto_aead *tfm) crypto_aead_set_reqsize( tfm, align + offsetof(struct chachapoly_req_ctx, u) + - max(offsetof(struct chacha_req, req) + - sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(chacha), - offsetof(struct poly_req, req) + - sizeof(struct ahash_request) + - crypto_ahash_reqsize(poly))); + offsetof(struct chacha_req, req) + + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(chacha)); return 0; } @@ -539,7 +357,6 @@ static void chachapoly_exit(struct crypto_aead *tfm) { struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - crypto_free_ahash(ctx->poly); crypto_free_skcipher(ctx->chacha); } @@ -548,7 +365,6 @@ static void chachapoly_free(struct aead_instance *inst) struct chachapoly_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_skcipher(&ctx->chacha); - crypto_drop_ahash(&ctx->poly); kfree(inst); } @@ -559,7 +375,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, struct aead_instance *inst; struct chachapoly_instance_ctx *ctx; struct skcipher_alg_common *chacha; - struct hash_alg_common *poly; int err; if (ivsize > 
CHACHAPOLY_IV_SIZE) @@ -581,14 +396,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; chacha = crypto_spawn_skcipher_alg_common(&ctx->chacha); - err = crypto_grab_ahash(&ctx->poly, aead_crypto_instance(inst), - crypto_attr_alg_name(tb[2]), 0, mask); - if (err) - goto err_free_inst; - poly = crypto_spawn_ahash_alg(&ctx->poly); - err = -EINVAL; - if (poly->digestsize != POLY1305_DIGEST_SIZE) + if (strcmp(crypto_attr_alg_name(tb[2]), "poly1305") && + strcmp(crypto_attr_alg_name(tb[2]), "poly1305-generic")) goto err_free_inst; /* Need 16-byte IV size, including Initial Block Counter value */ if (chacha->ivsize != CHACHA_IV_SIZE) @@ -599,16 +409,15 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->base.cra_name, - poly->base.cra_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,poly1305)", name, + chacha->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->base.cra_driver_name, - poly->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,poly1305-generic)", name, + chacha->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; - inst->alg.base.cra_priority = (chacha->base.cra_priority + - poly->base.cra_priority) / 2; + inst->alg.base.cra_priority = chacha->base.cra_priority; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = chacha->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) + -- 2.51.0 From 34c418b742a64f4add67ad7975b3fa0e20d6cd92 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:23 +0800 Subject: [PATCH 11/16] crypto: testmgr - Remove poly1305 As poly1305 no longer has any in-kernel users, remove its tests. 
Signed-off-by: Herbert Xu --- crypto/testmgr.c | 6 - crypto/testmgr.h | 288 ----------------------------------------------- 2 files changed, 294 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 82977ea25db3..f100be516f52 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5406,12 +5406,6 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "pkcs1pad(rsa)", .test = alg_test_null, .fips_allowed = 1, - }, { - .alg = "poly1305", - .test = alg_test_hash, - .suite = { - .hash = __VECS(poly1305_tv_template) - } }, { .alg = "polyval", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index afc10af59b0a..32d099ac9e73 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -8836,294 +8836,6 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = { }, }; -/* - * Poly1305 test vectors from RFC7539 A.3. - */ - -static const struct hash_testvec poly1305_tv_template[] = { - { /* Test Vector #1 */ - .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 96, - .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #2 */ - .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e" - "\x41\x6e\x79\x20\x73\x75\x62\x6d" - "\x69\x73\x73\x69\x6f\x6e\x20\x74" - "\x6f\x20\x74\x68\x65\x20\x49\x45" - "\x54\x46\x20\x69\x6e\x74\x65\x6e" - "\x64\x65\x64\x20\x62\x79\x20\x74" - "\x68\x65\x20\x43\x6f\x6e\x74\x72" - "\x69\x62\x75\x74\x6f\x72\x20\x66" - 
"\x6f\x72\x20\x70\x75\x62\x6c\x69" - "\x63\x61\x74\x69\x6f\x6e\x20\x61" - "\x73\x20\x61\x6c\x6c\x20\x6f\x72" - "\x20\x70\x61\x72\x74\x20\x6f\x66" - "\x20\x61\x6e\x20\x49\x45\x54\x46" - "\x20\x49\x6e\x74\x65\x72\x6e\x65" - "\x74\x2d\x44\x72\x61\x66\x74\x20" - "\x6f\x72\x20\x52\x46\x43\x20\x61" - "\x6e\x64\x20\x61\x6e\x79\x20\x73" - "\x74\x61\x74\x65\x6d\x65\x6e\x74" - "\x20\x6d\x61\x64\x65\x20\x77\x69" - "\x74\x68\x69\x6e\x20\x74\x68\x65" - "\x20\x63\x6f\x6e\x74\x65\x78\x74" - "\x20\x6f\x66\x20\x61\x6e\x20\x49" - "\x45\x54\x46\x20\x61\x63\x74\x69" - "\x76\x69\x74\x79\x20\x69\x73\x20" - "\x63\x6f\x6e\x73\x69\x64\x65\x72" - "\x65\x64\x20\x61\x6e\x20\x22\x49" - "\x45\x54\x46\x20\x43\x6f\x6e\x74" - "\x72\x69\x62\x75\x74\x69\x6f\x6e" - "\x22\x2e\x20\x53\x75\x63\x68\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x63\x6c\x75" - "\x64\x65\x20\x6f\x72\x61\x6c\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x20\x49\x45" - "\x54\x46\x20\x73\x65\x73\x73\x69" - "\x6f\x6e\x73\x2c\x20\x61\x73\x20" - "\x77\x65\x6c\x6c\x20\x61\x73\x20" - "\x77\x72\x69\x74\x74\x65\x6e\x20" - "\x61\x6e\x64\x20\x65\x6c\x65\x63" - "\x74\x72\x6f\x6e\x69\x63\x20\x63" - "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" - "\x74\x69\x6f\x6e\x73\x20\x6d\x61" - "\x64\x65\x20\x61\x74\x20\x61\x6e" - "\x79\x20\x74\x69\x6d\x65\x20\x6f" - "\x72\x20\x70\x6c\x61\x63\x65\x2c" - "\x20\x77\x68\x69\x63\x68\x20\x61" - "\x72\x65\x20\x61\x64\x64\x72\x65" - "\x73\x73\x65\x64\x20\x74\x6f", - .psize = 407, - .digest = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e", - }, { /* Test Vector #3 */ - .plaintext = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x41\x6e\x79\x20\x73\x75\x62\x6d" - "\x69\x73\x73\x69\x6f\x6e\x20\x74" - "\x6f\x20\x74\x68\x65\x20\x49\x45" - "\x54\x46\x20\x69\x6e\x74\x65\x6e" - "\x64\x65\x64\x20\x62\x79\x20\x74" - "\x68\x65\x20\x43\x6f\x6e\x74\x72" - 
"\x69\x62\x75\x74\x6f\x72\x20\x66" - "\x6f\x72\x20\x70\x75\x62\x6c\x69" - "\x63\x61\x74\x69\x6f\x6e\x20\x61" - "\x73\x20\x61\x6c\x6c\x20\x6f\x72" - "\x20\x70\x61\x72\x74\x20\x6f\x66" - "\x20\x61\x6e\x20\x49\x45\x54\x46" - "\x20\x49\x6e\x74\x65\x72\x6e\x65" - "\x74\x2d\x44\x72\x61\x66\x74\x20" - "\x6f\x72\x20\x52\x46\x43\x20\x61" - "\x6e\x64\x20\x61\x6e\x79\x20\x73" - "\x74\x61\x74\x65\x6d\x65\x6e\x74" - "\x20\x6d\x61\x64\x65\x20\x77\x69" - "\x74\x68\x69\x6e\x20\x74\x68\x65" - "\x20\x63\x6f\x6e\x74\x65\x78\x74" - "\x20\x6f\x66\x20\x61\x6e\x20\x49" - "\x45\x54\x46\x20\x61\x63\x74\x69" - "\x76\x69\x74\x79\x20\x69\x73\x20" - "\x63\x6f\x6e\x73\x69\x64\x65\x72" - "\x65\x64\x20\x61\x6e\x20\x22\x49" - "\x45\x54\x46\x20\x43\x6f\x6e\x74" - "\x72\x69\x62\x75\x74\x69\x6f\x6e" - "\x22\x2e\x20\x53\x75\x63\x68\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x63\x6c\x75" - "\x64\x65\x20\x6f\x72\x61\x6c\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x20\x49\x45" - "\x54\x46\x20\x73\x65\x73\x73\x69" - "\x6f\x6e\x73\x2c\x20\x61\x73\x20" - "\x77\x65\x6c\x6c\x20\x61\x73\x20" - "\x77\x72\x69\x74\x74\x65\x6e\x20" - "\x61\x6e\x64\x20\x65\x6c\x65\x63" - "\x74\x72\x6f\x6e\x69\x63\x20\x63" - "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" - "\x74\x69\x6f\x6e\x73\x20\x6d\x61" - "\x64\x65\x20\x61\x74\x20\x61\x6e" - "\x79\x20\x74\x69\x6d\x65\x20\x6f" - "\x72\x20\x70\x6c\x61\x63\x65\x2c" - "\x20\x77\x68\x69\x63\x68\x20\x61" - "\x72\x65\x20\x61\x64\x64\x72\x65" - "\x73\x73\x65\x64\x20\x74\x6f", - .psize = 407, - .digest = "\xf3\x47\x7e\x7c\xd9\x54\x17\xaf" - "\x89\xa6\xb8\x79\x4c\x31\x0c\xf0", - }, { /* Test Vector #4 */ - .plaintext = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" - "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" - "\x47\x39\x17\xc1\x40\x2b\x80\x09" - "\x9d\xca\x5c\xbc\x20\x70\x75\xc0" - "\x27\x54\x77\x61\x73\x20\x62\x72" - "\x69\x6c\x6c\x69\x67\x2c\x20\x61" - "\x6e\x64\x20\x74\x68\x65\x20\x73" - "\x6c\x69\x74\x68\x79\x20\x74\x6f" - "\x76\x65\x73\x0a\x44\x69\x64\x20" - 
"\x67\x79\x72\x65\x20\x61\x6e\x64" - "\x20\x67\x69\x6d\x62\x6c\x65\x20" - "\x69\x6e\x20\x74\x68\x65\x20\x77" - "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" - "\x20\x6d\x69\x6d\x73\x79\x20\x77" - "\x65\x72\x65\x20\x74\x68\x65\x20" - "\x62\x6f\x72\x6f\x67\x6f\x76\x65" - "\x73\x2c\x0a\x41\x6e\x64\x20\x74" - "\x68\x65\x20\x6d\x6f\x6d\x65\x20" - "\x72\x61\x74\x68\x73\x20\x6f\x75" - "\x74\x67\x72\x61\x62\x65\x2e", - .psize = 159, - .digest = "\x45\x41\x66\x9a\x7e\xaa\xee\x61" - "\xe7\x08\xdc\x7c\xbc\xc5\xeb\x62", - }, { /* Test Vector #5 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - .psize = 48, - .digest = "\x03\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #6 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 48, - .digest = "\x03\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #7 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xf0\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\x11\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 80, - .digest = "\x05\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #8 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - 
"\xff\xff\xff\xff\xff\xff\xff\xff" - "\xfb\xfe\xfe\xfe\xfe\xfe\xfe\xfe" - "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" - "\x01\x01\x01\x01\x01\x01\x01\x01" - "\x01\x01\x01\x01\x01\x01\x01\x01", - .psize = 80, - .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #9 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xfd\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - .psize = 48, - .digest = "\xfa\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - }, { /* Test Vector #10 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x04\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xe3\x35\x94\xd7\x50\x5e\x43\xb9" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x33\x94\xd7\x50\x5e\x43\x79\xcd" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 96, - .digest = "\x14\x00\x00\x00\x00\x00\x00\x00" - "\x55\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #11 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x04\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xe3\x35\x94\xd7\x50\x5e\x43\xb9" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x33\x94\xd7\x50\x5e\x43\x79\xcd" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 80, - .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Regression test for overflow in AVX2 implementation */ - .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - 
"\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff", - .psize = 300, - .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" - "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", - } -}; - /* NHPoly1305 test vectors from https://github.com/google/adiantum */ static const struct hash_testvec nhpoly1305_tv_template[] = { { -- 2.51.0 From ceef731b0e22df80a13d67773ae9afd55a971f9e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:25 +0800 Subject: [PATCH 12/16] crypto: poly1305 - Remove algorithm As there are no in-kernel users of the Crypto API poly1305 left, remove it. 
Signed-off-by: Herbert Xu --- crypto/Kconfig | 12 ---- crypto/Makefile | 2 - crypto/poly1305.c | 152 ---------------------------------------------- 3 files changed, 166 deletions(-) delete mode 100644 crypto/poly1305.c diff --git a/crypto/Kconfig b/crypto/Kconfig index f87e2a26d2dd..3cb5563dc4ab 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -953,18 +953,6 @@ config CRYPTO_POLYVAL This is used in HCTR2. It is not a general-purpose cryptographic hash function. -config CRYPTO_POLY1305 - tristate "Poly1305" - select CRYPTO_HASH - select CRYPTO_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC - help - Poly1305 authenticator algorithm (RFC7539) - - Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein. - It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use - in IETF protocols. This is the portable C implementation of Poly1305. - config CRYPTO_RMD160 tristate "RIPEMD-160" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 5d2f2a28d8a0..587bc74b6d74 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -149,8 +149,6 @@ obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_ARIA) += aria_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha.o CFLAGS_chacha.o += -DARCH=$(ARCH) -obj-$(CONFIG_CRYPTO_POLY1305) += poly1305.o -CFLAGS_poly1305.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o diff --git a/crypto/poly1305.c b/crypto/poly1305.c deleted file mode 100644 index e0436bdc462b..000000000000 --- a/crypto/poly1305.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Crypto API wrapper for the Poly1305 library functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include - -struct crypto_poly1305_desc_ctx { - struct poly1305_desc_ctx base; - u8 key[POLY1305_KEY_SIZE]; - unsigned int keysize; -}; - -static int crypto_poly1305_init(struct shash_desc *desc) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->keysize = 0; - return 0; -} - -static int crypto_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen, bool arch) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; - - /* - * The key is passed as the first 32 "data" bytes. The actual - * poly1305_init() can be called only once the full key is available. - */ - if (dctx->keysize < POLY1305_KEY_SIZE) { - bytes = min(srclen, POLY1305_KEY_SIZE - dctx->keysize); - memcpy(&dctx->key[dctx->keysize], src, bytes); - dctx->keysize += bytes; - if (dctx->keysize < POLY1305_KEY_SIZE) - return 0; - if (arch) - poly1305_init(&dctx->base, dctx->key); - else - poly1305_init_generic(&dctx->base, dctx->key); - src += bytes; - srclen -= bytes; - } - - if (arch) - poly1305_update(&dctx->base, src, srclen); - else - poly1305_update_generic(&dctx->base, src, srclen); - - return 0; -} - -static int crypto_poly1305_update_generic(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - return crypto_poly1305_update(desc, src, srclen, false); -} - -static int crypto_poly1305_update_arch(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - return crypto_poly1305_update(desc, src, srclen, true); -} - -static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst, bool arch) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (unlikely(dctx->keysize != POLY1305_KEY_SIZE)) - return -ENOKEY; - - if (arch) - poly1305_final(&dctx->base, dst); - else - poly1305_final_generic(&dctx->base, dst); - memzero_explicit(&dctx->key, sizeof(dctx->key)); - return 0; -} - -static int crypto_poly1305_final_generic(struct 
shash_desc *desc, u8 *dst) -{ - return crypto_poly1305_final(desc, dst, false); -} - -static int crypto_poly1305_final_arch(struct shash_desc *desc, u8 *dst) -{ - return crypto_poly1305_final(desc, dst, true); -} - -static struct shash_alg poly1305_algs[] = { - { - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-generic", - .base.cra_priority = 100, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, - .update = crypto_poly1305_update_generic, - .final = crypto_poly1305_final_generic, - .descsize = sizeof(struct crypto_poly1305_desc_ctx), - }, - { - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-" __stringify(ARCH), - .base.cra_priority = 300, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, - .update = crypto_poly1305_update_arch, - .final = crypto_poly1305_final_arch, - .descsize = sizeof(struct crypto_poly1305_desc_ctx), - }, -}; - -static int num_algs; - -static int __init poly1305_mod_init(void) -{ - /* register the arch flavours only if they differ from generic */ - num_algs = poly1305_is_arch_optimized() ? 
2 : 1; - - return crypto_register_shashes(poly1305_algs, num_algs); -} - -static void __exit poly1305_mod_exit(void) -{ - crypto_unregister_shashes(poly1305_algs, num_algs); -} - -subsys_initcall(poly1305_mod_init); -module_exit(poly1305_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("Crypto API wrapper for the Poly1305 library functions"); -MODULE_ALIAS_CRYPTO("poly1305"); -MODULE_ALIAS_CRYPTO("poly1305-generic"); -MODULE_ALIAS_CRYPTO("poly1305-" __stringify(ARCH)); -- 2.51.0 From 10a6d72ea355b730aa9702da0fd36aef0898a80e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:28 +0800 Subject: [PATCH 13/16] crypto: lib/poly1305 - Use block-only interface Now that every architecture provides a block function, use that to implement the lib/poly1305 and remove the old per-arch code. Signed-off-by: Herbert Xu --- arch/arm/lib/crypto/poly1305-glue.c | 57 ------------------- arch/arm64/lib/crypto/poly1305-glue.c | 58 ------------------- arch/mips/lib/crypto/poly1305-glue.c | 60 -------------------- arch/powerpc/lib/crypto/poly1305-p10-glue.c | 63 --------------------- arch/x86/lib/crypto/poly1305_glue.c | 60 -------------------- include/crypto/poly1305.h | 53 ++--------------- lib/crypto/poly1305.c | 39 ++++++++----- 7 files changed, 32 insertions(+), 358 deletions(-) diff --git a/arch/arm/lib/crypto/poly1305-glue.c b/arch/arm/lib/crypto/poly1305-glue.c index 3ee16048ec7c..91da42b26d9c 100644 --- a/arch/arm/lib/crypto/poly1305-glue.c +++ b/arch/arm/lib/crypto/poly1305-glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -35,17 +34,6 @@ void __weak poly1305_blocks_neon(struct poly1305_block_state *state, static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - 
dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -67,51 +55,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, nbytes, 1); - src += round_down(nbytes, POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { /* We always can use at least the ARM scalar implementation. 
*/ diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index d66a820e32d5..681c26557336 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -30,17 +29,6 @@ EXPORT_SYMBOL_GPL(poly1305_emit_arch); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -61,52 +49,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, nbytes, 1); - src += round_down(nbytes, POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - 
memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - memzero_explicit(dctx, sizeof(*dctx)); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { /* We always can use at least the ARM64 scalar implementation. */ diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c index 2fea4cacfe27..764a38a65200 100644 --- a/arch/mips/lib/crypto/poly1305-glue.c +++ b/arch/mips/lib/crypto/poly1305-glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -24,65 +23,6 @@ asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, const u32 nonce[4]); EXPORT_SYMBOL_GPL(poly1305_emit_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - poly1305_blocks_arch(&dctx->state, src, len, 1); - src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - 
} -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return true; diff --git a/arch/powerpc/lib/crypto/poly1305-p10-glue.c b/arch/powerpc/lib/crypto/poly1305-p10-glue.c index 708435beaba6..50ac802220e0 100644 --- a/arch/powerpc/lib/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/lib/crypto/poly1305-p10-glue.c @@ -10,7 +10,6 @@ #include #include #include -#include #include asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); @@ -45,17 +44,6 @@ void poly1305_block_init_arch(struct poly1305_block_state *dctx, } EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -76,57 +64,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) -{ - unsigned int bytes; - - if (!static_key_enabled(&have_p10)) - return poly1305_update_generic(dctx, src, srclen); - - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); - 
memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - srclen -= bytes; - dctx->buflen += bytes; - if (dctx->buflen < POLY1305_BLOCK_SIZE) - return; - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, srclen, 1); - src += srclen - (srclen % POLY1305_BLOCK_SIZE); - srclen %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(srclen)) { - dctx->buflen = srclen; - memcpy(dctx->buf, src, srclen); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_final_generic(dctx, dst); - - if (dctx->buflen) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return static_key_enabled(&have_p10); diff --git a/arch/x86/lib/crypto/poly1305_glue.c b/arch/x86/lib/crypto/poly1305_glue.c index d98764ec3b47..f799828c5809 100644 --- a/arch/x86/lib/crypto/poly1305_glue.c +++ b/arch/x86/lib/crypto/poly1305_glue.c @@ -10,7 +10,6 @@ #include #include #include -#include #include struct poly1305_arch_internal { @@ -96,65 +95,6 @@ void poly1305_emit_arch(const struct poly1305_state *ctx, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(&key[16]); - dctx->s[1] = get_unaligned_le32(&key[20]); - dctx->s[2] = get_unaligned_le32(&key[24]); - dctx->s[3] = get_unaligned_le32(&key[28]); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - 
unsigned int srclen) -{ - unsigned int bytes; - - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - srclen -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - poly1305_blocks_arch(&dctx->state, src, bytes, 1); - src += bytes; - srclen -= bytes; - } - - if (unlikely(srclen)) { - dctx->buflen = srclen; - memcpy(dctx->buf, src, srclen); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - memzero_explicit(dctx, sizeof(*dctx)); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return static_key_enabled(&poly1305_use_avx); diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 027d74842cd5..e54abda8cfe9 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -55,55 +55,14 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - union { - struct { - struct poly1305_state h; - union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; - }; - }; - struct poly1305_block_state state; - }; + struct poly1305_block_state state; }; -void poly1305_init_arch(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); - -static inline void poly1305_init(struct poly1305_desc_ctx 
*desc, const u8 *key) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_init_arch(desc, key); - else - poly1305_init_generic(desc, key); -} - -void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); - -static inline void poly1305_update(struct poly1305_desc_ctx *desc, - const u8 *src, unsigned int nbytes) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_update_arch(desc, src, nbytes); - else - poly1305_update_generic(desc, src, nbytes); -} - -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); - -static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_final_arch(desc, digest); - else - poly1305_final_generic(desc, digest); -} +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes); +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); #if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) bool poly1305_is_arch_optimized(void); diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index 9fec64a599c1..4c9996864090 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -22,47 +22,60 @@ void poly1305_block_init_generic(struct poly1305_block_state *desc, } EXPORT_SYMBOL_GPL(poly1305_block_init_generic); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]) +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]) { desc->s[0] = get_unaligned_le32(key + 16); desc->s[1] = get_unaligned_le32(key + 20); desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); 
desc->buflen = 0; - poly1305_block_init_generic(&desc->state, key); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_block_init_arch(&desc->state, key); + else + poly1305_block_init_generic(&desc->state, key); } -EXPORT_SYMBOL_GPL(poly1305_init_generic); +EXPORT_SYMBOL(poly1305_init); static inline void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, unsigned int len) { - poly1305_blocks_generic(state, src, len, 1); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_blocks_arch(state, src, len, 1); + else + poly1305_blocks_generic(state, src, len, 1); } -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes) +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes) { desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, src, nbytes, POLY1305_BLOCK_SIZE, desc->buf, desc->buflen); } -EXPORT_SYMBOL_GPL(poly1305_update_generic); +EXPORT_SYMBOL(poly1305_update); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) { if (unlikely(desc->buflen)) { desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - poly1305_blocks_generic(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_blocks_arch(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); + else + poly1305_blocks_generic(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_generic(&desc->h, dst, desc->s); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_emit_arch(&desc->state.h, dst, desc->s); + else + poly1305_emit_generic(&desc->state.h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } -EXPORT_SYMBOL_GPL(poly1305_final_generic); +EXPORT_SYMBOL(poly1305_final); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi "); -- 2.51.0 From 
950e5c84118c9e5b06bb9a9b64edf989ee4034df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:26 -0700 Subject: [PATCH 14/16] crypto: sha256 - support arch-optimized lib and expose through shash As has been done for various other algorithms, rework the design of the SHA-256 library to support arch-optimized implementations, and make crypto/sha256.c expose both generic and arch-optimized shash algorithms that wrap the library functions. This allows users of the SHA-256 library functions to take advantage of the arch-optimized code, and this makes it much simpler to integrate SHA-256 for each architecture. Note that sha256_base.h is not used in the new design. It will be removed once all the architecture-specific code has been updated. Move the generic block function into its own module to avoid a circular dependency from libsha256.ko => sha256-$ARCH.ko => libsha256.ko. Signed-off-by: Eric Biggers Add export and import functions to maintain existing export format. Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/Makefile | 3 +- crypto/sha256.c | 243 +++++++++++++++++++++++++++++++++ crypto/sha256_generic.c | 102 -------------- include/crypto/internal/sha2.h | 28 ++++ include/crypto/sha2.h | 15 +- include/crypto/sha256_base.h | 9 +- lib/crypto/Kconfig | 19 +++ lib/crypto/Makefile | 3 + lib/crypto/sha256-generic.c | 137 +++++++++++++++++++ lib/crypto/sha256.c | 196 ++++++++++++-------------- 11 files changed, 529 insertions(+), 227 deletions(-) create mode 100644 crypto/sha256.c delete mode 100644 crypto/sha256_generic.c create mode 100644 include/crypto/internal/sha2.h create mode 100644 lib/crypto/sha256-generic.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 3cb5563dc4ab..7bfad077f308 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -982,6 +982,7 @@ config CRYPTO_SHA256 tristate "SHA-224 and SHA-256" select CRYPTO_HASH select CRYPTO_LIB_SHA256 + select CRYPTO_LIB_SHA256_GENERIC help SHA-224 and SHA-256 secure hash algorithms 
(FIPS 180, ISO/IEC 10118-3) diff --git a/crypto/Makefile b/crypto/Makefile index 587bc74b6d74..84f6911dc9ba 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -76,7 +76,8 @@ obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o -obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o +obj-$(CONFIG_CRYPTO_SHA256) += sha256.o +CFLAGS_sha256.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o diff --git a/crypto/sha256.c b/crypto/sha256.c new file mode 100644 index 000000000000..c2588d08ee3e --- /dev/null +++ b/crypto/sha256.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Crypto API wrapper for the SHA-256 and SHA-224 library functions + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * SHA224 Support Copyright 2007 Intel Corporation + */ +#include +#include +#include +#include + +const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, + 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, + 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, + 0x2f +}; +EXPORT_SYMBOL_GPL(sha224_zero_message_hash); + +const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = { + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, + 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, + 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, + 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 +}; +EXPORT_SYMBOL_GPL(sha256_zero_message_hash); + +static int crypto_sha256_init(struct shash_desc *desc) +{ + sha256_init(shash_desc_ctx(desc)); + return 0; +} + +static int crypto_sha256_update_generic(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + sha256_update_generic(shash_desc_ctx(desc), data, len); + return 0; +} + +static int 
crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + sha256_update(shash_desc_ctx(desc), data, len); + return 0; +} + +static int crypto_sha256_final_generic(struct shash_desc *desc, u8 *out) +{ + sha256_final_generic(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_final_arch(struct shash_desc *desc, u8 *out) +{ + sha256_final(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_finup_generic(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_update_generic(sctx, data, len); + sha256_final_generic(sctx, out); + return 0; +} + +static int crypto_sha256_finup_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_update(sctx, data, len); + sha256_final(sctx, out); + return 0; +} + +static int crypto_sha256_digest_generic(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_init(sctx); + sha256_update_generic(sctx, data, len); + sha256_final_generic(sctx, out); + return 0; +} + +static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + sha256(data, len, out); + return 0; +} + +static int crypto_sha224_init(struct shash_desc *desc) +{ + sha224_init(shash_desc_ctx(desc)); + return 0; +} + +static int crypto_sha224_final_generic(struct shash_desc *desc, u8 *out) +{ + sha224_final_generic(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha224_final_arch(struct shash_desc *desc, u8 *out) +{ + sha224_final(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_import_lib(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + const u8 *p = in; + + memcpy(sctx, p, sizeof(*sctx)); + p += sizeof(*sctx); + sctx->count += *p; 
+ return 0; +} + +static int crypto_sha256_export_lib(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx0 = shash_desc_ctx(desc); + struct sha256_state sctx = *sctx0; + unsigned int partial; + u8 *p = out; + + partial = sctx.count % SHA256_BLOCK_SIZE; + sctx.count -= partial; + memcpy(p, &sctx, sizeof(sctx)); + p += sizeof(sctx); + *p = partial; + return 0; +} + +static struct shash_alg algs[] = { + { + .base.cra_name = "sha256", + .base.cra_driver_name = "sha256-generic", + .base.cra_priority = 100, + .base.cra_blocksize = SHA256_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA256_DIGEST_SIZE, + .init = crypto_sha256_init, + .update = crypto_sha256_update_generic, + .final = crypto_sha256_final_generic, + .finup = crypto_sha256_finup_generic, + .digest = crypto_sha256_digest_generic, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha224", + .base.cra_driver_name = "sha224-generic", + .base.cra_priority = 100, + .base.cra_blocksize = SHA224_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA224_DIGEST_SIZE, + .init = crypto_sha224_init, + .update = crypto_sha256_update_generic, + .final = crypto_sha224_final_generic, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha256", + .base.cra_driver_name = "sha256-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = SHA256_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA256_DIGEST_SIZE, + .init = crypto_sha256_init, + .update = crypto_sha256_update_arch, + .final = crypto_sha256_final_arch, + .finup = crypto_sha256_finup_arch, + .digest = crypto_sha256_digest_arch, + .descsize = sizeof(struct 
sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha224", + .base.cra_driver_name = "sha224-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = SHA224_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA224_DIGEST_SIZE, + .init = crypto_sha224_init, + .update = crypto_sha256_update_arch, + .final = crypto_sha224_final_arch, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, +}; + +static unsigned int num_algs; + +static int __init crypto_sha256_mod_init(void) +{ + /* register the arch flavours only if they differ from generic; the exit path unregisters the same num_algs entries */ + num_algs = ARRAY_SIZE(algs); + BUILD_BUG_ON(ARRAY_SIZE(algs) % 2 != 0); + if (!sha256_is_arch_optimized()) + num_algs /= 2; + return crypto_register_shashes(algs, num_algs); +} +subsys_initcall(crypto_sha256_mod_init); + +static void __exit crypto_sha256_mod_exit(void) +{ + crypto_unregister_shashes(algs, num_algs); +} +module_exit(crypto_sha256_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Crypto API wrapper for the SHA-256 and SHA-224 library functions"); + +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-generic"); +MODULE_ALIAS_CRYPTO("sha256-" __stringify(ARCH)); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-generic"); +MODULE_ALIAS_CRYPTO("sha224-" __stringify(ARCH)); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c deleted file mode 100644 index 05084e5bbaec..000000000000 --- a/crypto/sha256_generic.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Crypto API wrapper for the generic SHA256 code from lib/crypto/sha256.c - * - * Copyright (c) Jean-Luc Cooke - * Copyright (c) Andrew McDonald - * Copyright (c)
2002 James Morris - * SHA224 Support Copyright 2007 Intel Corporation - */ -#include -#include -#include -#include -#include - -const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { - 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, - 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, - 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, - 0x2f -}; -EXPORT_SYMBOL_GPL(sha224_zero_message_hash); - -const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = { - 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, - 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, - 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, - 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 -}; -EXPORT_SYMBOL_GPL(sha256_zero_message_hash); - -static void sha256_block(struct crypto_sha256_state *sctx, const u8 *input, - int blocks) -{ - sha256_transform_blocks(sctx, input, blocks); -} - -static int crypto_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update_blocks(desc, data, len, sha256_block); -} - -static int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash) -{ - sha256_base_do_finup(desc, data, len, sha256_block); - return sha256_base_finish(desc, hash); -} - -static struct shash_alg sha256_algs[2] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_update, - .finup = crypto_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_update, - .finup = crypto_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-generic", - .cra_priority = 
100, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha256_generic_mod_init(void) -{ - return crypto_register_shashes(sha256_algs, ARRAY_SIZE(sha256_algs)); -} - -static void __exit sha256_generic_mod_fini(void) -{ - crypto_unregister_shashes(sha256_algs, ARRAY_SIZE(sha256_algs)); -} - -subsys_initcall(sha256_generic_mod_init); -module_exit(sha256_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm"); - -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha224-generic"); -MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha256-generic"); diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h new file mode 100644 index 000000000000..d641c67abcbc --- /dev/null +++ b/include/crypto/internal/sha2.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _CRYPTO_INTERNAL_SHA2_H +#define _CRYPTO_INTERNAL_SHA2_H + +#include + +void sha256_update_generic(struct sha256_state *sctx, + const u8 *data, size_t len); +void sha256_final_generic(struct sha256_state *sctx, + u8 out[SHA256_DIGEST_SIZE]); +void sha224_final_generic(struct sha256_state *sctx, + u8 out[SHA224_DIGEST_SIZE]); + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) +bool sha256_is_arch_optimized(void); +#else +static inline bool sha256_is_arch_optimized(void) +{ + return false; +} +#endif +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +#endif /* _CRYPTO_INTERNAL_SHA2_H */ diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index f873c2207b1e..9a56286d736d 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -13,6 +13,7 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 +#define SHA256_STATE_WORDS 8 #define 
SHA384_DIGEST_SIZE 48 #define SHA384_BLOCK_SIZE 128 @@ -66,7 +67,7 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; struct crypto_sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; @@ -74,7 +75,7 @@ struct sha256_state { union { struct crypto_sha256_state ctx; struct { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; }; @@ -87,16 +88,6 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -/* - * Stand-alone implementation of the SHA256 algorithm. It is designed to - * have as little dependencies as possible so it can be used in the - * kexec_file purgatory. In other cases you should generally use the - * hash APIs from include/crypto/hash.h. Especially when hashing large - * amounts of data as those APIs may be hw-accelerated. - * - * For details see lib/crypto/sha256.c - */ - static inline void sha256_init(struct sha256_state *sctx) { sctx->state[0] = SHA256_H0; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 9f284bed5a51..804361731a7a 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include @@ -142,7 +142,10 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) return __sha256_base_finish(sctx->state, out, digest_size); } -void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks); +static inline void sha256_transform_blocks(struct crypto_sha256_state *sst, + const u8 *input, int blocks) +{ + sha256_blocks_generic(sst->state, input, blocks); +} #endif /* _CRYPTO_SHA256_BASE_H */ diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index af2368799579..7fe678047939 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -139,6 +139,25 @@ config CRYPTO_LIB_SHA1 config CRYPTO_LIB_SHA256 tristate + help + Enable the 
SHA-256 library interface. This interface may be fulfilled + by either the generic implementation or an arch-specific one, if one + is available and enabled. + +config CRYPTO_ARCH_HAVE_LIB_SHA256 + bool + help + Declares whether the architecture provides an arch-specific + accelerated implementation of the SHA-256 library interface. + +config CRYPTO_LIB_SHA256_GENERIC + tristate + default CRYPTO_LIB_SHA256 if !CRYPTO_ARCH_HAVE_LIB_SHA256 + help + This symbol can be selected by arch implementations of the SHA-256 + library interface that require the generic code as a fallback, e.g., + for SIMD implementations. If no arch specific implementation is + enabled, this implementation serves the users of CRYPTO_LIB_SHA256. config CRYPTO_LIB_SM3 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 4dd62bc5bee3..71d3d05d666a 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -51,6 +51,9 @@ libsha1-y := sha1.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o +obj-$(CONFIG_CRYPTO_LIB_SHA256_GENERIC) += libsha256-generic.o +libsha256-generic-y := sha256-generic.o + ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) libblake2s-y += blake2s-selftest.o libchacha20poly1305-y += chacha20poly1305-selftest.o diff --git a/lib/crypto/sha256-generic.c b/lib/crypto/sha256-generic.c new file mode 100644 index 000000000000..a16ad4f25ebb --- /dev/null +++ b/lib/crypto/sha256-generic.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + * + * SHA-256 code by Jean-Luc Cooke . + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * Copyright (c) 2014 Red Hat Inc. 
+ */ + +#include +#include +#include +#include +#include + +static const u32 SHA256_K[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +static inline u32 Ch(u32 x, u32 y, u32 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u32 Maj(u32 x, u32 y, u32 z) +{ + return (x & y) | (z & (x | y)); +} + +#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) +#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) +#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) +#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) + +static inline void LOAD_OP(int I, u32 *W, const u8 *input) +{ + W[I] = get_unaligned_be32((__u32 *)input + I); +} + +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} + +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ + u32 t1, t2; \ + t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ + t2 = e0(a) + Maj(a, b, c); \ + d += t1; \ + h = t1 + t2; \ +} while (0) + +static void sha256_block_generic(u32 state[SHA256_STATE_WORDS], + const u8 *input, u32 W[64]) +{ + u32 a, b, c, d, e, f, g, h; + int i; + + /* load the input */ + for (i = 0; i < 16; i += 8) { + LOAD_OP(i + 0, W, input); + LOAD_OP(i + 1, W, input); + LOAD_OP(i + 2, W, input); 
+ LOAD_OP(i + 3, W, input); + LOAD_OP(i + 4, W, input); + LOAD_OP(i + 5, W, input); + LOAD_OP(i + 6, W, input); + LOAD_OP(i + 7, W, input); + } + + /* now blend */ + for (i = 16; i < 64; i += 8) { + BLEND_OP(i + 0, W); + BLEND_OP(i + 1, W); + BLEND_OP(i + 2, W); + BLEND_OP(i + 3, W); + BLEND_OP(i + 4, W); + BLEND_OP(i + 5, W); + BLEND_OP(i + 6, W); + BLEND_OP(i + 7, W); + } + + /* load the state into our registers */ + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + /* now iterate */ + for (i = 0; i < 64; i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; +} + +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + u32 W[64]; + + do { + sha256_block_generic(state, data, W); + data += SHA256_BLOCK_SIZE; + } while (--nblocks); + + memzero_explicit(W, sizeof(W)); +} +EXPORT_SYMBOL_GPL(sha256_blocks_generic); + +MODULE_DESCRIPTION("SHA-256 Algorithm (generic implementation)"); +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index a89bab377de1..4b19cf977ef1 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -11,148 +11,105 @@ * Copyright (c) 2014 Red Hat Inc. 
*/ -#include -#include +#include #include #include #include +#include -static const u32 SHA256_K[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, -}; - -static inline u32 Ch(u32 x, u32 y, u32 z) -{ - return z ^ (x & (y ^ z)); -} - -static inline u32 Maj(u32 x, u32 y, u32 z) -{ - return (x & y) | (z & (x | y)); -} - -#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) -#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) -#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) -#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) +/* + * If __DISABLE_EXPORTS is defined, then this file is being compiled for a + * pre-boot environment. In that case, ignore the kconfig options, pull the + * generic code into the same translation unit, and use that only. 
+ */ +#ifdef __DISABLE_EXPORTS +#include "sha256-generic.c" +#endif -static inline void LOAD_OP(int I, u32 *W, const u8 *input) +static inline void sha256_blocks(u32 state[SHA256_STATE_WORDS], const u8 *data, + size_t nblocks, bool force_generic) { - W[I] = get_unaligned_be32((__u32 *)input + I); +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) && !defined(__DISABLE_EXPORTS) + if (!force_generic) + return sha256_blocks_arch(state, data, nblocks); +#endif + sha256_blocks_generic(state, data, nblocks); } -static inline void BLEND_OP(int I, u32 *W) +static inline void __sha256_update(struct sha256_state *sctx, const u8 *data, + size_t len, bool force_generic) { - W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; -} - -#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ - u32 t1, t2; \ - t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ - t2 = e0(a) + Maj(a, b, c); \ - d += t1; \ - h = t1 + t2; \ -} while (0) + size_t partial = sctx->count % SHA256_BLOCK_SIZE; -static void sha256_transform(u32 *state, const u8 *input, u32 *W) -{ - u32 a, b, c, d, e, f, g, h; - int i; - - /* load the input */ - for (i = 0; i < 16; i += 8) { - LOAD_OP(i + 0, W, input); - LOAD_OP(i + 1, W, input); - LOAD_OP(i + 2, W, input); - LOAD_OP(i + 3, W, input); - LOAD_OP(i + 4, W, input); - LOAD_OP(i + 5, W, input); - LOAD_OP(i + 6, W, input); - LOAD_OP(i + 7, W, input); - } + sctx->count += len; - /* now blend */ - for (i = 16; i < 64; i += 8) { - BLEND_OP(i + 0, W); - BLEND_OP(i + 1, W); - BLEND_OP(i + 2, W); - BLEND_OP(i + 3, W); - BLEND_OP(i + 4, W); - BLEND_OP(i + 5, W); - BLEND_OP(i + 6, W); - BLEND_OP(i + 7, W); - } + if (partial + len >= SHA256_BLOCK_SIZE) { + size_t nblocks; - /* load the state into our registers */ - a = state[0]; b = state[1]; c = state[2]; d = state[3]; - e = state[4]; f = state[5]; g = state[6]; h = state[7]; - - /* now iterate */ - for (i = 0; i < 64; i += 8) { - SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); - SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); - 
SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); - SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); - SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); - SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); - SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); - SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); - } + if (partial) { + size_t l = SHA256_BLOCK_SIZE - partial; - state[0] += a; state[1] += b; state[2] += c; state[3] += d; - state[4] += e; state[5] += f; state[6] += g; state[7] += h; -} + memcpy(&sctx->buf[partial], data, l); + data += l; + len -= l; -void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks) -{ - u32 W[64]; + sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + } - do { - sha256_transform(sst->state, input, W); - input += SHA256_BLOCK_SIZE; - } while (--blocks); + nblocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; - memzero_explicit(W, sizeof(W)); + if (nblocks) { + sha256_blocks(sctx->state, data, nblocks, + force_generic); + data += nblocks * SHA256_BLOCK_SIZE; + } + partial = 0; + } + if (len) + memcpy(&sctx->buf[partial], data, len); } -EXPORT_SYMBOL_GPL(sha256_transform_blocks); void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) { - lib_sha256_base_do_update(sctx, data, len, sha256_transform_blocks); + __sha256_update(sctx, data, len, false); } EXPORT_SYMBOL(sha256_update); -static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_size) +static inline void __sha256_final(struct sha256_state *sctx, u8 *out, + size_t digest_size, bool force_generic) { - lib_sha256_base_do_finalize(sctx, sha256_transform_blocks); - lib_sha256_base_finish(sctx, out, digest_size); + const size_t bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + __be64 *bits = (__be64 *)&sctx->buf[bit_offset]; + size_t partial = sctx->count % SHA256_BLOCK_SIZE; + size_t i; + + sctx->buf[partial++] = 0x80; + if (partial > bit_offset) { + memset(&sctx->buf[partial], 0, SHA256_BLOCK_SIZE - partial); + 
sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + partial = 0; + } + + memset(&sctx->buf[partial], 0, bit_offset - partial); + *bits = cpu_to_be64(sctx->count << 3); + sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + + for (i = 0; i < digest_size; i += 4) + put_unaligned_be32(sctx->state[i / 4], out + i); + + memzero_explicit(sctx, sizeof(*sctx)); } void sha256_final(struct sha256_state *sctx, u8 *out) { - __sha256_final(sctx, out, 32); + __sha256_final(sctx, out, SHA256_DIGEST_SIZE, false); } EXPORT_SYMBOL(sha256_final); void sha224_final(struct sha256_state *sctx, u8 *out) { - __sha256_final(sctx, out, 28); + __sha256_final(sctx, out, SHA224_DIGEST_SIZE, false); } EXPORT_SYMBOL(sha224_final); @@ -166,5 +123,26 @@ void sha256(const u8 *data, unsigned int len, u8 *out) } EXPORT_SYMBOL(sha256); +#if IS_ENABLED(CONFIG_CRYPTO_SHA256) && !defined(__DISABLE_EXPORTS) +void sha256_update_generic(struct sha256_state *sctx, + const u8 *data, size_t len) +{ + __sha256_update(sctx, data, len, true); +} +EXPORT_SYMBOL(sha256_update_generic); + +void sha256_final_generic(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]) +{ + __sha256_final(sctx, out, SHA256_DIGEST_SIZE, true); +} +EXPORT_SYMBOL(sha256_final_generic); + +void sha224_final_generic(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]) +{ + __sha256_final(sctx, out, SHA224_DIGEST_SIZE, true); +} +EXPORT_SYMBOL(sha224_final_generic); +#endif + MODULE_DESCRIPTION("SHA-256 Algorithm"); MODULE_LICENSE("GPL"); -- 2.51.0 From ca4477e41c68b58043e67bc78074cd6fcc59ee5e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:27 -0700 Subject: [PATCH 15/16] crypto: arm/sha256 - implement library instead of shash Instead of providing crypto_shash algorithms for the arch-optimized SHA-256 code, instead implement the SHA-256 library. 
This is much simpler, it makes the SHA-256 library functions be arch-optimized, and it fixes the longstanding issue where the arch-optimized SHA-256 was disabled by default. SHA-256 still remains available through crypto_shash, but individual architectures no longer need to handle it. To merge the scalar, NEON, and CE code all into one module cleanly, add !CPU_V7M as a direct dependency of the CE code. Previously, !CPU_V7M was only a direct dependency of the scalar and NEON code. The result is still the same because CPU_V7M implies !KERNEL_MODE_NEON, so !CPU_V7M was already an indirect dependency of the CE code. To match sha256_blocks_arch(), change the type of the nblocks parameter of the assembly functions from int to size_t. The assembly functions actually already treated it as size_t. While renaming the assembly files, also fix the naming quirk where "sha2" meant sha256. (SHA-512 is also part of SHA-2.) Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/configs/exynos_defconfig | 1 - arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/arm/crypto/Kconfig | 21 ---- arch/arm/crypto/Makefile | 8 +- arch/arm/crypto/sha2-ce-glue.c | 87 -------------- arch/arm/crypto/sha256_glue.c | 107 ------------------ arch/arm/crypto/sha256_glue.h | 9 -- arch/arm/crypto/sha256_neon_glue.c | 75 ------------ arch/arm/lib/crypto/.gitignore | 1 + arch/arm/lib/crypto/Kconfig | 6 + arch/arm/lib/crypto/Makefile | 8 +- arch/arm/{ => lib}/crypto/sha256-armv4.pl | 0 .../sha2-ce-core.S => lib/crypto/sha256-ce.S} | 10 +- arch/arm/lib/crypto/sha256.c | 64 +++++++++++ 17 files changed, 84 insertions(+), 317 deletions(-) delete mode 100644 arch/arm/crypto/sha2-ce-glue.c delete mode 100644 arch/arm/crypto/sha256_glue.c delete mode 100644 arch/arm/crypto/sha256_glue.h delete mode 100644 arch/arm/crypto/sha256_neon_glue.c rename 
arch/arm/{ => lib}/crypto/sha256-armv4.pl (100%) rename arch/arm/{crypto/sha2-ce-core.S => lib/crypto/sha256-ce.S} (91%) create mode 100644 arch/arm/lib/crypto/sha256.c diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index e81a5d6c1c20..c6792c0256a6 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -364,7 +364,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index 275ddf7a3a14..4ec21f477c63 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -101,7 +101,6 @@ CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m -CONFIG_CRYPTO_SHA2_ARM_CE=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ad037c175fdb..96178acedad0 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1301,7 +1301,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m -CONFIG_CRYPTO_SHA2_ARM_CE=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 75b326bc7830..317f977e509e 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -697,7 +697,6 @@ CONFIG_SECURITY=y CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m 
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 24fca8608554..56be85752909 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -660,7 +660,6 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_SHA1_ARM=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_FONTS=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 1f889d6bab77..7efb9a8596e4 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -93,27 +93,6 @@ config CRYPTO_SHA1_ARM_CE Architecture: arm using ARMv8 Crypto Extensions -config CRYPTO_SHA2_ARM_CE - tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA256_ARM - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm using - - ARMv8 Crypto Extensions - -config CRYPTO_SHA256_ARM - tristate "Hash functions: SHA-224 and SHA-256 (NEON)" - select CRYPTO_HASH - depends on !CPU_V7M - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm using - - NEON (Advanced SIMD) extensions - config CRYPTO_SHA512_ARM tristate "Hash functions: SHA-384 and SHA-512 (NEON)" select CRYPTO_HASH diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ecabe6603e08..8479137c6e80 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o -obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o @@ -15,20 +14,16 @@ obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += 
aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o -sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o -sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o -sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o @@ -40,9 +35,8 @@ quiet_cmd_perl = PERL $@ $(obj)/%-core.S: $(src)/%-armv4.pl $(call cmd,perl) -clean-files += sha256-core.S sha512-core.S +clean-files += sha512-core.S aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 -AFLAGS_sha256-core.o += $(aflags-thumb2-y) AFLAGS_sha512-core.o += $(aflags-thumb2-y) diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c deleted file mode 100644 index 1e9d16f79678..000000000000 --- a/arch/arm/crypto/sha2-ce-glue.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include -#include - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); - -asmlinkage void sha2_ce_transform(struct crypto_sha256_state *sst, - u8 const *src, int blocks); - -static int sha2_ce_update(struct shash_desc *desc, const u8 
*data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha256_base_do_update_blocks(desc, data, len, - sha2_ce_transform); - kernel_neon_end(); - return remain; -} - -static int sha2_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha256_base_do_finup(desc, data, len, sha2_ce_transform); - kernel_neon_end(); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[] = { { - .init = sha224_base_init, - .update = sha2_ce_update, - .finup = sha2_ce_finup, - .descsize = sizeof(struct crypto_sha256_state), - .digestsize = SHA224_DIGEST_SIZE, - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .init = sha256_base_init, - .update = sha2_ce_update, - .finup = sha2_ce_finup, - .descsize = sizeof(struct crypto_sha256_state), - .digestsize = SHA256_DIGEST_SIZE, - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha2_ce_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha2_ce_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA2, sha2_ce_mod_init); -module_exit(sha2_ce_mod_fini); diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c deleted file mode 100644 index d04c4e6bae6d..000000000000 --- a/arch/arm/crypto/sha256_glue.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for the SHA256 Secure Hash Algorithm assembly implementation - * using optimized ARM assembler and NEON instructions. 
- * - * Copyright © 2015 Google Inc. - * - * This file is based on sha256_ssse3_glue.c: - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen - */ - -#include -#include -#include -#include -#include -#include - -#include "sha256_glue.h" - -asmlinkage void sha256_block_data_order(struct crypto_sha256_state *state, - const u8 *data, int num_blks); - -static int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - /* make sure casting to sha256_block_fn() is safe */ - BUILD_BUG_ON(offsetof(struct crypto_sha256_state, state) != 0); - - return sha256_base_do_update_blocks(desc, data, len, - sha256_block_data_order); -} - -static int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha256_base_do_finup(desc, data, len, sha256_block_data_order); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_arm_update, - .finup = crypto_sha256_arm_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-asm", - .cra_priority = 150, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_arm_update, - .finup = crypto_sha256_arm_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-asm", - .cra_priority = 150, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha256_mod_init(void) -{ - int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); - - if (res < 0) - return res; - - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { 
- res = crypto_register_shashes(sha256_neon_algs, - ARRAY_SIZE(sha256_neon_algs)); - - if (res < 0) - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - } - - return res; -} - -static void __exit sha256_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) - crypto_unregister_shashes(sha256_neon_algs, - ARRAY_SIZE(sha256_neon_algs)); -} - -module_init(sha256_mod_init); -module_exit(sha256_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); - -MODULE_ALIAS_CRYPTO("sha256"); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h deleted file mode 100644 index 9881c9a115d1..000000000000 --- a/arch/arm/crypto/sha256_glue.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CRYPTO_SHA256_GLUE_H -#define _CRYPTO_SHA256_GLUE_H - -#include - -extern struct shash_alg sha256_neon_algs[2]; - -#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c deleted file mode 100644 index 76eb3cdc21c9..000000000000 --- a/arch/arm/crypto/sha256_neon_glue.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for the SHA256 Secure Hash Algorithm assembly implementation - * using NEON instructions. - * - * Copyright © 2015 Google Inc. 
- * - * This file is based on sha512_neon_glue.c: - * Copyright © 2014 Jussi Kivilinna - */ - -#include -#include -#include -#include -#include -#include - -#include "sha256_glue.h" - -asmlinkage void sha256_block_data_order_neon( - struct crypto_sha256_state *digest, const u8 *data, int num_blks); - -static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha256_base_do_update_blocks(desc, data, len, - sha256_block_data_order_neon); - kernel_neon_end(); - return remain; -} - -static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha256_base_do_finup(desc, data, len, sha256_block_data_order_neon); - kernel_neon_end(); - return sha256_base_finish(desc, out); -} - -struct shash_alg sha256_neon_algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_neon_update, - .finup = crypto_sha256_neon_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_neon_update, - .finup = crypto_sha256_neon_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; diff --git a/arch/arm/lib/crypto/.gitignore b/arch/arm/lib/crypto/.gitignore index 0d47d4f21c6d..12d74d8b03d0 100644 --- a/arch/arm/lib/crypto/.gitignore +++ b/arch/arm/lib/crypto/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: 
GPL-2.0-only poly1305-core.S +sha256-core.S diff --git a/arch/arm/lib/crypto/Kconfig b/arch/arm/lib/crypto/Kconfig index e8444fd0aae3..9f3ff30f4032 100644 --- a/arch/arm/lib/crypto/Kconfig +++ b/arch/arm/lib/crypto/Kconfig @@ -22,3 +22,9 @@ config CRYPTO_POLY1305_ARM tristate default CRYPTO_LIB_POLY1305 select CRYPTO_ARCH_HAVE_LIB_POLY1305 + +config CRYPTO_SHA256_ARM + tristate + depends on !CPU_V7M + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 diff --git a/arch/arm/lib/crypto/Makefile b/arch/arm/lib/crypto/Makefile index 4c042a4c77ed..431f77c3ff6f 100644 --- a/arch/arm/lib/crypto/Makefile +++ b/arch/arm/lib/crypto/Makefile @@ -10,13 +10,17 @@ chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o poly1305-arm-y := poly1305-core.o poly1305-glue.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o +sha256-arm-y := sha256.o sha256-core.o +sha256-arm-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o + quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) $(obj)/%-core.S: $(src)/%-armv4.pl $(call cmd,perl) -clean-files += poly1305-core.S +clean-files += poly1305-core.S sha256-core.S aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 @@ -24,3 +28,5 @@ aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) + +AFLAGS_sha256-core.o += $(aflags-thumb2-y) diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/lib/crypto/sha256-armv4.pl similarity index 100% rename from arch/arm/crypto/sha256-armv4.pl rename to arch/arm/lib/crypto/sha256-armv4.pl diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/lib/crypto/sha256-ce.S similarity index 91% rename from arch/arm/crypto/sha2-ce-core.S rename to arch/arm/lib/crypto/sha256-ce.S index 
b6369d2440a1..ac2c9b01b22d 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/lib/crypto/sha256-ce.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions + * sha256-ce.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions * * Copyright (C) 2015 Linaro Ltd. * Author: Ard Biesheuvel @@ -67,10 +67,10 @@ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 /* - * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, - int blocks); + * void sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + * const u8 *data, size_t nblocks); */ -ENTRY(sha2_ce_transform) +ENTRY(sha256_ce_transform) /* load state */ vld1.32 {dga-dgb}, [r0] @@ -120,4 +120,4 @@ ENTRY(sha2_ce_transform) /* store new state */ vst1.32 {dga-dgb}, [r0] bx lr -ENDPROC(sha2_ce_transform) +ENDPROC(sha256_ce_transform) diff --git a/arch/arm/lib/crypto/sha256.c b/arch/arm/lib/crypto/sha256.c new file mode 100644 index 000000000000..3a8dfc304807 --- /dev/null +++ b/arch/arm/lib/crypto/sha256.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized for ARM + * + * Copyright 2025 Google LLC + */ +#include +#include +#include +#include +#include + +asmlinkage void sha256_block_data_order(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage void sha256_block_data_order_neon(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage void sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + if (static_branch_likely(&have_ce)) + sha256_ce_transform(state, data, 
nblocks); + else + sha256_block_data_order_neon(state, data, nblocks); + kernel_neon_end(); + } else { + sha256_block_data_order(state, data, nblocks); + } +} +EXPORT_SYMBOL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + /* We always can use at least the ARM scalar implementation. */ + return true; +} +EXPORT_SYMBOL(sha256_is_arch_optimized); + +static int __init sha256_arm_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + static_branch_enable(&have_neon); + if (elf_hwcap2 & HWCAP2_SHA2) + static_branch_enable(&have_ce); + } + return 0; +} +arch_initcall(sha256_arm_mod_init); + +static void __exit sha256_arm_mod_exit(void) +{ +} +module_exit(sha256_arm_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 optimized for ARM"); -- 2.51.0 From 642cfc0680ff9aae73cd87d6fffcc84d9434938b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:28 -0700 Subject: [PATCH 16/16] crypto: arm64/sha256 - remove obsolete chunking logic Since kernel-mode NEON sections are now preemptible on arm64, there is no longer any need to limit the length of them. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha256-glue.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 26f9fdfae87b..d63ea82e1374 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -86,23 +86,8 @@ static struct shash_alg algs[] = { { static int sha256_update_neon(struct shash_desc *desc, const u8 *data, unsigned int len) { - do { - unsigned int chunk = len; - - /* - * Don't hog the CPU for the entire time it takes to process all - * input when running on a preemptible kernel, but process the - * data block by block instead. 
- */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - chunk = SHA256_BLOCK_SIZE; - - chunk -= sha256_base_do_update_blocks(desc, data, chunk, - sha256_neon_transform); - data += chunk; - len -= chunk; - } while (len >= SHA256_BLOCK_SIZE); - return len; + return sha256_base_do_update_blocks(desc, data, len, + sha256_neon_transform); } static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, -- 2.51.0