From 9b84cb897803c484e15eb1885cd45a895ce1e436 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:07 +0800 Subject: [PATCH 01/16] crypto: lib/poly1305 - Add block-only interface Add a block-only interface for poly1305. Implement the generic code first. Also use the generic partial block helper. Signed-off-by: Herbert Xu --- include/crypto/internal/poly1305.h | 28 ++++++++++++++-- include/crypto/poly1305.h | 25 ++++++++++---- lib/crypto/poly1305.c | 54 +++++++++++++----------------- 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index e614594f88c1..c60315f47562 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -6,9 +6,8 @@ #ifndef _CRYPTO_INTERNAL_POLY1305_H #define _CRYPTO_INTERNAL_POLY1305_H -#include -#include #include +#include /* * Poly1305 core functions. These only accept whole blocks; the caller must @@ -31,4 +30,29 @@ void poly1305_core_blocks(struct poly1305_state *state, void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], void *dst); +void poly1305_block_init_arch(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_block_init_generic(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit); + +static inline void poly1305_blocks_generic(struct poly1305_block_state *state, + const u8 *src, unsigned int len, + u32 padbit) +{ + poly1305_core_blocks(&state->h, &state->core_r, src, + len / POLY1305_BLOCK_SIZE, padbit); +} + +void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); + +static inline void poly1305_emit_generic(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + poly1305_core_emit(state, nonce, digest); +} + #endif diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 6e21ec2d1dc2..027d74842cd5 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -7,7 +7,6 @@ #define _CRYPTO_POLY1305_H #include -#include #define POLY1305_BLOCK_SIZE 16 #define POLY1305_KEY_SIZE 32 @@ -38,6 +37,17 @@ struct poly1305_state { }; }; +/* Combined state for block function. */ +struct poly1305_block_state { + /* accumulator */ + struct poly1305_state h; + /* key */ + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; +}; + struct poly1305_desc_ctx { /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; @@ -45,12 +55,15 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - /* accumulator */ - struct poly1305_state h; - /* key */ union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; + struct { + struct poly1305_state h; + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; + }; + struct poly1305_block_state state; }; }; diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index b633b043f0f6..9fec64a599c1 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -7,54 +7,45 @@ * Based on public domain code by Andrew Moon and Daniel J. Bernstein. */ +#include #include #include #include +#include #include +void poly1305_block_init_generic(struct poly1305_block_state *desc, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_core_init(&desc->h); + poly1305_core_setkey(&desc->core_r, raw_key); +} +EXPORT_SYMBOL_GPL(poly1305_block_init_generic); + void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_core_setkey(&desc->core_r, key); desc->s[0] = get_unaligned_le32(key + 16); desc->s[1] = get_unaligned_le32(key + 20); desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); - poly1305_core_init(&desc->h); desc->buflen = 0; + poly1305_block_init_generic(&desc->state, key); } EXPORT_SYMBOL_GPL(poly1305_init_generic); +static inline void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, unsigned int len) +{ + poly1305_blocks_generic(state, src, len, 1); +} + void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { - unsigned int bytes; - - if (unlikely(desc->buflen)) { - bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen); - memcpy(desc->buf + desc->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - desc->buflen += bytes; - - if (desc->buflen == POLY1305_BLOCK_SIZE) { - poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, - 1, 1); - desc->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_core_blocks(&desc->h, &desc->core_r, src, - nbytes / POLY1305_BLOCK_SIZE, 1); - src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - desc->buflen = nbytes; - memcpy(desc->buf, src, nbytes); - } + desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, + src, nbytes, POLY1305_BLOCK_SIZE, + desc->buf, desc->buflen); } EXPORT_SYMBOL_GPL(poly1305_update_generic); @@ -64,10 +55,11 @@ void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0); + poly1305_blocks_generic(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_core_emit(&desc->h, desc->s, dst); + poly1305_emit_generic(&desc->h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL_GPL(poly1305_final_generic); -- 2.51.0 From 773426f4771bdd82ac5c834bf4c1775315c73a46 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:09 +0800 Subject: [PATCH 02/16] crypto: arm/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- arch/arm/lib/crypto/poly1305-armv4.pl | 4 +- arch/arm/lib/crypto/poly1305-glue.c | 80 +++++++++++++++------------ 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/arch/arm/lib/crypto/poly1305-armv4.pl b/arch/arm/lib/crypto/poly1305-armv4.pl index 6d79498d3115..d57c6e2fc84a 100644 --- a/arch/arm/lib/crypto/poly1305-armv4.pl +++ b/arch/arm/lib/crypto/poly1305-armv4.pl @@ -43,9 +43,9 @@ $code.=<<___; #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_init_arm +# define poly1305_init poly1305_block_init_arch # define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arm +# define poly1305_emit poly1305_emit_arch .globl poly1305_blocks_neon #endif diff --git a/arch/arm/lib/crypto/poly1305-glue.c b/arch/arm/lib/crypto/poly1305-glue.c index 42d0ebde1ae1..3ee16048ec7c 100644 --- a/arch/arm/lib/crypto/poly1305-glue.c +++ b/arch/arm/lib/crypto/poly1305-glue.c @@ -7,20 +7,29 @@ #include #include -#include -#include -#include +#include #include #include +#include #include +#include #include -void poly1305_init_arm(void *state, const u8 *key); -void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); -void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); - -void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); + +void __weak poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit) { } @@ -28,21 +37,39 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_arm(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + len = round_down(len, POLY1305_BLOCK_SIZE); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon)) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm(state, src, len, padbit); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int nbytes) { - bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - crypto_simd_usable(); - if (unlikely(dctx->buflen)) { u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); @@ -52,30 +79,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arm(&dctx->h, dctx->buf, - POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && do_neon) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, todo, 1); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else { - poly1305_blocks_arm(&dctx->h, src, len, 1); - src += len; - } + poly1305_blocks_arch(&dctx->state, src, nbytes, 1); + src += round_down(nbytes, POLY1305_BLOCK_SIZE); nbytes %= POLY1305_BLOCK_SIZE; } @@ -92,10 +104,10 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_arm(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From a59e5468a921937cb7317892779c67046ad9f5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:11 +0800 Subject: [PATCH 03/16] crypto: arm64/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- arch/arm64/lib/crypto/Makefile | 3 +- arch/arm64/lib/crypto/poly1305-glue.c | 71 ++++++++++++++++----------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/arch/arm64/lib/crypto/Makefile b/arch/arm64/lib/crypto/Makefile index ac624c3effda..6207088397a7 100644 --- a/arch/arm64/lib/crypto/Makefile +++ b/arch/arm64/lib/crypto/Makefile @@ -5,7 +5,8 @@ chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o poly1305-neon-y := poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch +AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index 906970dd5373..d66a820e32d5 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -7,32 +7,60 @@ #include #include -#include -#include -#include +#include #include #include +#include #include +#include #include -asmlinkage void poly1305_init_arm64(void *state, const u8 *key); -asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_arm64(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + len = round_down(len, POLY1305_BLOCK_SIZE); + if (static_branch_likely(&have_neon)) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, 1); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks(state, src, len, 1); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int nbytes) { @@ -45,29 +73,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && crypto_simd_usable()) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, todo, 1); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else { - poly1305_blocks(&dctx->h, src, len, 1); - src += len; - } + poly1305_blocks_arch(&dctx->state, src, nbytes, 1); + src += round_down(nbytes, POLY1305_BLOCK_SIZE); nbytes %= POLY1305_BLOCK_SIZE; } @@ -84,10 +98,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From ffe5ca295d757d912b226239df17396c3cd8dbca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:14 +0800 Subject: [PATCH 04/16] crypto: mips/poly1305 - Add block-only interface Add block-only interface. Signed-off-by: Herbert Xu --- arch/mips/lib/crypto/poly1305-glue.c | 29 ++++++++++++++++++--------- arch/mips/lib/crypto/poly1305-mips.pl | 12 +++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c index 576e7a58e0b1..2fea4cacfe27 100644 --- a/arch/mips/lib/crypto/poly1305-glue.c +++ b/arch/mips/lib/crypto/poly1305-glue.c @@ -5,23 +5,33 @@ * Copyright (C) 2019 Linaro Ltd. */ -#include +#include #include +#include #include +#include #include -asmlinkage void poly1305_init_mips(void *state, const u8 *key); -asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_init_mips(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); @@ -37,7 +47,7 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_mips(&dctx->h, dctx->buf, + poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } @@ -46,7 +56,7 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - poly1305_blocks_mips(&dctx->h, src, len, 1); + poly1305_blocks_arch(&dctx->state, src, len, 1); src += len; nbytes %= POLY1305_BLOCK_SIZE; } @@ -64,10 +74,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_mips(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/mips/lib/crypto/poly1305-mips.pl b/arch/mips/lib/crypto/poly1305-mips.pl index b05bab884ed2..399f10c3e385 100644 --- a/arch/mips/lib/crypto/poly1305-mips.pl +++ b/arch/mips/lib/crypto/poly1305-mips.pl @@ -93,9 +93,9 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips +# define poly1305_init poly1305_block_init_arch +# define poly1305_blocks poly1305_blocks_arch +# define poly1305_emit poly1305_emit_arch #endif #if defined(__MIPSEB__) && !defined(MIPSEB) @@ -565,9 +565,9 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips +# define poly1305_init poly1305_block_init_arch +# define poly1305_blocks poly1305_blocks_arch +# define poly1305_emit poly1305_emit_arch #endif #if defined(__MIPSEB__) && !defined(MIPSEB) -- 2.51.0 From 14d31979145dbafaeb28a5bc4c90c4db918bb772 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:16 +0800 Subject: [PATCH 05/16] crypto: powerpc/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- arch/powerpc/lib/crypto/poly1305-p10-glue.c | 84 ++++++++++++--------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/lib/crypto/poly1305-p10-glue.c b/arch/powerpc/lib/crypto/poly1305-p10-glue.c index 00617f4c58e6..708435beaba6 100644 --- a/arch/powerpc/lib/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/lib/crypto/poly1305-p10-glue.c @@ -4,19 +4,20 @@ * * Copyright 2023- IBM Corp. All rights reserved. */ +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include #include -#include -#include -asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen); -asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit); -asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst); +asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); +asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); @@ -32,22 +33,49 @@ static void vsx_end(void) preempt_enable(); } -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) +void poly1305_block_init_arch(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { if (!static_key_enabled(&have_p10)) - return poly1305_init_generic(dctx, key); + return poly1305_block_init_generic(dctx, raw_key); dctx->h = (struct poly1305_state){}; - dctx->core_r.key.r64[0] = get_unaligned_le64(key + 0); - dctx->core_r.key.r64[1] = get_unaligned_le64(key + 8); + dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); + dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); +} +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) +{ dctx->s[0] = get_unaligned_le32(key + 16); dctx->s[1] = get_unaligned_le32(key + 20); dctx->s[2] = get_unaligned_le32(key + 24); dctx->s[3] = get_unaligned_le32(key + 28); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_blocks_generic(state, src, len, padbit); + vsx_begin(); + if (len >= POLY1305_BLOCK_SIZE * 4) { + poly1305_p10le_4blocks(state, src, len); + src += len - (len % (POLY1305_BLOCK_SIZE * 4)); + len %= POLY1305_BLOCK_SIZE * 4; + } + while (len >= POLY1305_BLOCK_SIZE) { + poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); + len -= POLY1305_BLOCK_SIZE; + src += POLY1305_BLOCK_SIZE; + } + vsx_end(); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -64,28 +92,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, dctx->buflen += bytes; if (dctx->buflen < POLY1305_BLOCK_SIZE) return; - vsx_begin(); - poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); - vsx_end(); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) { - vsx_begin(); - poly1305_p10le_4blocks(&dctx->h, src, srclen); - vsx_end(); - src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4)); - srclen %= POLY1305_BLOCK_SIZE * 4; - } - while (srclen >= POLY1305_BLOCK_SIZE) { - vsx_begin(); - poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1); - vsx_end(); - srclen -= POLY1305_BLOCK_SIZE; - src += POLY1305_BLOCK_SIZE; - } + poly1305_blocks_arch(&dctx->state, src, srclen, 1); + src += srclen - (srclen % POLY1305_BLOCK_SIZE); + srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { @@ -104,12 +119,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - vsx_begin(); - poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - vsx_end(); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_64(&dctx->h, &dctx->s, dst); + poly1305_emit_arch(&dctx->h, dst, dctx->s); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From 318c53ae02f2abf1542062543741068278780d09 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:18 +0800 Subject: [PATCH 06/16] crypto: x86/poly1305 - Add block-only interface Add block-only interface. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- .../lib/crypto/poly1305-x86_64-cryptogams.pl | 33 +++-- arch/x86/lib/crypto/poly1305_glue.c | 121 +++++++----------- 2 files changed, 69 insertions(+), 85 deletions(-) diff --git a/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl index 409ec6955733..501827254fed 100644 --- a/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/lib/crypto/poly1305-x86_64-cryptogams.pl @@ -118,6 +118,19 @@ sub declare_function() { } } +sub declare_typed_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= "SYM_TYPED_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + sub end_function() { my ($name) = @_; if($kernel) { @@ -128,7 +141,7 @@ sub end_function() { } $code.=<<___ if $kernel; -#include +#include ___ if ($avx) { @@ -236,14 +249,14 @@ ___ $code.=<<___ if (!$kernel); .extern OPENSSL_ia32cap_P -.globl poly1305_init_x86_64 -.hidden poly1305_init_x86_64 +.globl poly1305_block_init_arch +.hidden poly1305_block_init_arch .globl poly1305_blocks_x86_64 .hidden poly1305_blocks_x86_64 .globl poly1305_emit_x86_64 .hidden poly1305_emit_x86_64 ___ -&declare_function("poly1305_init_x86_64", 32, 3); +&declare_typed_function("poly1305_block_init_arch", 32, 3); $code.=<<___; xor %eax,%eax mov %rax,0($ctx) # initialize hash value @@ -298,7 +311,7 @@ $code.=<<___; .Lno_key: RET ___ -&end_function("poly1305_init_x86_64"); +&end_function("poly1305_block_init_arch"); &declare_function("poly1305_blocks_x86_64", 32, 4); $code.=<<___; @@ -4105,9 +4118,9 @@ avx_handler: .section .pdata .align 4 - .rva .LSEH_begin_poly1305_init_x86_64 - .rva .LSEH_end_poly1305_init_x86_64 - .rva .LSEH_info_poly1305_init_x86_64 + .rva .LSEH_begin_poly1305_block_init_arch + .rva .LSEH_end_poly1305_block_init_arch + .rva .LSEH_info_poly1305_block_init_arch .rva .LSEH_begin_poly1305_blocks_x86_64 .rva .LSEH_end_poly1305_blocks_x86_64 @@ -4155,10 +4168,10 @@ ___ $code.=<<___; .section .xdata .align 8 -.LSEH_info_poly1305_init_x86_64: +.LSEH_info_poly1305_block_init_arch: .byte 9,0,0,0 .rva se_handler - .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init_arch .LSEH_info_poly1305_blocks_x86_64: .byte 9,0,0,0 diff --git a/arch/x86/lib/crypto/poly1305_glue.c b/arch/x86/lib/crypto/poly1305_glue.c index cff35ca5822a..d98764ec3b47 100644 --- a/arch/x86/lib/crypto/poly1305_glue.c +++ b/arch/x86/lib/crypto/poly1305_glue.c @@ -3,34 +3,15 @@ * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. */ -#include -#include +#include +#include +#include #include #include #include #include +#include #include -#include -#include - -asmlinkage void poly1305_init_x86_64(void *ctx, - const u8 key[POLY1305_BLOCK_SIZE]); -asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, - const size_t len, const u32 padbit); -asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, - const size_t len, const u32 padbit); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); struct poly1305_arch_internal { union { @@ -45,64 +26,50 @@ struct poly1305_arch_internal { struct { u32 r2, r1, r4, r3; } rn[9]; }; -/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit - * the unfortunate situation of using AVX and then having to go back to scalar - * -- because the user is silly and has called the update function from two - * separate contexts -- then we need to convert back to the original base before - * proceeding. It is possible to reason that the initial reduction below is - * sufficient given the implementation invariants. However, for an avoidance of - * doubt and because this is not performance critical, we do the full reduction - * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py - */ -static void convert_to_base2_64(void *ctx) -{ - struct poly1305_arch_internal *state = ctx; - u32 cy; - - if (!state->is_base2_26) - return; - - cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; - cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; - cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; - cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; - state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; - state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); - state->hs[2] = state->h[4] >> 24; -#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) - cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); - state->hs[2] &= 3; - state->hs[0] += cy; - state->hs[1] += (cy = ULT(state->hs[0], cy)); - state->hs[2] += ULT(state->hs[1], cy); -#undef ULT - state->is_base2_26 = 0; -} +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); -static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]) -{ - poly1305_init_x86_64(ctx, key); -} +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); -static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, - const u32 padbit) +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp, + unsigned int len, u32 padbit) { - struct poly1305_arch_internal *state = ctx; + struct poly1305_arch_internal *ctx = + container_of(&state->h.h, struct poly1305_arch_internal, h); /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || SZ_4K % POLY1305_BLOCK_SIZE); - if (!static_branch_likely(&poly1305_use_avx) || - (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || - !crypto_simd_usable()) { - convert_to_base2_64(ctx); + if (!static_branch_likely(&poly1305_use_avx)) { poly1305_blocks_x86_64(ctx, inp, len, padbit); return; } do { - const size_t bytes = min_t(size_t, len, SZ_4K); + const unsigned int bytes = min(len, SZ_4K); kernel_fpu_begin(); if (static_branch_likely(&poly1305_use_avx512)) @@ -117,24 +84,26 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, inp += bytes; } while (len); } +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) +void poly1305_emit_arch(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { if (!static_branch_likely(&poly1305_use_avx)) poly1305_emit_x86_64(ctx, mac, nonce); else poly1305_emit_avx(ctx, mac, nonce); } +EXPORT_SYMBOL_GPL(poly1305_emit_arch); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { - poly1305_simd_init(&dctx->h, key); dctx->s[0] = get_unaligned_le32(&key[16]); dctx->s[1] = get_unaligned_le32(&key[20]); dctx->s[2] = get_unaligned_le32(&key[24]); dctx->s[3] = get_unaligned_le32(&key[28]); dctx->buflen = 0; + poly1305_block_init_arch(&dctx->state, key); } EXPORT_SYMBOL(poly1305_init_arch); @@ -151,14 +120,15 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - poly1305_simd_blocks(&dctx->h, src, bytes, 1); + poly1305_blocks_arch(&dctx->state, src, bytes, 1); src += bytes; srclen -= bytes; } @@ -176,10 +146,11 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_blocks_arch(&dctx->state, dctx->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_simd_emit(&dctx->h, dst, dctx->s); + poly1305_emit_arch(&dctx->h, dst, dctx->s); memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); -- 2.51.0 From a298765e28adaea199f722142c10dae7e24dedf8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:21 +0800 Subject: [PATCH 07/16] crypto: chacha20poly1305 - Use lib/crypto poly1305 Since the poly1305 algorithm is fixed, there is no point in going through the Crypto API for it. Use the lib/crypto poly1305 interface instead. For compatiblity keep the poly1305 parameter in the algorithm name. Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 +- crypto/chacha20poly1305.c | 319 ++++++++------------------------------ 2 files changed, 65 insertions(+), 256 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 9878286d1d68..f87e2a26d2dd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -784,8 +784,8 @@ config CRYPTO_AEGIS128_SIMD config CRYPTO_CHACHA20POLY1305 tristate "ChaCha20-Poly1305" select CRYPTO_CHACHA20 - select CRYPTO_POLY1305 select CRYPTO_AEAD + select CRYPTO_LIB_POLY1305 select CRYPTO_MANAGER help ChaCha20 stream cipher and Poly1305 authenticator combined diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index d740849f1c19..b29f66ba1e2f 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -12,36 +12,23 @@ #include #include #include -#include #include +#include #include +#include struct chachapoly_instance_ctx { struct crypto_skcipher_spawn chacha; - struct crypto_ahash_spawn poly; unsigned int saltlen; }; struct chachapoly_ctx { struct crypto_skcipher *chacha; - struct crypto_ahash *poly; /* key bytes we use for the ChaCha20 IV */ unsigned int saltlen; u8 salt[] __counted_by(saltlen); }; -struct poly_req { - /* zero byte padding for AD/ciphertext, as needed */ - u8 pad[POLY1305_BLOCK_SIZE]; - /* tail data with AD/ciphertext lengths */ - struct { - __le64 assoclen; - __le64 cryptlen; - } tail; - struct scatterlist src[1]; - struct ahash_request req; /* must be last member */ -}; - struct chacha_req { u8 iv[CHACHA_IV_SIZE]; struct scatterlist src[1]; @@ -62,7 +49,6 @@ struct chachapoly_req_ctx { /* request flags, with MAY_SLEEP cleared if needed */ u32 flags; union { - struct poly_req poly; struct chacha_req chacha; } u; }; @@ -105,16 +91,6 @@ static int poly_verify_tag(struct aead_request *req) return 0; } -static int poly_copy_tag(struct aead_request *req) -{ - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - - scatterwalk_map_and_copy(rctx->tag, req->dst, - req->assoclen + rctx->cryptlen, - sizeof(rctx->tag), 1); - return 0; -} - static void chacha_decrypt_done(void *data, int err) { async_done_continue(data, err, poly_verify_tag); @@ -151,210 +127,76 @@ skip: return poly_verify_tag(req); } -static int poly_tail_continue(struct aead_request *req) -{ - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - - if (rctx->cryptlen == req->cryptlen) /* encrypting */ - return poly_copy_tag(req); - - return chacha_decrypt(req); -} - -static void poly_tail_done(void *data, int err) -{ - async_done_continue(data, err, poly_tail_continue); -} - -static int poly_tail(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; - - preq->tail.assoclen = cpu_to_le64(rctx->assoclen); - preq->tail.cryptlen = cpu_to_le64(rctx->cryptlen); - sg_init_one(preq->src, &preq->tail, sizeof(preq->tail)); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_tail_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, - rctx->tag, sizeof(preq->tail)); - - err = crypto_ahash_finup(&preq->req); - if (err) - return err; - - return poly_tail_continue(req); -} - -static void poly_cipherpad_done(void *data, int err) -{ - async_done_continue(data, err, poly_tail); -} - -static int poly_cipherpad(struct aead_request *req) +static int poly_hash(struct aead_request *req) { - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; + const void *zp = page_address(ZERO_PAGE(0)); + struct scatterlist *sg = req->src; + struct poly1305_desc_ctx desc; + struct scatter_walk walk; + struct { + union { + struct { + __le64 assoclen; + __le64 cryptlen; + }; + u8 u8[16]; + }; + } tail; unsigned int padlen; - int err; - - padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE; - memset(preq->pad, 0, sizeof(preq->pad)); - sg_init_one(preq->src, preq->pad, padlen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_cipherpad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen); + unsigned int total; - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_tail(req); -} - -static void poly_cipher_done(void *data, int err) -{ - async_done_continue(data, err, poly_cipherpad); -} - -static int poly_cipher(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - struct scatterlist *crypt = req->src; - int err; + if (sg != req->dst) + memcpy_sglist(req->dst, sg, req->assoclen); if (rctx->cryptlen == req->cryptlen) /* encrypting */ - crypt = req->dst; - - crypt = scatterwalk_ffwd(rctx->src, crypt, req->assoclen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_cipher_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, crypt, NULL, rctx->cryptlen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; + sg = req->dst; - return poly_cipherpad(req); -} + poly1305_init(&desc, rctx->key); + scatterwalk_start(&walk, sg); -static void poly_adpad_done(void *data, int err) -{ - async_done_continue(data, err, poly_cipher); -} + total = rctx->assoclen; + while (total) { + unsigned int n = scatterwalk_next(&walk, total); -static int poly_adpad(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - unsigned int padlen; - int err; + poly1305_update(&desc, walk.addr, n); + scatterwalk_done_src(&walk, n); + total -= n; + } padlen = -rctx->assoclen % POLY1305_BLOCK_SIZE; - memset(preq->pad, 0, sizeof(preq->pad)); - sg_init_one(preq->src, preq->pad, padlen); - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_adpad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_cipher(req); -} - -static void poly_ad_done(void *data, int err) -{ - async_done_continue(data, err, poly_adpad); -} - -static int poly_ad(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; - - ahash_request_set_callback(&preq->req, rctx->flags, - poly_ad_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, req->src, NULL, rctx->assoclen); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_adpad(req); -} - -static void poly_setkey_done(void *data, int err) -{ - async_done_continue(data, err, poly_ad); -} + poly1305_update(&desc, zp, padlen); -static int poly_setkey(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; + scatterwalk_skip(&walk, req->assoclen - rctx->assoclen); - sg_init_one(preq->src, rctx->key, sizeof(rctx->key)); + total = rctx->cryptlen; + while (total) { + unsigned int n = scatterwalk_next(&walk, total); - ahash_request_set_callback(&preq->req, rctx->flags, - poly_setkey_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); - ahash_request_set_crypt(&preq->req, preq->src, NULL, sizeof(rctx->key)); - - err = crypto_ahash_update(&preq->req); - if (err) - return err; - - return poly_ad(req); -} - -static void poly_init_done(void *data, int err) -{ - async_done_continue(data, err, poly_setkey); -} + poly1305_update(&desc, walk.addr, n); + scatterwalk_done_src(&walk, n); + total -= n; + } -static int poly_init(struct aead_request *req) -{ - struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct chachapoly_req_ctx *rctx = aead_request_ctx(req); - struct poly_req *preq = &rctx->u.poly; - int err; + padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE; + poly1305_update(&desc, zp, padlen); - ahash_request_set_callback(&preq->req, rctx->flags, - poly_init_done, req); - ahash_request_set_tfm(&preq->req, ctx->poly); + tail.assoclen = cpu_to_le64(rctx->assoclen); + tail.cryptlen = cpu_to_le64(rctx->cryptlen); + poly1305_update(&desc, tail.u8, sizeof(tail)); + memzero_explicit(&tail, sizeof(tail)); + poly1305_final(&desc, rctx->tag); - err = crypto_ahash_init(&preq->req); - if (err) - return err; + if (rctx->cryptlen != req->cryptlen) + return chacha_decrypt(req); - return poly_setkey(req); + memcpy_to_scatterwalk(&walk, rctx->tag, sizeof(rctx->tag)); + return 0; } static void poly_genkey_done(void *data, int err) { - async_done_continue(data, err, poly_init); + async_done_continue(data, err, poly_hash); } static int poly_genkey(struct aead_request *req) @@ -388,7 +230,7 @@ static int poly_genkey(struct aead_request *req) if (err) return err; - return poly_init(req); + return poly_hash(req); } static void chacha_encrypt_done(void *data, int err) @@ -437,14 +279,7 @@ static int chachapoly_encrypt(struct aead_request *req) /* encrypt call chain: * - chacha_encrypt/done() * - poly_genkey/done() - * - poly_init/done() - * - poly_setkey/done() - * - poly_ad/done() - * - poly_adpad/done() - * - poly_cipher/done() - * - poly_cipherpad/done() - * - poly_tail/done/continue() - * - poly_copy_tag() + * - poly_hash() */ return chacha_encrypt(req); } @@ -458,13 +293,7 @@ static int chachapoly_decrypt(struct aead_request *req) /* decrypt call chain: * - poly_genkey/done() - * - poly_init/done() - * - poly_setkey/done() - * - poly_ad/done() - * - poly_adpad/done() - * - poly_cipher/done() - * - poly_cipherpad/done() - * - poly_tail/done/continue() + * - poly_hash() * - chacha_decrypt/done() * - poly_verify_tag() */ @@ -503,21 +332,13 @@ static int chachapoly_init(struct crypto_aead *tfm) struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst); struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_skcipher *chacha; - struct crypto_ahash *poly; unsigned long align; - poly = crypto_spawn_ahash(&ictx->poly); - if (IS_ERR(poly)) - return PTR_ERR(poly); - chacha = crypto_spawn_skcipher(&ictx->chacha); - if (IS_ERR(chacha)) { - crypto_free_ahash(poly); + if (IS_ERR(chacha)) return PTR_ERR(chacha); - } ctx->chacha = chacha; - ctx->poly = poly; ctx->saltlen = ictx->saltlen; align = crypto_aead_alignmask(tfm); @@ -525,12 +346,9 @@ static int chachapoly_init(struct crypto_aead *tfm) crypto_aead_set_reqsize( tfm, align + offsetof(struct chachapoly_req_ctx, u) + - max(offsetof(struct chacha_req, req) + - sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(chacha), - offsetof(struct poly_req, req) + - sizeof(struct ahash_request) + - crypto_ahash_reqsize(poly))); + offsetof(struct chacha_req, req) + + sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(chacha)); return 0; } @@ -539,7 +357,6 @@ static void chachapoly_exit(struct crypto_aead *tfm) { struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm); - crypto_free_ahash(ctx->poly); crypto_free_skcipher(ctx->chacha); } @@ -548,7 +365,6 @@ static void chachapoly_free(struct aead_instance *inst) struct chachapoly_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_skcipher(&ctx->chacha); - crypto_drop_ahash(&ctx->poly); kfree(inst); } @@ -559,7 +375,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, struct aead_instance *inst; struct chachapoly_instance_ctx *ctx; struct skcipher_alg_common *chacha; - struct hash_alg_common *poly; int err; if (ivsize > CHACHAPOLY_IV_SIZE) @@ -581,14 +396,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; chacha = crypto_spawn_skcipher_alg_common(&ctx->chacha); - err = crypto_grab_ahash(&ctx->poly, aead_crypto_instance(inst), - crypto_attr_alg_name(tb[2]), 0, mask); - if (err) - goto err_free_inst; - poly = crypto_spawn_ahash_alg(&ctx->poly); - err = -EINVAL; - if (poly->digestsize != POLY1305_DIGEST_SIZE) + if (strcmp(crypto_attr_alg_name(tb[2]), "poly1305") && + strcmp(crypto_attr_alg_name(tb[2]), "poly1305-generic")) goto err_free_inst; /* Need 16-byte IV size, including Initial Block Counter value */ if (chacha->ivsize != CHACHA_IV_SIZE) @@ -599,16 +409,15 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->base.cra_name, - poly->base.cra_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,poly1305)", name, + chacha->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha->base.cra_driver_name, - poly->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,poly1305-generic)", name, + chacha->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; - inst->alg.base.cra_priority = (chacha->base.cra_priority + - poly->base.cra_priority) / 2; + inst->alg.base.cra_priority = chacha->base.cra_priority; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = chacha->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) + -- 2.51.0 From 34c418b742a64f4add67ad7975b3fa0e20d6cd92 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:23 +0800 Subject: [PATCH 08/16] crypto: testmgr - Remove poly1305 As poly1305 no longer has any in-kernel users, remove its tests. Signed-off-by: Herbert Xu --- crypto/testmgr.c | 6 - crypto/testmgr.h | 288 ----------------------------------------------- 2 files changed, 294 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 82977ea25db3..f100be516f52 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5406,12 +5406,6 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "pkcs1pad(rsa)", .test = alg_test_null, .fips_allowed = 1, - }, { - .alg = "poly1305", - .test = alg_test_hash, - .suite = { - .hash = __VECS(poly1305_tv_template) - } }, { .alg = "polyval", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index afc10af59b0a..32d099ac9e73 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -8836,294 +8836,6 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = { }, }; -/* - * Poly1305 test vectors from RFC7539 A.3. - */ - -static const struct hash_testvec poly1305_tv_template[] = { - { /* Test Vector #1 */ - .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 96, - .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #2 */ - .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e" - "\x41\x6e\x79\x20\x73\x75\x62\x6d" - "\x69\x73\x73\x69\x6f\x6e\x20\x74" - "\x6f\x20\x74\x68\x65\x20\x49\x45" - "\x54\x46\x20\x69\x6e\x74\x65\x6e" - "\x64\x65\x64\x20\x62\x79\x20\x74" - "\x68\x65\x20\x43\x6f\x6e\x74\x72" - "\x69\x62\x75\x74\x6f\x72\x20\x66" - "\x6f\x72\x20\x70\x75\x62\x6c\x69" - "\x63\x61\x74\x69\x6f\x6e\x20\x61" - "\x73\x20\x61\x6c\x6c\x20\x6f\x72" - "\x20\x70\x61\x72\x74\x20\x6f\x66" - "\x20\x61\x6e\x20\x49\x45\x54\x46" - "\x20\x49\x6e\x74\x65\x72\x6e\x65" - "\x74\x2d\x44\x72\x61\x66\x74\x20" - "\x6f\x72\x20\x52\x46\x43\x20\x61" - "\x6e\x64\x20\x61\x6e\x79\x20\x73" - "\x74\x61\x74\x65\x6d\x65\x6e\x74" - "\x20\x6d\x61\x64\x65\x20\x77\x69" - "\x74\x68\x69\x6e\x20\x74\x68\x65" - "\x20\x63\x6f\x6e\x74\x65\x78\x74" - "\x20\x6f\x66\x20\x61\x6e\x20\x49" - "\x45\x54\x46\x20\x61\x63\x74\x69" - "\x76\x69\x74\x79\x20\x69\x73\x20" - "\x63\x6f\x6e\x73\x69\x64\x65\x72" - "\x65\x64\x20\x61\x6e\x20\x22\x49" - "\x45\x54\x46\x20\x43\x6f\x6e\x74" - "\x72\x69\x62\x75\x74\x69\x6f\x6e" - "\x22\x2e\x20\x53\x75\x63\x68\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x63\x6c\x75" - "\x64\x65\x20\x6f\x72\x61\x6c\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x20\x49\x45" - "\x54\x46\x20\x73\x65\x73\x73\x69" - "\x6f\x6e\x73\x2c\x20\x61\x73\x20" - "\x77\x65\x6c\x6c\x20\x61\x73\x20" - "\x77\x72\x69\x74\x74\x65\x6e\x20" - "\x61\x6e\x64\x20\x65\x6c\x65\x63" - "\x74\x72\x6f\x6e\x69\x63\x20\x63" - "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" - "\x74\x69\x6f\x6e\x73\x20\x6d\x61" - "\x64\x65\x20\x61\x74\x20\x61\x6e" - "\x79\x20\x74\x69\x6d\x65\x20\x6f" - "\x72\x20\x70\x6c\x61\x63\x65\x2c" - "\x20\x77\x68\x69\x63\x68\x20\x61" - "\x72\x65\x20\x61\x64\x64\x72\x65" - "\x73\x73\x65\x64\x20\x74\x6f", - .psize = 407, - .digest = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e", - }, { /* Test Vector #3 */ - .plaintext = "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70" - "\xf0\xef\xca\x96\x22\x7a\x86\x3e" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x41\x6e\x79\x20\x73\x75\x62\x6d" - "\x69\x73\x73\x69\x6f\x6e\x20\x74" - "\x6f\x20\x74\x68\x65\x20\x49\x45" - "\x54\x46\x20\x69\x6e\x74\x65\x6e" - "\x64\x65\x64\x20\x62\x79\x20\x74" - "\x68\x65\x20\x43\x6f\x6e\x74\x72" - "\x69\x62\x75\x74\x6f\x72\x20\x66" - "\x6f\x72\x20\x70\x75\x62\x6c\x69" - "\x63\x61\x74\x69\x6f\x6e\x20\x61" - "\x73\x20\x61\x6c\x6c\x20\x6f\x72" - "\x20\x70\x61\x72\x74\x20\x6f\x66" - "\x20\x61\x6e\x20\x49\x45\x54\x46" - "\x20\x49\x6e\x74\x65\x72\x6e\x65" - "\x74\x2d\x44\x72\x61\x66\x74\x20" - "\x6f\x72\x20\x52\x46\x43\x20\x61" - "\x6e\x64\x20\x61\x6e\x79\x20\x73" - "\x74\x61\x74\x65\x6d\x65\x6e\x74" - "\x20\x6d\x61\x64\x65\x20\x77\x69" - "\x74\x68\x69\x6e\x20\x74\x68\x65" - "\x20\x63\x6f\x6e\x74\x65\x78\x74" - "\x20\x6f\x66\x20\x61\x6e\x20\x49" - "\x45\x54\x46\x20\x61\x63\x74\x69" - "\x76\x69\x74\x79\x20\x69\x73\x20" - "\x63\x6f\x6e\x73\x69\x64\x65\x72" - "\x65\x64\x20\x61\x6e\x20\x22\x49" - "\x45\x54\x46\x20\x43\x6f\x6e\x74" - "\x72\x69\x62\x75\x74\x69\x6f\x6e" - "\x22\x2e\x20\x53\x75\x63\x68\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x63\x6c\x75" - "\x64\x65\x20\x6f\x72\x61\x6c\x20" - "\x73\x74\x61\x74\x65\x6d\x65\x6e" - "\x74\x73\x20\x69\x6e\x20\x49\x45" - "\x54\x46\x20\x73\x65\x73\x73\x69" - "\x6f\x6e\x73\x2c\x20\x61\x73\x20" - "\x77\x65\x6c\x6c\x20\x61\x73\x20" - "\x77\x72\x69\x74\x74\x65\x6e\x20" - "\x61\x6e\x64\x20\x65\x6c\x65\x63" - "\x74\x72\x6f\x6e\x69\x63\x20\x63" - "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" - "\x74\x69\x6f\x6e\x73\x20\x6d\x61" - "\x64\x65\x20\x61\x74\x20\x61\x6e" - "\x79\x20\x74\x69\x6d\x65\x20\x6f" - "\x72\x20\x70\x6c\x61\x63\x65\x2c" - "\x20\x77\x68\x69\x63\x68\x20\x61" - "\x72\x65\x20\x61\x64\x64\x72\x65" - "\x73\x73\x65\x64\x20\x74\x6f", - .psize = 407, - .digest = "\xf3\x47\x7e\x7c\xd9\x54\x17\xaf" - "\x89\xa6\xb8\x79\x4c\x31\x0c\xf0", - }, { /* Test Vector #4 */ - .plaintext = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" - "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" - "\x47\x39\x17\xc1\x40\x2b\x80\x09" - "\x9d\xca\x5c\xbc\x20\x70\x75\xc0" - "\x27\x54\x77\x61\x73\x20\x62\x72" - "\x69\x6c\x6c\x69\x67\x2c\x20\x61" - "\x6e\x64\x20\x74\x68\x65\x20\x73" - "\x6c\x69\x74\x68\x79\x20\x74\x6f" - "\x76\x65\x73\x0a\x44\x69\x64\x20" - "\x67\x79\x72\x65\x20\x61\x6e\x64" - "\x20\x67\x69\x6d\x62\x6c\x65\x20" - "\x69\x6e\x20\x74\x68\x65\x20\x77" - "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" - "\x20\x6d\x69\x6d\x73\x79\x20\x77" - "\x65\x72\x65\x20\x74\x68\x65\x20" - "\x62\x6f\x72\x6f\x67\x6f\x76\x65" - "\x73\x2c\x0a\x41\x6e\x64\x20\x74" - "\x68\x65\x20\x6d\x6f\x6d\x65\x20" - "\x72\x61\x74\x68\x73\x20\x6f\x75" - "\x74\x67\x72\x61\x62\x65\x2e", - .psize = 159, - .digest = "\x45\x41\x66\x9a\x7e\xaa\xee\x61" - "\xe7\x08\xdc\x7c\xbc\xc5\xeb\x62", - }, { /* Test Vector #5 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - .psize = 48, - .digest = "\x03\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #6 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 48, - .digest = "\x03\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #7 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xf0\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\x11\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 80, - .digest = "\x05\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #8 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xfb\xfe\xfe\xfe\xfe\xfe\xfe\xfe" - "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" - "\x01\x01\x01\x01\x01\x01\x01\x01" - "\x01\x01\x01\x01\x01\x01\x01\x01", - .psize = 80, - .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #9 */ - .plaintext = "\x02\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xfd\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - .psize = 48, - .digest = "\xfa\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff", - }, { /* Test Vector #10 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x04\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xe3\x35\x94\xd7\x50\x5e\x43\xb9" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x33\x94\xd7\x50\x5e\x43\x79\xcd" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 96, - .digest = "\x14\x00\x00\x00\x00\x00\x00\x00" - "\x55\x00\x00\x00\x00\x00\x00\x00", - }, { /* Test Vector #11 */ - .plaintext = "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x04\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\xe3\x35\x94\xd7\x50\x5e\x43\xb9" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x33\x94\xd7\x50\x5e\x43\x79\xcd" - "\x01\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .psize = 80, - .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - }, { /* Regression test for overflow in AVX2 implementation */ - .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff", - .psize = 300, - .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" - "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", - } -}; - /* NHPoly1305 test vectors from https://github.com/google/adiantum */ static const struct hash_testvec nhpoly1305_tv_template[] = { { -- 2.51.0 From ceef731b0e22df80a13d67773ae9afd55a971f9e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:25 +0800 Subject: [PATCH 09/16] crypto: poly1305 - Remove algorithm As there are no in-kernel users of the Crypto API poly1305 left, remove it. Signed-off-by: Herbert Xu --- crypto/Kconfig | 12 ---- crypto/Makefile | 2 - crypto/poly1305.c | 152 ---------------------------------------------- 3 files changed, 166 deletions(-) delete mode 100644 crypto/poly1305.c diff --git a/crypto/Kconfig b/crypto/Kconfig index f87e2a26d2dd..3cb5563dc4ab 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -953,18 +953,6 @@ config CRYPTO_POLYVAL This is used in HCTR2. It is not a general-purpose cryptographic hash function. -config CRYPTO_POLY1305 - tristate "Poly1305" - select CRYPTO_HASH - select CRYPTO_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC - help - Poly1305 authenticator algorithm (RFC7539) - - Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein. - It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use - in IETF protocols. This is the portable C implementation of Poly1305. - config CRYPTO_RMD160 tristate "RIPEMD-160" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 5d2f2a28d8a0..587bc74b6d74 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -149,8 +149,6 @@ obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_ARIA) += aria_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha.o CFLAGS_chacha.o += -DARCH=$(ARCH) -obj-$(CONFIG_CRYPTO_POLY1305) += poly1305.o -CFLAGS_poly1305.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o diff --git a/crypto/poly1305.c b/crypto/poly1305.c deleted file mode 100644 index e0436bdc462b..000000000000 --- a/crypto/poly1305.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Crypto API wrapper for the Poly1305 library functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include - -struct crypto_poly1305_desc_ctx { - struct poly1305_desc_ctx base; - u8 key[POLY1305_KEY_SIZE]; - unsigned int keysize; -}; - -static int crypto_poly1305_init(struct shash_desc *desc) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->keysize = 0; - return 0; -} - -static int crypto_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen, bool arch) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; - - /* - * The key is passed as the first 32 "data" bytes. The actual - * poly1305_init() can be called only once the full key is available. - */ - if (dctx->keysize < POLY1305_KEY_SIZE) { - bytes = min(srclen, POLY1305_KEY_SIZE - dctx->keysize); - memcpy(&dctx->key[dctx->keysize], src, bytes); - dctx->keysize += bytes; - if (dctx->keysize < POLY1305_KEY_SIZE) - return 0; - if (arch) - poly1305_init(&dctx->base, dctx->key); - else - poly1305_init_generic(&dctx->base, dctx->key); - src += bytes; - srclen -= bytes; - } - - if (arch) - poly1305_update(&dctx->base, src, srclen); - else - poly1305_update_generic(&dctx->base, src, srclen); - - return 0; -} - -static int crypto_poly1305_update_generic(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - return crypto_poly1305_update(desc, src, srclen, false); -} - -static int crypto_poly1305_update_arch(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - return crypto_poly1305_update(desc, src, srclen, true); -} - -static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst, bool arch) -{ - struct crypto_poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (unlikely(dctx->keysize != POLY1305_KEY_SIZE)) - return -ENOKEY; - - if (arch) - poly1305_final(&dctx->base, dst); - else - poly1305_final_generic(&dctx->base, dst); - memzero_explicit(&dctx->key, sizeof(dctx->key)); - return 0; -} - -static int crypto_poly1305_final_generic(struct shash_desc *desc, u8 *dst) -{ - return crypto_poly1305_final(desc, dst, false); -} - -static int crypto_poly1305_final_arch(struct shash_desc *desc, u8 *dst) -{ - return crypto_poly1305_final(desc, dst, true); -} - -static struct shash_alg poly1305_algs[] = { - { - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-generic", - .base.cra_priority = 100, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, - .update = crypto_poly1305_update_generic, - .final = crypto_poly1305_final_generic, - .descsize = sizeof(struct crypto_poly1305_desc_ctx), - }, - { - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-" __stringify(ARCH), - .base.cra_priority = 300, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, - .update = crypto_poly1305_update_arch, - .final = crypto_poly1305_final_arch, - .descsize = sizeof(struct crypto_poly1305_desc_ctx), - }, -}; - -static int num_algs; - -static int __init poly1305_mod_init(void) -{ - /* register the arch flavours only if they differ from generic */ - num_algs = poly1305_is_arch_optimized() ? 2 : 1; - - return crypto_register_shashes(poly1305_algs, num_algs); -} - -static void __exit poly1305_mod_exit(void) -{ - crypto_unregister_shashes(poly1305_algs, num_algs); -} - -subsys_initcall(poly1305_mod_init); -module_exit(poly1305_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("Crypto API wrapper for the Poly1305 library functions"); -MODULE_ALIAS_CRYPTO("poly1305"); -MODULE_ALIAS_CRYPTO("poly1305-generic"); -MODULE_ALIAS_CRYPTO("poly1305-" __stringify(ARCH)); -- 2.51.0 From 10a6d72ea355b730aa9702da0fd36aef0898a80e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:28 +0800 Subject: [PATCH 10/16] crypto: lib/poly1305 - Use block-only interface Now that every architecture provides a block function, use that to implement the lib/poly1305 and remove the old per-arch code. Signed-off-by: Herbert Xu --- arch/arm/lib/crypto/poly1305-glue.c | 57 ------------------- arch/arm64/lib/crypto/poly1305-glue.c | 58 ------------------- arch/mips/lib/crypto/poly1305-glue.c | 60 -------------------- arch/powerpc/lib/crypto/poly1305-p10-glue.c | 63 --------------------- arch/x86/lib/crypto/poly1305_glue.c | 60 -------------------- include/crypto/poly1305.h | 53 ++--------------- lib/crypto/poly1305.c | 39 ++++++++----- 7 files changed, 32 insertions(+), 358 deletions(-) diff --git a/arch/arm/lib/crypto/poly1305-glue.c b/arch/arm/lib/crypto/poly1305-glue.c index 3ee16048ec7c..91da42b26d9c 100644 --- a/arch/arm/lib/crypto/poly1305-glue.c +++ b/arch/arm/lib/crypto/poly1305-glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -35,17 +34,6 @@ void __weak poly1305_blocks_neon(struct poly1305_block_state *state, static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -67,51 +55,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, nbytes, 1); - src += round_down(nbytes, POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { /* We always can use at least the ARM scalar implementation. */ diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index d66a820e32d5..681c26557336 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -30,17 +29,6 @@ EXPORT_SYMBOL_GPL(poly1305_emit_arch); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -61,52 +49,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, nbytes, 1); - src += round_down(nbytes, POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - memzero_explicit(dctx, sizeof(*dctx)); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { /* We always can use at least the ARM64 scalar implementation. */ diff --git a/arch/mips/lib/crypto/poly1305-glue.c b/arch/mips/lib/crypto/poly1305-glue.c index 2fea4cacfe27..764a38a65200 100644 --- a/arch/mips/lib/crypto/poly1305-glue.c +++ b/arch/mips/lib/crypto/poly1305-glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include asmlinkage void poly1305_block_init_arch( @@ -24,65 +23,6 @@ asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, const u32 nonce[4]); EXPORT_SYMBOL_GPL(poly1305_emit_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - poly1305_blocks_arch(&dctx->state, src, len, 1); - src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return true; diff --git a/arch/powerpc/lib/crypto/poly1305-p10-glue.c b/arch/powerpc/lib/crypto/poly1305-p10-glue.c index 708435beaba6..50ac802220e0 100644 --- a/arch/powerpc/lib/crypto/poly1305-p10-glue.c +++ b/arch/powerpc/lib/crypto/poly1305-p10-glue.c @@ -10,7 +10,6 @@ #include #include #include -#include #include asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); @@ -45,17 +44,6 @@ void poly1305_block_init_arch(struct poly1305_block_state *dctx, } EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int len, u32 padbit) { @@ -76,57 +64,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) -{ - unsigned int bytes; - - if (!static_key_enabled(&have_p10)) - return poly1305_update_generic(dctx, src, srclen); - - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - srclen -= bytes; - dctx->buflen += bytes; - if (dctx->buflen < POLY1305_BLOCK_SIZE) - return; - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks_arch(&dctx->state, src, srclen, 1); - src += srclen - (srclen % POLY1305_BLOCK_SIZE); - srclen %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(srclen)) { - dctx->buflen = srclen; - memcpy(dctx->buf, src, srclen); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_final_generic(dctx, dst); - - if (dctx->buflen) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return static_key_enabled(&have_p10); diff --git a/arch/x86/lib/crypto/poly1305_glue.c b/arch/x86/lib/crypto/poly1305_glue.c index d98764ec3b47..f799828c5809 100644 --- a/arch/x86/lib/crypto/poly1305_glue.c +++ b/arch/x86/lib/crypto/poly1305_glue.c @@ -10,7 +10,6 @@ #include #include #include -#include #include struct poly1305_arch_internal { @@ -96,65 +95,6 @@ void poly1305_emit_arch(const struct poly1305_state *ctx, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - dctx->s[0] = get_unaligned_le32(&key[16]); - dctx->s[1] = get_unaligned_le32(&key[20]); - dctx->s[2] = get_unaligned_le32(&key[24]); - dctx->s[3] = get_unaligned_le32(&key[28]); - dctx->buflen = 0; - poly1305_block_init_arch(&dctx->state, key); -} -EXPORT_SYMBOL(poly1305_init_arch); - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int srclen) -{ - unsigned int bytes; - - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - srclen -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = round_down(srclen, POLY1305_BLOCK_SIZE); - poly1305_blocks_arch(&dctx->state, src, bytes, 1); - src += bytes; - srclen -= bytes; - } - - if (unlikely(srclen)) { - dctx->buflen = srclen; - memcpy(dctx->buf, src, srclen); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_arch(&dctx->state, dctx->buf, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_arch(&dctx->h, dst, dctx->s); - memzero_explicit(dctx, sizeof(*dctx)); -} -EXPORT_SYMBOL(poly1305_final_arch); - bool poly1305_is_arch_optimized(void) { return static_key_enabled(&poly1305_use_avx); diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 027d74842cd5..e54abda8cfe9 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -55,55 +55,14 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - union { - struct { - struct poly1305_state h; - union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; - }; - }; - struct poly1305_block_state state; - }; + struct poly1305_block_state state; }; -void poly1305_init_arch(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); - -static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_init_arch(desc, key); - else - poly1305_init_generic(desc, key); -} - -void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); - -static inline void poly1305_update(struct poly1305_desc_ctx *desc, - const u8 *src, unsigned int nbytes) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_update_arch(desc, src, nbytes); - else - poly1305_update_generic(desc, src, nbytes); -} - -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); - -static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_final_arch(desc, digest); - else - poly1305_final_generic(desc, digest); -} +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes); +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); #if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) bool poly1305_is_arch_optimized(void); diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index 9fec64a599c1..4c9996864090 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -22,47 +22,60 @@ void poly1305_block_init_generic(struct poly1305_block_state *desc, } EXPORT_SYMBOL_GPL(poly1305_block_init_generic); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]) +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]) { desc->s[0] = get_unaligned_le32(key + 16); desc->s[1] = get_unaligned_le32(key + 20); desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); desc->buflen = 0; - poly1305_block_init_generic(&desc->state, key); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_block_init_arch(&desc->state, key); + else + poly1305_block_init_generic(&desc->state, key); } -EXPORT_SYMBOL_GPL(poly1305_init_generic); +EXPORT_SYMBOL(poly1305_init); static inline void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, unsigned int len) { - poly1305_blocks_generic(state, src, len, 1); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_blocks_arch(state, src, len, 1); + else + poly1305_blocks_generic(state, src, len, 1); } -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes) +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes) { desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, src, nbytes, POLY1305_BLOCK_SIZE, desc->buf, desc->buflen); } -EXPORT_SYMBOL_GPL(poly1305_update_generic); +EXPORT_SYMBOL(poly1305_update); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) { if (unlikely(desc->buflen)) { desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - poly1305_blocks_generic(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_blocks_arch(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); + else + poly1305_blocks_generic(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_generic(&desc->h, dst, desc->s); + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) + poly1305_emit_arch(&desc->state.h, dst, desc->s); + else + poly1305_emit_generic(&desc->state.h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } -EXPORT_SYMBOL_GPL(poly1305_final_generic); +EXPORT_SYMBOL(poly1305_final); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi "); -- 2.51.0 From 950e5c84118c9e5b06bb9a9b64edf989ee4034df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:26 -0700 Subject: [PATCH 11/16] crypto: sha256 - support arch-optimized lib and expose through shash As has been done for various other algorithms, rework the design of the SHA-256 library to support arch-optimized implementations, and make crypto/sha256.c expose both generic and arch-optimized shash algorithms that wrap the library functions. This allows users of the SHA-256 library functions to take advantage of the arch-optimized code, and this makes it much simpler to integrate SHA-256 for each architecture. Note that sha256_base.h is not used in the new design. It will be removed once all the architecture-specific code has been updated. Move the generic block function into its own module to avoid a circular dependency from libsha256.ko => sha256-$ARCH.ko => libsha256.ko. Signed-off-by: Eric Biggers Add export and import functions to maintain existing export format. Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/Makefile | 3 +- crypto/sha256.c | 243 +++++++++++++++++++++++++++++++++ crypto/sha256_generic.c | 102 -------------- include/crypto/internal/sha2.h | 28 ++++ include/crypto/sha2.h | 15 +- include/crypto/sha256_base.h | 9 +- lib/crypto/Kconfig | 19 +++ lib/crypto/Makefile | 3 + lib/crypto/sha256-generic.c | 137 +++++++++++++++++++ lib/crypto/sha256.c | 196 ++++++++++++-------------- 11 files changed, 529 insertions(+), 227 deletions(-) create mode 100644 crypto/sha256.c delete mode 100644 crypto/sha256_generic.c create mode 100644 include/crypto/internal/sha2.h create mode 100644 lib/crypto/sha256-generic.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 3cb5563dc4ab..7bfad077f308 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -982,6 +982,7 @@ config CRYPTO_SHA256 tristate "SHA-224 and SHA-256" select CRYPTO_HASH select CRYPTO_LIB_SHA256 + select CRYPTO_LIB_SHA256_GENERIC help SHA-224 and SHA-256 secure hash algorithms (FIPS 180, ISO/IEC 10118-3) diff --git a/crypto/Makefile b/crypto/Makefile index 587bc74b6d74..84f6911dc9ba 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -76,7 +76,8 @@ obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o -obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o +obj-$(CONFIG_CRYPTO_SHA256) += sha256.o +CFLAGS_sha256.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o diff --git a/crypto/sha256.c b/crypto/sha256.c new file mode 100644 index 000000000000..c2588d08ee3e --- /dev/null +++ b/crypto/sha256.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Crypto API wrapper for the SHA-256 and SHA-224 library functions + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * SHA224 Support Copyright 2007 Intel Corporation + */ +#include +#include +#include +#include + +const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, + 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, + 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, + 0x2f +}; +EXPORT_SYMBOL_GPL(sha224_zero_message_hash); + +const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = { + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, + 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, + 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, + 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 +}; +EXPORT_SYMBOL_GPL(sha256_zero_message_hash); + +static int crypto_sha256_init(struct shash_desc *desc) +{ + sha256_init(shash_desc_ctx(desc)); + return 0; +} + +static int crypto_sha256_update_generic(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + sha256_update_generic(shash_desc_ctx(desc), data, len); + return 0; +} + +static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + sha256_update(shash_desc_ctx(desc), data, len); + return 0; +} + +static int crypto_sha256_final_generic(struct shash_desc *desc, u8 *out) +{ + sha256_final_generic(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_final_arch(struct shash_desc *desc, u8 *out) +{ + sha256_final(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_finup_generic(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_update_generic(sctx, data, len); + sha256_final_generic(sctx, out); + return 0; +} + +static int crypto_sha256_finup_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_update(sctx, data, len); + sha256_final(sctx, out); + return 0; +} + +static int crypto_sha256_digest_generic(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sha256_init(sctx); + sha256_update_generic(sctx, data, len); + sha256_final_generic(sctx, out); + return 0; +} + +static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + sha256(data, len, out); + return 0; +} + +static int crypto_sha224_init(struct shash_desc *desc) +{ + sha224_init(shash_desc_ctx(desc)); + return 0; +} + +static int crypto_sha224_final_generic(struct shash_desc *desc, u8 *out) +{ + sha224_final_generic(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha224_final_arch(struct shash_desc *desc, u8 *out) +{ + sha224_final(shash_desc_ctx(desc), out); + return 0; +} + +static int crypto_sha256_import_lib(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + const u8 *p = in; + + memcpy(sctx, p, sizeof(*sctx)); + p += sizeof(*sctx); + sctx->count += *p; + return 0; +} + +static int crypto_sha256_export_lib(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx0 = shash_desc_ctx(desc); + struct sha256_state sctx = *sctx0; + unsigned int partial; + u8 *p = out; + + partial = sctx.count % SHA256_BLOCK_SIZE; + sctx.count -= partial; + memcpy(p, &sctx, sizeof(sctx)); + p += sizeof(sctx); + *p = partial; + return 0; +} + +static struct shash_alg algs[] = { + { + .base.cra_name = "sha256", + .base.cra_driver_name = "sha256-generic", + .base.cra_priority = 100, + .base.cra_blocksize = SHA256_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA256_DIGEST_SIZE, + .init = crypto_sha256_init, + .update = crypto_sha256_update_generic, + .final = crypto_sha256_final_generic, + .finup = crypto_sha256_finup_generic, + .digest = crypto_sha256_digest_generic, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha224", + .base.cra_driver_name = "sha224-generic", + .base.cra_priority = 100, + .base.cra_blocksize = SHA224_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA224_DIGEST_SIZE, + .init = crypto_sha224_init, + .update = crypto_sha256_update_generic, + .final = crypto_sha224_final_generic, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha256", + .base.cra_driver_name = "sha256-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = SHA256_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA256_DIGEST_SIZE, + .init = crypto_sha256_init, + .update = crypto_sha256_update_arch, + .final = crypto_sha256_final_arch, + .finup = crypto_sha256_finup_arch, + .digest = crypto_sha256_digest_arch, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, + { + .base.cra_name = "sha224", + .base.cra_driver_name = "sha224-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = SHA224_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .digestsize = SHA224_DIGEST_SIZE, + .init = crypto_sha224_init, + .update = crypto_sha256_update_arch, + .final = crypto_sha224_final_arch, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct crypto_sha256_state) + + SHA256_BLOCK_SIZE + 1, + .import = crypto_sha256_import_lib, + .export = crypto_sha256_export_lib, + }, +}; + +static unsigned int num_algs; + +static int __init crypto_sha256_mod_init(void) +{ + /* register the arch flavours only if they differ from generic */ + num_algs = ARRAY_SIZE(algs); + BUILD_BUG_ON(ARRAY_SIZE(algs) % 2 != 0); + if (!sha256_is_arch_optimized()) + num_algs /= 2; + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} +subsys_initcall(crypto_sha256_mod_init); + +static void __exit crypto_sha256_mod_exit(void) +{ + crypto_unregister_shashes(algs, num_algs); +} +module_exit(crypto_sha256_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Crypto API wrapper for the SHA-256 and SHA-224 library functions"); + +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-generic"); +MODULE_ALIAS_CRYPTO("sha256-" __stringify(ARCH)); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-generic"); +MODULE_ALIAS_CRYPTO("sha224-" __stringify(ARCH)); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c deleted file mode 100644 index 05084e5bbaec..000000000000 --- a/crypto/sha256_generic.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Crypto API wrapper for the generic SHA256 code from lib/crypto/sha256.c - * - * Copyright (c) Jean-Luc Cooke - * Copyright (c) Andrew McDonald - * Copyright (c) 2002 James Morris - * SHA224 Support Copyright 2007 Intel Corporation - */ -#include -#include -#include -#include -#include - -const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { - 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, - 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, - 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, - 0x2f -}; -EXPORT_SYMBOL_GPL(sha224_zero_message_hash); - -const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = { - 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, - 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, - 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, - 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 -}; -EXPORT_SYMBOL_GPL(sha256_zero_message_hash); - -static void sha256_block(struct crypto_sha256_state *sctx, const u8 *input, - int blocks) -{ - sha256_transform_blocks(sctx, input, blocks); -} - -static int crypto_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update_blocks(desc, data, len, sha256_block); -} - -static int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash) -{ - sha256_base_do_finup(desc, data, len, sha256_block); - return sha256_base_finish(desc, hash); -} - -static struct shash_alg sha256_algs[2] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_update, - .finup = crypto_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_update, - .finup = crypto_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha256_generic_mod_init(void) -{ - return crypto_register_shashes(sha256_algs, ARRAY_SIZE(sha256_algs)); -} - -static void __exit sha256_generic_mod_fini(void) -{ - crypto_unregister_shashes(sha256_algs, ARRAY_SIZE(sha256_algs)); -} - -subsys_initcall(sha256_generic_mod_init); -module_exit(sha256_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm"); - -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha224-generic"); -MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha256-generic"); diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h new file mode 100644 index 000000000000..d641c67abcbc --- /dev/null +++ b/include/crypto/internal/sha2.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _CRYPTO_INTERNAL_SHA2_H +#define _CRYPTO_INTERNAL_SHA2_H + +#include + +void sha256_update_generic(struct sha256_state *sctx, + const u8 *data, size_t len); +void sha256_final_generic(struct sha256_state *sctx, + u8 out[SHA256_DIGEST_SIZE]); +void sha224_final_generic(struct sha256_state *sctx, + u8 out[SHA224_DIGEST_SIZE]); + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) +bool sha256_is_arch_optimized(void); +#else +static inline bool sha256_is_arch_optimized(void) +{ + return false; +} +#endif +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +#endif /* _CRYPTO_INTERNAL_SHA2_H */ diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index f873c2207b1e..9a56286d736d 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -13,6 +13,7 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 +#define SHA256_STATE_WORDS 8 #define SHA384_DIGEST_SIZE 48 #define SHA384_BLOCK_SIZE 128 @@ -66,7 +67,7 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; struct crypto_sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; @@ -74,7 +75,7 @@ struct sha256_state { union { struct crypto_sha256_state ctx; struct { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; }; @@ -87,16 +88,6 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -/* - * Stand-alone implementation of the SHA256 algorithm. It is designed to - * have as little dependencies as possible so it can be used in the - * kexec_file purgatory. In other cases you should generally use the - * hash APIs from include/crypto/hash.h. Especially when hashing large - * amounts of data as those APIs may be hw-accelerated. - * - * For details see lib/crypto/sha256.c - */ - static inline void sha256_init(struct sha256_state *sctx) { sctx->state[0] = SHA256_H0; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 9f284bed5a51..804361731a7a 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include @@ -142,7 +142,10 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) return __sha256_base_finish(sctx->state, out, digest_size); } -void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks); +static inline void sha256_transform_blocks(struct crypto_sha256_state *sst, + const u8 *input, int blocks) +{ + sha256_blocks_generic(sst->state, input, blocks); +} #endif /* _CRYPTO_SHA256_BASE_H */ diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index af2368799579..7fe678047939 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -139,6 +139,25 @@ config CRYPTO_LIB_SHA1 config CRYPTO_LIB_SHA256 tristate + help + Enable the SHA-256 library interface. This interface may be fulfilled + by either the generic implementation or an arch-specific one, if one + is available and enabled. + +config CRYPTO_ARCH_HAVE_LIB_SHA256 + bool + help + Declares whether the architecture provides an arch-specific + accelerated implementation of the SHA-256 library interface. + +config CRYPTO_LIB_SHA256_GENERIC + tristate + default CRYPTO_LIB_SHA256 if !CRYPTO_ARCH_HAVE_LIB_SHA256 + help + This symbol can be selected by arch implementations of the SHA-256 + library interface that require the generic code as a fallback, e.g., + for SIMD implementations. If no arch specific implementation is + enabled, this implementation serves the users of CRYPTO_LIB_SHA256. config CRYPTO_LIB_SM3 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 4dd62bc5bee3..71d3d05d666a 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -51,6 +51,9 @@ libsha1-y := sha1.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o +obj-$(CONFIG_CRYPTO_LIB_SHA256_GENERIC) += libsha256-generic.o +libsha256-generic-y := sha256-generic.o + ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) libblake2s-y += blake2s-selftest.o libchacha20poly1305-y += chacha20poly1305-selftest.o diff --git a/lib/crypto/sha256-generic.c b/lib/crypto/sha256-generic.c new file mode 100644 index 000000000000..a16ad4f25ebb --- /dev/null +++ b/lib/crypto/sha256-generic.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + * + * SHA-256 code by Jean-Luc Cooke . + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * Copyright (c) 2014 Red Hat Inc. + */ + +#include +#include +#include +#include +#include + +static const u32 SHA256_K[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +static inline u32 Ch(u32 x, u32 y, u32 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u32 Maj(u32 x, u32 y, u32 z) +{ + return (x & y) | (z & (x | y)); +} + +#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) +#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) +#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) +#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) + +static inline void LOAD_OP(int I, u32 *W, const u8 *input) +{ + W[I] = get_unaligned_be32((__u32 *)input + I); +} + +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} + +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ + u32 t1, t2; \ + t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ + t2 = e0(a) + Maj(a, b, c); \ + d += t1; \ + h = t1 + t2; \ +} while (0) + +static void sha256_block_generic(u32 state[SHA256_STATE_WORDS], + const u8 *input, u32 W[64]) +{ + u32 a, b, c, d, e, f, g, h; + int i; + + /* load the input */ + for (i = 0; i < 16; i += 8) { + LOAD_OP(i + 0, W, input); + LOAD_OP(i + 1, W, input); + LOAD_OP(i + 2, W, input); + LOAD_OP(i + 3, W, input); + LOAD_OP(i + 4, W, input); + LOAD_OP(i + 5, W, input); + LOAD_OP(i + 6, W, input); + LOAD_OP(i + 7, W, input); + } + + /* now blend */ + for (i = 16; i < 64; i += 8) { + BLEND_OP(i + 0, W); + BLEND_OP(i + 1, W); + BLEND_OP(i + 2, W); + BLEND_OP(i + 3, W); + BLEND_OP(i + 4, W); + BLEND_OP(i + 5, W); + BLEND_OP(i + 6, W); + BLEND_OP(i + 7, W); + } + + /* load the state into our registers */ + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + /* now iterate */ + for (i = 0; i < 64; i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; +} + +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + u32 W[64]; + + do { + sha256_block_generic(state, data, W); + data += SHA256_BLOCK_SIZE; + } while (--nblocks); + + memzero_explicit(W, sizeof(W)); +} +EXPORT_SYMBOL_GPL(sha256_blocks_generic); + +MODULE_DESCRIPTION("SHA-256 Algorithm (generic implementation)"); +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index a89bab377de1..4b19cf977ef1 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -11,148 +11,105 @@ * Copyright (c) 2014 Red Hat Inc. */ -#include -#include +#include #include #include #include +#include -static const u32 SHA256_K[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, -}; - -static inline u32 Ch(u32 x, u32 y, u32 z) -{ - return z ^ (x & (y ^ z)); -} - -static inline u32 Maj(u32 x, u32 y, u32 z) -{ - return (x & y) | (z & (x | y)); -} - -#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) -#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) -#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) -#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) +/* + * If __DISABLE_EXPORTS is defined, then this file is being compiled for a + * pre-boot environment. In that case, ignore the kconfig options, pull the + * generic code into the same translation unit, and use that only. + */ +#ifdef __DISABLE_EXPORTS +#include "sha256-generic.c" +#endif -static inline void LOAD_OP(int I, u32 *W, const u8 *input) +static inline void sha256_blocks(u32 state[SHA256_STATE_WORDS], const u8 *data, + size_t nblocks, bool force_generic) { - W[I] = get_unaligned_be32((__u32 *)input + I); +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) && !defined(__DISABLE_EXPORTS) + if (!force_generic) + return sha256_blocks_arch(state, data, nblocks); +#endif + sha256_blocks_generic(state, data, nblocks); } -static inline void BLEND_OP(int I, u32 *W) +static inline void __sha256_update(struct sha256_state *sctx, const u8 *data, + size_t len, bool force_generic) { - W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; -} - -#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ - u32 t1, t2; \ - t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ - t2 = e0(a) + Maj(a, b, c); \ - d += t1; \ - h = t1 + t2; \ -} while (0) + size_t partial = sctx->count % SHA256_BLOCK_SIZE; -static void sha256_transform(u32 *state, const u8 *input, u32 *W) -{ - u32 a, b, c, d, e, f, g, h; - int i; - - /* load the input */ - for (i = 0; i < 16; i += 8) { - LOAD_OP(i + 0, W, input); - LOAD_OP(i + 1, W, input); - LOAD_OP(i + 2, W, input); - LOAD_OP(i + 3, W, input); - LOAD_OP(i + 4, W, input); - LOAD_OP(i + 5, W, input); - LOAD_OP(i + 6, W, input); - LOAD_OP(i + 7, W, input); - } + sctx->count += len; - /* now blend */ - for (i = 16; i < 64; i += 8) { - BLEND_OP(i + 0, W); - BLEND_OP(i + 1, W); - BLEND_OP(i + 2, W); - BLEND_OP(i + 3, W); - BLEND_OP(i + 4, W); - BLEND_OP(i + 5, W); - BLEND_OP(i + 6, W); - BLEND_OP(i + 7, W); - } + if (partial + len >= SHA256_BLOCK_SIZE) { + size_t nblocks; - /* load the state into our registers */ - a = state[0]; b = state[1]; c = state[2]; d = state[3]; - e = state[4]; f = state[5]; g = state[6]; h = state[7]; - - /* now iterate */ - for (i = 0; i < 64; i += 8) { - SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); - SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); - SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); - SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); - SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); - SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); - SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); - SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); - } + if (partial) { + size_t l = SHA256_BLOCK_SIZE - partial; - state[0] += a; state[1] += b; state[2] += c; state[3] += d; - state[4] += e; state[5] += f; state[6] += g; state[7] += h; -} + memcpy(&sctx->buf[partial], data, l); + data += l; + len -= l; -void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks) -{ - u32 W[64]; + sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + } - do { - sha256_transform(sst->state, input, W); - input += SHA256_BLOCK_SIZE; - } while (--blocks); + nblocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; - memzero_explicit(W, sizeof(W)); + if (nblocks) { + sha256_blocks(sctx->state, data, nblocks, + force_generic); + data += nblocks * SHA256_BLOCK_SIZE; + } + partial = 0; + } + if (len) + memcpy(&sctx->buf[partial], data, len); } -EXPORT_SYMBOL_GPL(sha256_transform_blocks); void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) { - lib_sha256_base_do_update(sctx, data, len, sha256_transform_blocks); + __sha256_update(sctx, data, len, false); } EXPORT_SYMBOL(sha256_update); -static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_size) +static inline void __sha256_final(struct sha256_state *sctx, u8 *out, + size_t digest_size, bool force_generic) { - lib_sha256_base_do_finalize(sctx, sha256_transform_blocks); - lib_sha256_base_finish(sctx, out, digest_size); + const size_t bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + __be64 *bits = (__be64 *)&sctx->buf[bit_offset]; + size_t partial = sctx->count % SHA256_BLOCK_SIZE; + size_t i; + + sctx->buf[partial++] = 0x80; + if (partial > bit_offset) { + memset(&sctx->buf[partial], 0, SHA256_BLOCK_SIZE - partial); + sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + partial = 0; + } + + memset(&sctx->buf[partial], 0, bit_offset - partial); + *bits = cpu_to_be64(sctx->count << 3); + sha256_blocks(sctx->state, sctx->buf, 1, force_generic); + + for (i = 0; i < digest_size; i += 4) + put_unaligned_be32(sctx->state[i / 4], out + i); + + memzero_explicit(sctx, sizeof(*sctx)); } void sha256_final(struct sha256_state *sctx, u8 *out) { - __sha256_final(sctx, out, 32); + __sha256_final(sctx, out, SHA256_DIGEST_SIZE, false); } EXPORT_SYMBOL(sha256_final); void sha224_final(struct sha256_state *sctx, u8 *out) { - __sha256_final(sctx, out, 28); + __sha256_final(sctx, out, SHA224_DIGEST_SIZE, false); } EXPORT_SYMBOL(sha224_final); @@ -166,5 +123,26 @@ void sha256(const u8 *data, unsigned int len, u8 *out) } EXPORT_SYMBOL(sha256); +#if IS_ENABLED(CONFIG_CRYPTO_SHA256) && !defined(__DISABLE_EXPORTS) +void sha256_update_generic(struct sha256_state *sctx, + const u8 *data, size_t len) +{ + __sha256_update(sctx, data, len, true); +} +EXPORT_SYMBOL(sha256_update_generic); + +void sha256_final_generic(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]) +{ + __sha256_final(sctx, out, SHA256_DIGEST_SIZE, true); +} +EXPORT_SYMBOL(sha256_final_generic); + +void sha224_final_generic(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]) +{ + __sha256_final(sctx, out, SHA224_DIGEST_SIZE, true); +} +EXPORT_SYMBOL(sha224_final_generic); +#endif + MODULE_DESCRIPTION("SHA-256 Algorithm"); MODULE_LICENSE("GPL"); -- 2.51.0 From ca4477e41c68b58043e67bc78074cd6fcc59ee5e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:27 -0700 Subject: [PATCH 12/16] crypto: arm/sha256 - implement library instead of shash Instead of providing crypto_shash algorithms for the arch-optimized SHA-256 code, instead implement the SHA-256 library. This is much simpler, it makes the SHA-256 library functions be arch-optimized, and it fixes the longstanding issue where the arch-optimized SHA-256 was disabled by default. SHA-256 still remains available through crypto_shash, but individual architectures no longer need to handle it. To merge the scalar, NEON, and CE code all into one module cleanly, add !CPU_V7M as a direct dependency of the CE code. Previously, !CPU_V7M was only a direct dependency of the scalar and NEON code. The result is still the same because CPU_V7M implies !KERNEL_MODE_NEON, so !CPU_V7M was already an indirect dependency of the CE code. To match sha256_blocks_arch(), change the type of the nblocks parameter of the assembly functions from int to size_t. The assembly functions actually already treated it as size_t. While renaming the assembly files, also fix the naming quirk where "sha2" meant sha256. (SHA-512 is also part of SHA-2.) Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/configs/exynos_defconfig | 1 - arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/arm/crypto/Kconfig | 21 ---- arch/arm/crypto/Makefile | 8 +- arch/arm/crypto/sha2-ce-glue.c | 87 -------------- arch/arm/crypto/sha256_glue.c | 107 ------------------ arch/arm/crypto/sha256_glue.h | 9 -- arch/arm/crypto/sha256_neon_glue.c | 75 ------------ arch/arm/lib/crypto/.gitignore | 1 + arch/arm/lib/crypto/Kconfig | 6 + arch/arm/lib/crypto/Makefile | 8 +- arch/arm/{ => lib}/crypto/sha256-armv4.pl | 0 .../sha2-ce-core.S => lib/crypto/sha256-ce.S} | 10 +- arch/arm/lib/crypto/sha256.c | 64 +++++++++++ 17 files changed, 84 insertions(+), 317 deletions(-) delete mode 100644 arch/arm/crypto/sha2-ce-glue.c delete mode 100644 arch/arm/crypto/sha256_glue.c delete mode 100644 arch/arm/crypto/sha256_glue.h delete mode 100644 arch/arm/crypto/sha256_neon_glue.c rename arch/arm/{ => lib}/crypto/sha256-armv4.pl (100%) rename arch/arm/{crypto/sha2-ce-core.S => lib/crypto/sha256-ce.S} (91%) create mode 100644 arch/arm/lib/crypto/sha256.c diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index e81a5d6c1c20..c6792c0256a6 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -364,7 +364,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index 275ddf7a3a14..4ec21f477c63 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -101,7 +101,6 @@ CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m -CONFIG_CRYPTO_SHA2_ARM_CE=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ad037c175fdb..96178acedad0 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1301,7 +1301,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m -CONFIG_CRYPTO_SHA2_ARM_CE=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 75b326bc7830..317f977e509e 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -697,7 +697,6 @@ CONFIG_SECURITY=y CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 24fca8608554..56be85752909 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -660,7 +660,6 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y CONFIG_CRYPTO_SHA1_ARM=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_FONTS=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 1f889d6bab77..7efb9a8596e4 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -93,27 +93,6 @@ config CRYPTO_SHA1_ARM_CE Architecture: arm using ARMv8 Crypto Extensions -config CRYPTO_SHA2_ARM_CE - tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA256_ARM - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm using - - ARMv8 Crypto Extensions - -config CRYPTO_SHA256_ARM - tristate "Hash functions: SHA-224 and SHA-256 (NEON)" - select CRYPTO_HASH - depends on !CPU_V7M - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm using - - NEON (Advanced SIMD) extensions - config CRYPTO_SHA512_ARM tristate "Hash functions: SHA-384 and SHA-512 (NEON)" select CRYPTO_HASH diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ecabe6603e08..8479137c6e80 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o -obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o @@ -15,20 +14,16 @@ obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o -sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o -sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o -sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o @@ -40,9 +35,8 @@ quiet_cmd_perl = PERL $@ $(obj)/%-core.S: $(src)/%-armv4.pl $(call cmd,perl) -clean-files += sha256-core.S sha512-core.S +clean-files += sha512-core.S aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 -AFLAGS_sha256-core.o += $(aflags-thumb2-y) AFLAGS_sha512-core.o += $(aflags-thumb2-y) diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c deleted file mode 100644 index 1e9d16f79678..000000000000 --- a/arch/arm/crypto/sha2-ce-glue.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include -#include - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); - -asmlinkage void sha2_ce_transform(struct crypto_sha256_state *sst, - u8 const *src, int blocks); - -static int sha2_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha256_base_do_update_blocks(desc, data, len, - sha2_ce_transform); - kernel_neon_end(); - return remain; -} - -static int sha2_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha256_base_do_finup(desc, data, len, sha2_ce_transform); - kernel_neon_end(); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[] = { { - .init = sha224_base_init, - .update = sha2_ce_update, - .finup = sha2_ce_finup, - .descsize = sizeof(struct crypto_sha256_state), - .digestsize = SHA224_DIGEST_SIZE, - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .init = sha256_base_init, - .update = sha2_ce_update, - .finup = sha2_ce_finup, - .descsize = sizeof(struct crypto_sha256_state), - .digestsize = SHA256_DIGEST_SIZE, - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha2_ce_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha2_ce_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA2, sha2_ce_mod_init); -module_exit(sha2_ce_mod_fini); diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c deleted file mode 100644 index d04c4e6bae6d..000000000000 --- a/arch/arm/crypto/sha256_glue.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for the SHA256 Secure Hash Algorithm assembly implementation - * using optimized ARM assembler and NEON instructions. - * - * Copyright © 2015 Google Inc. - * - * This file is based on sha256_ssse3_glue.c: - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen - */ - -#include -#include -#include -#include -#include -#include - -#include "sha256_glue.h" - -asmlinkage void sha256_block_data_order(struct crypto_sha256_state *state, - const u8 *data, int num_blks); - -static int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - /* make sure casting to sha256_block_fn() is safe */ - BUILD_BUG_ON(offsetof(struct crypto_sha256_state, state) != 0); - - return sha256_base_do_update_blocks(desc, data, len, - sha256_block_data_order); -} - -static int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha256_base_do_finup(desc, data, len, sha256_block_data_order); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_arm_update, - .finup = crypto_sha256_arm_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-asm", - .cra_priority = 150, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_arm_update, - .finup = crypto_sha256_arm_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-asm", - .cra_priority = 150, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha256_mod_init(void) -{ - int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); - - if (res < 0) - return res; - - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { - res = crypto_register_shashes(sha256_neon_algs, - ARRAY_SIZE(sha256_neon_algs)); - - if (res < 0) - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - } - - return res; -} - -static void __exit sha256_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) - crypto_unregister_shashes(sha256_neon_algs, - ARRAY_SIZE(sha256_neon_algs)); -} - -module_init(sha256_mod_init); -module_exit(sha256_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); - -MODULE_ALIAS_CRYPTO("sha256"); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h deleted file mode 100644 index 9881c9a115d1..000000000000 --- a/arch/arm/crypto/sha256_glue.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CRYPTO_SHA256_GLUE_H -#define _CRYPTO_SHA256_GLUE_H - -#include - -extern struct shash_alg sha256_neon_algs[2]; - -#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c deleted file mode 100644 index 76eb3cdc21c9..000000000000 --- a/arch/arm/crypto/sha256_neon_glue.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for the SHA256 Secure Hash Algorithm assembly implementation - * using NEON instructions. - * - * Copyright © 2015 Google Inc. - * - * This file is based on sha512_neon_glue.c: - * Copyright © 2014 Jussi Kivilinna - */ - -#include -#include -#include -#include -#include -#include - -#include "sha256_glue.h" - -asmlinkage void sha256_block_data_order_neon( - struct crypto_sha256_state *digest, const u8 *data, int num_blks); - -static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha256_base_do_update_blocks(desc, data, len, - sha256_block_data_order_neon); - kernel_neon_end(); - return remain; -} - -static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha256_base_do_finup(desc, data, len, sha256_block_data_order_neon); - kernel_neon_end(); - return sha256_base_finish(desc, out); -} - -struct shash_alg sha256_neon_algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_neon_update, - .finup = crypto_sha256_neon_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_neon_update, - .finup = crypto_sha256_neon_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; diff --git a/arch/arm/lib/crypto/.gitignore b/arch/arm/lib/crypto/.gitignore index 0d47d4f21c6d..12d74d8b03d0 100644 --- a/arch/arm/lib/crypto/.gitignore +++ b/arch/arm/lib/crypto/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only poly1305-core.S +sha256-core.S diff --git a/arch/arm/lib/crypto/Kconfig b/arch/arm/lib/crypto/Kconfig index e8444fd0aae3..9f3ff30f4032 100644 --- a/arch/arm/lib/crypto/Kconfig +++ b/arch/arm/lib/crypto/Kconfig @@ -22,3 +22,9 @@ config CRYPTO_POLY1305_ARM tristate default CRYPTO_LIB_POLY1305 select CRYPTO_ARCH_HAVE_LIB_POLY1305 + +config CRYPTO_SHA256_ARM + tristate + depends on !CPU_V7M + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 diff --git a/arch/arm/lib/crypto/Makefile b/arch/arm/lib/crypto/Makefile index 4c042a4c77ed..431f77c3ff6f 100644 --- a/arch/arm/lib/crypto/Makefile +++ b/arch/arm/lib/crypto/Makefile @@ -10,13 +10,17 @@ chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o poly1305-arm-y := poly1305-core.o poly1305-glue.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o +sha256-arm-y := sha256.o sha256-core.o +sha256-arm-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o + quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) $(obj)/%-core.S: $(src)/%-armv4.pl $(call cmd,perl) -clean-files += poly1305-core.S +clean-files += poly1305-core.S sha256-core.S aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 @@ -24,3 +28,5 @@ aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) + +AFLAGS_sha256-core.o += $(aflags-thumb2-y) diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/lib/crypto/sha256-armv4.pl similarity index 100% rename from arch/arm/crypto/sha256-armv4.pl rename to arch/arm/lib/crypto/sha256-armv4.pl diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/lib/crypto/sha256-ce.S similarity index 91% rename from arch/arm/crypto/sha2-ce-core.S rename to arch/arm/lib/crypto/sha256-ce.S index b6369d2440a1..ac2c9b01b22d 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/lib/crypto/sha256-ce.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions + * sha256-ce.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions * * Copyright (C) 2015 Linaro Ltd. * Author: Ard Biesheuvel @@ -67,10 +67,10 @@ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 /* - * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, - int blocks); + * void sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + * const u8 *data, size_t nblocks); */ -ENTRY(sha2_ce_transform) +ENTRY(sha256_ce_transform) /* load state */ vld1.32 {dga-dgb}, [r0] @@ -120,4 +120,4 @@ ENTRY(sha2_ce_transform) /* store new state */ vst1.32 {dga-dgb}, [r0] bx lr -ENDPROC(sha2_ce_transform) +ENDPROC(sha256_ce_transform) diff --git a/arch/arm/lib/crypto/sha256.c b/arch/arm/lib/crypto/sha256.c new file mode 100644 index 000000000000..3a8dfc304807 --- /dev/null +++ b/arch/arm/lib/crypto/sha256.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized for ARM + * + * Copyright 2025 Google LLC + */ +#include +#include +#include +#include +#include + +asmlinkage void sha256_block_data_order(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage void sha256_block_data_order_neon(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage void sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + if (static_branch_likely(&have_ce)) + sha256_ce_transform(state, data, nblocks); + else + sha256_block_data_order_neon(state, data, nblocks); + kernel_neon_end(); + } else { + sha256_block_data_order(state, data, nblocks); + } +} +EXPORT_SYMBOL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + /* We always can use at least the ARM scalar implementation. */ + return true; +} +EXPORT_SYMBOL(sha256_is_arch_optimized); + +static int __init sha256_arm_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + static_branch_enable(&have_neon); + if (elf_hwcap2 & HWCAP2_SHA2) + static_branch_enable(&have_ce); + } + return 0; +} +arch_initcall(sha256_arm_mod_init); + +static void __exit sha256_arm_mod_exit(void) +{ +} +module_exit(sha256_arm_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 optimized for ARM"); -- 2.51.0 From 642cfc0680ff9aae73cd87d6fffcc84d9434938b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:28 -0700 Subject: [PATCH 13/16] crypto: arm64/sha256 - remove obsolete chunking logic Since kernel-mode NEON sections are now preemptible on arm64, there is no longer any need to limit the length of them. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha256-glue.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 26f9fdfae87b..d63ea82e1374 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -86,23 +86,8 @@ static struct shash_alg algs[] = { { static int sha256_update_neon(struct shash_desc *desc, const u8 *data, unsigned int len) { - do { - unsigned int chunk = len; - - /* - * Don't hog the CPU for the entire time it takes to process all - * input when running on a preemptible kernel, but process the - * data block by block instead. - */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - chunk = SHA256_BLOCK_SIZE; - - chunk -= sha256_base_do_update_blocks(desc, data, chunk, - sha256_neon_transform); - data += chunk; - len -= chunk; - } while (len >= SHA256_BLOCK_SIZE); - return len; + return sha256_base_do_update_blocks(desc, data, len, + sha256_neon_transform); } static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, -- 2.51.0 From 6e36be511d2846f40c0095c408797ceef17db4f5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:29 -0700 Subject: [PATCH 14/16] crypto: arm64/sha256 - implement library instead of shash Instead of providing crypto_shash algorithms for the arch-optimized SHA-256 code, instead implement the SHA-256 library. This is much simpler, it makes the SHA-256 library functions be arch-optimized, and it fixes the longstanding issue where the arch-optimized SHA-256 was disabled by default. SHA-256 still remains available through crypto_shash, but individual architectures no longer need to handle it. Remove support for SHA-256 finalization from the ARMv8 CE assembly code, since the library does not yet support architecture-specific overrides of the finalization. (Support for that has been omitted for now, for simplicity and because usually it isn't performance-critical.) To match sha256_blocks_arch(), change the type of the nblocks parameter of the assembly functions from int or 'unsigned int' to size_t. Update the ARMv8 CE assembly function accordingly. The scalar and NEON assembly functions actually already treated it as size_t. While renaming the assembly files, also fix the naming quirks where "sha2" meant sha256, and "sha512" meant both sha256 and sha512. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/configs/defconfig | 1 - arch/arm64/crypto/Kconfig | 19 --- arch/arm64/crypto/Makefile | 13 +- arch/arm64/crypto/sha2-ce-glue.c | 138 ---------------- arch/arm64/crypto/sha256-glue.c | 156 ------------------ arch/arm64/lib/crypto/.gitignore | 1 + arch/arm64/lib/crypto/Kconfig | 5 + arch/arm64/lib/crypto/Makefile | 9 +- .../crypto/sha2-armv8.pl} | 0 .../sha2-ce-core.S => lib/crypto/sha256-ce.S} | 36 +--- arch/arm64/lib/crypto/sha256.c | 75 +++++++++ 11 files changed, 98 insertions(+), 355 deletions(-) delete mode 100644 arch/arm64/crypto/sha2-ce-glue.c delete mode 100644 arch/arm64/crypto/sha256-glue.c rename arch/arm64/{crypto/sha512-armv8.pl => lib/crypto/sha2-armv8.pl} (100%) rename arch/arm64/{crypto/sha2-ce-core.S => lib/crypto/sha256-ce.S} (80%) create mode 100644 arch/arm64/lib/crypto/sha256.c diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5bb8f09422a2..b0d4c7d173ea 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1737,7 +1737,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_SHA1_ARM64_CE=y -CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_SHA512_ARM64_CE=m CONFIG_CRYPTO_SHA3_ARM64=m CONFIG_CRYPTO_SM3_ARM64_CE=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 55a7d87a6769..c44b0f202a1f 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -36,25 +36,6 @@ config CRYPTO_SHA1_ARM64_CE Architecture: arm64 using: - ARMv8 Crypto Extensions -config CRYPTO_SHA256_ARM64 - tristate "Hash functions: SHA-224 and SHA-256" - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm64 - -config CRYPTO_SHA2_ARM64_CE - tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA256_ARM64 - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_SHA512_ARM64 tristate "Hash functions: SHA-384 and SHA-512" select CRYPTO_HASH diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 089ae3ddde81..c231c980c514 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -8,9 +8,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o -obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o -sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o - obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o @@ -56,9 +53,6 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o -obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o -sha256-arm64-y := sha256-glue.o sha256-core.o - obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o @@ -74,10 +68,7 @@ aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) -$(obj)/%-core.S: $(src)/%-armv8.pl - $(call cmd,perlasm) - -$(obj)/sha256-core.S: $(src)/sha512-armv8.pl +$(obj)/sha512-core.S: $(src)/../lib/crypto/sha2-armv8.pl $(call cmd,perlasm) -clean-files += sha256-core.S sha512-core.S +clean-files += sha512-core.S diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c deleted file mode 100644 index 912c215101eb..000000000000 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 - 2017 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha256"); - -struct sha256_ce_state { - struct crypto_sha256_state sst; - u32 finalize; -}; - -extern const u32 sha256_ce_offsetof_count; -extern const u32 sha256_ce_offsetof_finalize; - -asmlinkage int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src, - int blocks); - -static void sha256_ce_transform(struct crypto_sha256_state *sst, u8 const *src, - int blocks) -{ - while (blocks) { - int rem; - - kernel_neon_begin(); - rem = __sha256_ce_transform(container_of(sst, - struct sha256_ce_state, - sst), src, blocks); - kernel_neon_end(); - src += (blocks - rem) * SHA256_BLOCK_SIZE; - blocks = rem; - } -} - -const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, - sst.count); -const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, - finalize); - -static int sha256_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - - sctx->finalize = 0; - return sha256_base_do_update_blocks(desc, data, len, - sha256_ce_transform); -} - -static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - bool finalize = !(len % SHA256_BLOCK_SIZE) && len; - - /* - * Allow the asm code to perform the finalization if there is no - * partial data and the input is a round multiple of the block size. - */ - sctx->finalize = finalize; - - if (finalize) - sha256_base_do_update_blocks(desc, data, len, - sha256_ce_transform); - else - sha256_base_do_finup(desc, data, len, sha256_ce_transform); - return sha256_base_finish(desc, out); -} - -static int sha256_ce_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha256_base_init(desc); - return sha256_ce_finup(desc, data, len, out); -} - -static struct shash_alg algs[] = { { - .init = sha224_base_init, - .update = sha256_ce_update, - .finup = sha256_ce_finup, - .descsize = sizeof(struct sha256_ce_state), - .statesize = sizeof(struct crypto_sha256_state), - .digestsize = SHA224_DIGEST_SIZE, - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .init = sha256_base_init, - .update = sha256_ce_update, - .finup = sha256_ce_finup, - .digest = sha256_ce_digest, - .descsize = sizeof(struct sha256_ce_state), - .statesize = sizeof(struct crypto_sha256_state), - .digestsize = SHA256_DIGEST_SIZE, - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha2_ce_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha2_ce_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA2, sha2_ce_mod_init); -module_exit(sha2_ce_mod_fini); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c deleted file mode 100644 index d63ea82e1374..000000000000 --- a/arch/arm64/crypto/sha256-glue.c +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64 - * - * Copyright (c) 2016 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64"); -MODULE_AUTHOR("Andy Polyakov "); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha256"); - -asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); -EXPORT_SYMBOL(sha256_block_data_order); - -static void sha256_arm64_transform(struct crypto_sha256_state *sst, - u8 const *src, int blocks) -{ - sha256_block_data_order(sst->state, src, blocks); -} - -asmlinkage void sha256_block_neon(u32 *digest, const void *data, - unsigned int num_blks); - -static void sha256_neon_transform(struct crypto_sha256_state *sst, - u8 const *src, int blocks) -{ - kernel_neon_begin(); - sha256_block_neon(sst->state, src, blocks); - kernel_neon_end(); -} - -static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update_blocks(desc, data, len, - sha256_arm64_transform); -} - -static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha256_base_do_finup(desc, data, len, sha256_arm64_transform); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_arm64_update, - .finup = crypto_sha256_arm64_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base.cra_name = "sha256", - .base.cra_driver_name = "sha256-arm64", - .base.cra_priority = 125, - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .base.cra_blocksize = SHA256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_arm64_update, - .finup = crypto_sha256_arm64_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base.cra_name = "sha224", - .base.cra_driver_name = "sha224-arm64", - .base.cra_priority = 125, - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .base.cra_blocksize = SHA224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -} }; - -static int sha256_update_neon(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update_blocks(desc, data, len, - sha256_neon_transform); -} - -static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - if (len >= SHA256_BLOCK_SIZE) { - int remain = sha256_update_neon(desc, data, len); - - data += len - remain; - len = remain; - } - sha256_base_do_finup(desc, data, len, sha256_neon_transform); - return sha256_base_finish(desc, out); -} - -static struct shash_alg neon_algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = sha256_update_neon, - .finup = sha256_finup_neon, - .descsize = sizeof(struct crypto_sha256_state), - .base.cra_name = "sha256", - .base.cra_driver_name = "sha256-arm64-neon", - .base.cra_priority = 150, - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .base.cra_blocksize = SHA256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = sha256_update_neon, - .finup = sha256_finup_neon, - .descsize = sizeof(struct crypto_sha256_state), - .base.cra_name = "sha224", - .base.cra_driver_name = "sha224-arm64-neon", - .base.cra_priority = 150, - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .base.cra_blocksize = SHA224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -} }; - -static int __init sha256_mod_init(void) -{ - int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs)); - if (ret) - return ret; - - if (cpu_have_named_feature(ASIMD)) { - ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs)); - if (ret) - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - } - return ret; -} - -static void __exit sha256_mod_fini(void) -{ - if (cpu_have_named_feature(ASIMD)) - crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs)); - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(sha256_mod_init); -module_exit(sha256_mod_fini); diff --git a/arch/arm64/lib/crypto/.gitignore b/arch/arm64/lib/crypto/.gitignore index 0d47d4f21c6d..12d74d8b03d0 100644 --- a/arch/arm64/lib/crypto/.gitignore +++ b/arch/arm64/lib/crypto/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only poly1305-core.S +sha256-core.S diff --git a/arch/arm64/lib/crypto/Kconfig b/arch/arm64/lib/crypto/Kconfig index 0b903ef524d8..49e57bfdb5b5 100644 --- a/arch/arm64/lib/crypto/Kconfig +++ b/arch/arm64/lib/crypto/Kconfig @@ -12,3 +12,8 @@ config CRYPTO_POLY1305_NEON depends on KERNEL_MODE_NEON default CRYPTO_LIB_POLY1305 select CRYPTO_ARCH_HAVE_LIB_POLY1305 + +config CRYPTO_SHA256_ARM64 + tristate + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 diff --git a/arch/arm64/lib/crypto/Makefile b/arch/arm64/lib/crypto/Makefile index 6207088397a7..946c09903711 100644 --- a/arch/arm64/lib/crypto/Makefile +++ b/arch/arm64/lib/crypto/Makefile @@ -8,10 +8,17 @@ poly1305-neon-y := poly1305-core.o poly1305-glue.o AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch +obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o +sha256-arm64-y := sha256.o sha256-core.o +sha256-arm64-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) $(obj)/%-core.S: $(src)/%-armv8.pl $(call cmd,perlasm) -clean-files += poly1305-core.S +$(obj)/sha256-core.S: $(src)/sha2-armv8.pl + $(call cmd,perlasm) + +clean-files += poly1305-core.S sha256-core.S diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/lib/crypto/sha2-armv8.pl similarity index 100% rename from arch/arm64/crypto/sha512-armv8.pl rename to arch/arm64/lib/crypto/sha2-armv8.pl diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/lib/crypto/sha256-ce.S similarity index 80% rename from arch/arm64/crypto/sha2-ce-core.S rename to arch/arm64/lib/crypto/sha256-ce.S index fce84d88ddb2..a8461d6dad63 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/lib/crypto/sha256-ce.S @@ -71,8 +71,8 @@ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 /* - * int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src, - * int blocks) + * size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + * const u8 *data, size_t nblocks); */ .text SYM_FUNC_START(__sha256_ce_transform) @@ -86,20 +86,16 @@ SYM_FUNC_START(__sha256_ce_transform) /* load state */ ld1 {dgav.4s, dgbv.4s}, [x0] - /* load sha256_ce_state::finalize */ - ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] - /* load input */ 0: ld1 {v16.4s-v19.4s}, [x1], #64 - sub w2, w2, #1 + sub x2, x2, #1 CPU_LE( rev32 v16.16b, v16.16b ) CPU_LE( rev32 v17.16b, v17.16b ) CPU_LE( rev32 v18.16b, v18.16b ) CPU_LE( rev32 v19.16b, v19.16b ) -1: add t0.4s, v16.4s, v0.4s + add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b mov dg1v.16b, dgbv.16b @@ -128,30 +124,12 @@ CPU_LE( rev32 v19.16b, v19.16b ) add dgbv.4s, dgbv.4s, dg1v.4s /* handled all input blocks? */ - cbz w2, 2f + cbz x2, 1f cond_yield 3f, x5, x6 b 0b - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -2: cbz x4, 3f - ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x0, x4] - movi v17.2d, #0 - mov x8, #0x80000000 - movi v18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d16, x8 - mov x4, #0 - mov v19.d[0], xzr - mov v19.d[1], x7 - b 1b - /* store new state */ -3: st1 {dgav.4s, dgbv.4s}, [x0] - mov w0, w2 +1: st1 {dgav.4s, dgbv.4s}, [x0] + mov x0, x2 ret SYM_FUNC_END(__sha256_ce_transform) diff --git a/arch/arm64/lib/crypto/sha256.c b/arch/arm64/lib/crypto/sha256.c new file mode 100644 index 000000000000..2bd413c586d2 --- /dev/null +++ b/arch/arm64/lib/crypto/sha256.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized for ARM64 + * + * Copyright 2025 Google LLC + */ +#include +#include +#include +#include +#include + +asmlinkage void sha256_block_data_order(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage void sha256_block_neon(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && crypto_simd_usable()) { + if (static_branch_likely(&have_ce)) { + do { + size_t rem; + + kernel_neon_begin(); + rem = __sha256_ce_transform(state, + data, nblocks); + kernel_neon_end(); + data += (nblocks - rem) * SHA256_BLOCK_SIZE; + nblocks = rem; + } while (nblocks); + } else { + kernel_neon_begin(); + sha256_block_neon(state, data, nblocks); + kernel_neon_end(); + } + } else { + sha256_block_data_order(state, data, nblocks); + } +} +EXPORT_SYMBOL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + /* We always can use at least the ARM64 scalar implementation. */ + return true; +} +EXPORT_SYMBOL(sha256_is_arch_optimized); + +static int __init sha256_arm64_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + cpu_have_named_feature(ASIMD)) { + static_branch_enable(&have_neon); + if (cpu_have_named_feature(SHA2)) + static_branch_enable(&have_ce); + } + return 0; +} +arch_initcall(sha256_arm64_mod_init); + +static void __exit sha256_arm64_mod_exit(void) +{ +} +module_exit(sha256_arm64_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 optimized for ARM64"); -- 2.51.0 From b67b6f9adb326f4a816b11bc3dabf9190070f536 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:30 -0700 Subject: [PATCH 15/16] crypto: mips/sha256 - implement library instead of shash Instead of providing crypto_shash algorithms for the arch-optimized SHA-256 code, instead implement the SHA-256 library. This is much simpler, it makes the SHA-256 library functions be arch-optimized, and it fixes the longstanding issue where the arch-optimized SHA-256 was disabled by default. SHA-256 still remains available through crypto_shash, but individual architectures no longer need to handle it. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/mips/cavium-octeon/Kconfig | 6 + .../mips/cavium-octeon/crypto/octeon-sha256.c | 135 ++++-------------- arch/mips/configs/cavium_octeon_defconfig | 1 - arch/mips/crypto/Kconfig | 10 -- 4 files changed, 33 insertions(+), 119 deletions(-) diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index 450e979ef5d9..11f4aa6e80e9 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -23,6 +23,12 @@ config CAVIUM_OCTEON_CVMSEG_SIZE legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is between zero and 6192 bytes). +config CRYPTO_SHA256_OCTEON + tristate + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 + select CRYPTO_LIB_SHA256_GENERIC + endif # CPU_CAVIUM_OCTEON if CAVIUM_OCTEON_SOC diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c index 8e85ea65387c..f169054852bc 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Cryptographic API. - * - * SHA-224 and SHA-256 Secure Hash Algorithm. + * SHA-256 Secure Hash Algorithm. * * Adapted for OCTEON by Aaro Koskinen . * @@ -15,9 +13,7 @@ */ #include -#include -#include -#include +#include #include #include @@ -27,31 +23,24 @@ * We pass everything as 64-bit. OCTEON can handle misaligned data. */ -static void octeon_sha256_store_hash(struct crypto_sha256_state *sctx) +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) { - u64 *hash = (u64 *)sctx->state; - - write_octeon_64bit_hash_dword(hash[0], 0); - write_octeon_64bit_hash_dword(hash[1], 1); - write_octeon_64bit_hash_dword(hash[2], 2); - write_octeon_64bit_hash_dword(hash[3], 3); -} + struct octeon_cop2_state cop2_state; + u64 *state64 = (u64 *)state; + unsigned long flags; -static void octeon_sha256_read_hash(struct crypto_sha256_state *sctx) -{ - u64 *hash = (u64 *)sctx->state; + if (!octeon_has_crypto()) + return sha256_blocks_generic(state, data, nblocks); - hash[0] = read_octeon_64bit_hash_dword(0); - hash[1] = read_octeon_64bit_hash_dword(1); - hash[2] = read_octeon_64bit_hash_dword(2); - hash[3] = read_octeon_64bit_hash_dword(3); -} + flags = octeon_crypto_enable(&cop2_state); + write_octeon_64bit_hash_dword(state64[0], 0); + write_octeon_64bit_hash_dword(state64[1], 1); + write_octeon_64bit_hash_dword(state64[2], 2); + write_octeon_64bit_hash_dword(state64[3], 3); -static void octeon_sha256_transform(struct crypto_sha256_state *sctx, - const u8 *src, int blocks) -{ do { - const u64 *block = (const u64 *)src; + const u64 *block = (const u64 *)data; write_octeon_64bit_block_dword(block[0], 0); write_octeon_64bit_block_dword(block[1], 1); @@ -62,93 +51,23 @@ static void octeon_sha256_transform(struct crypto_sha256_state *sctx, write_octeon_64bit_block_dword(block[6], 6); octeon_sha256_start(block[7]); - src += SHA256_BLOCK_SIZE; - } while (--blocks); -} - -static int octeon_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct crypto_sha256_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - int remain; - - flags = octeon_crypto_enable(&state); - octeon_sha256_store_hash(sctx); - - remain = sha256_base_do_update_blocks(desc, data, len, - octeon_sha256_transform); + data += SHA256_BLOCK_SIZE; + } while (--nblocks); - octeon_sha256_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return remain; + state64[0] = read_octeon_64bit_hash_dword(0); + state64[1] = read_octeon_64bit_hash_dword(1); + state64[2] = read_octeon_64bit_hash_dword(2); + state64[3] = read_octeon_64bit_hash_dword(3); + octeon_crypto_disable(&cop2_state, flags); } +EXPORT_SYMBOL(sha256_blocks_arch); -static int octeon_sha256_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *out) +bool sha256_is_arch_optimized(void) { - struct crypto_sha256_state *sctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - - flags = octeon_crypto_enable(&state); - octeon_sha256_store_hash(sctx); - - sha256_base_do_finup(desc, src, len, octeon_sha256_transform); - - octeon_sha256_read_hash(sctx); - octeon_crypto_disable(&state, flags); - return sha256_base_finish(desc, out); + return octeon_has_crypto(); } - -static struct shash_alg octeon_sha256_algs[2] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = octeon_sha256_update, - .finup = octeon_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "octeon-sha256", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = octeon_sha256_update, - .finup = octeon_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "octeon-sha224", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init octeon_sha256_mod_init(void) -{ - if (!octeon_has_crypto()) - return -ENOTSUPP; - return crypto_register_shashes(octeon_sha256_algs, - ARRAY_SIZE(octeon_sha256_algs)); -} - -static void __exit octeon_sha256_mod_fini(void) -{ - crypto_unregister_shashes(octeon_sha256_algs, - ARRAY_SIZE(octeon_sha256_algs)); -} - -module_init(octeon_sha256_mod_init); -module_exit(octeon_sha256_mod_fini); +EXPORT_SYMBOL(sha256_is_arch_optimized); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)"); +MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm (OCTEON)"); MODULE_AUTHOR("Aaro Koskinen "); diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index f523ee6f25bf..88ae0aa85364 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -157,7 +157,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5_OCTEON=y CONFIG_CRYPTO_SHA1_OCTEON=m -CONFIG_CRYPTO_SHA256_OCTEON=m CONFIG_CRYPTO_SHA512_OCTEON=m CONFIG_CRYPTO_DES=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 9db1fd6d9f0e..6bf073ae7613 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -22,16 +22,6 @@ config CRYPTO_SHA1_OCTEON Architecture: mips OCTEON -config CRYPTO_SHA256_OCTEON - tristate "Hash functions: SHA-224 and SHA-256 (OCTEON)" - depends on CPU_CAVIUM_OCTEON - select CRYPTO_SHA256 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: mips OCTEON using crypto instructions, when available - config CRYPTO_SHA512_OCTEON tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)" depends on CPU_CAVIUM_OCTEON -- 2.51.0 From 1a49c573bf886349c7bc958b331c700cf18601d8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:31 -0700 Subject: [PATCH 16/16] crypto: powerpc/sha256 - implement library instead of shash Instead of providing crypto_shash algorithms for the arch-optimized SHA-256 code, instead implement the SHA-256 library. This is much simpler, it makes the SHA-256 library functions be arch-optimized, and it fixes the longstanding issue where the arch-optimized SHA-256 was disabled by default. SHA-256 still remains available through crypto_shash, but individual architectures no longer need to handle it. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/powerpc/crypto/Kconfig | 11 -- arch/powerpc/crypto/Makefile | 2 - arch/powerpc/crypto/sha256-spe-glue.c | 128 ------------------ arch/powerpc/lib/crypto/Kconfig | 6 + arch/powerpc/lib/crypto/Makefile | 3 + .../powerpc/{ => lib}/crypto/sha256-spe-asm.S | 0 arch/powerpc/lib/crypto/sha256.c | 70 ++++++++++ 7 files changed, 79 insertions(+), 141 deletions(-) delete mode 100644 arch/powerpc/crypto/sha256-spe-glue.c rename arch/powerpc/{ => lib}/crypto/sha256-spe-asm.S (100%) create mode 100644 arch/powerpc/lib/crypto/sha256.c diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 4bf7b01228e7..caaa359f4742 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -39,17 +39,6 @@ config CRYPTO_SHA1_PPC_SPE Architecture: powerpc using - SPE (Signal Processing Engine) extensions -config CRYPTO_SHA256_PPC_SPE - tristate "Hash functions: SHA-224 and SHA-256 (SPE)" - depends on SPE - select CRYPTO_SHA256 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: powerpc using - - SPE (Signal Processing Engine) extensions - config CRYPTO_AES_PPC_SPE tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)" depends on SPE diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index f13aec8a1833..8c2936ae466f 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -9,7 +9,6 @@ obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o -obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o @@ -18,7 +17,6 @@ aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes- md5-ppc-y := md5-asm.o md5-glue.o sha1-powerpc-y := sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o -sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c deleted file mode 100644 index 42c76bf8062d..000000000000 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for SHA-256 implementation for SPE instructions (PPC) - * - * Based on generic implementation. The assembler module takes care - * about the SPE registers so it can run from interrupt context. - * - * Copyright (c) 2015 Markus Stockhausen - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * MAX_BYTES defines the number of bytes that are allowed to be processed - * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000 - * operations per 64 bytes. e500 cores can issue two arithmetic instructions - * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2). - * Thus 1KB of input data will need an estimated maximum of 18,000 cycles. - * Headroom for cache misses included. Even with the low end model clocked - * at 667 MHz this equals to a critical time window of less than 27us. - * - */ -#define MAX_BYTES 1024 - -extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks); - -static void spe_begin(void) -{ - /* We just start SPE operations and will save SPE registers later. */ - preempt_disable(); - enable_kernel_spe(); -} - -static void spe_end(void) -{ - disable_kernel_spe(); - /* reenable preemption */ - preempt_enable(); -} - -static void ppc_spe_sha256_block(struct crypto_sha256_state *sctx, - const u8 *src, int blocks) -{ - do { - /* cut input data into smaller blocks */ - int unit = min(blocks, MAX_BYTES / SHA256_BLOCK_SIZE); - - spe_begin(); - ppc_spe_sha256_transform(sctx->state, src, unit); - spe_end(); - - src += unit * SHA256_BLOCK_SIZE; - blocks -= unit; - } while (blocks); -} - -static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update_blocks(desc, data, len, - ppc_spe_sha256_block); -} - -static int ppc_spe_sha256_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *out) -{ - sha256_base_do_finup(desc, src, len, ppc_spe_sha256_block); - return sha256_base_finish(desc, out); -} - -static struct shash_alg algs[2] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = ppc_spe_sha256_update, - .finup = ppc_spe_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = ppc_spe_sha256_update, - .finup = ppc_spe_sha256_finup, - .descsize = sizeof(struct crypto_sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init ppc_spe_sha256_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit ppc_spe_sha256_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(ppc_spe_sha256_mod_init); -module_exit(ppc_spe_sha256_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized"); - -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha224-ppc-spe"); -MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha256-ppc-spe"); diff --git a/arch/powerpc/lib/crypto/Kconfig b/arch/powerpc/lib/crypto/Kconfig index bf6d0ab22c27..ffa541ad6d5d 100644 --- a/arch/powerpc/lib/crypto/Kconfig +++ b/arch/powerpc/lib/crypto/Kconfig @@ -13,3 +13,9 @@ config CRYPTO_POLY1305_P10 default CRYPTO_LIB_POLY1305 select CRYPTO_ARCH_HAVE_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC + +config CRYPTO_SHA256_PPC_SPE + tristate + depends on SPE + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 diff --git a/arch/powerpc/lib/crypto/Makefile b/arch/powerpc/lib/crypto/Makefile index 5709ae14258a..27f231f8e334 100644 --- a/arch/powerpc/lib/crypto/Makefile +++ b/arch/powerpc/lib/crypto/Makefile @@ -5,3 +5,6 @@ chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o + +obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o +sha256-ppc-spe-y := sha256.o sha256-spe-asm.o diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/lib/crypto/sha256-spe-asm.S similarity index 100% rename from arch/powerpc/crypto/sha256-spe-asm.S rename to arch/powerpc/lib/crypto/sha256-spe-asm.S diff --git a/arch/powerpc/lib/crypto/sha256.c b/arch/powerpc/lib/crypto/sha256.c new file mode 100644 index 000000000000..c05023c5acdd --- /dev/null +++ b/arch/powerpc/lib/crypto/sha256.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 Secure Hash Algorithm, SPE optimized + * + * Based on generic implementation. The assembler module takes care + * about the SPE registers so it can run from interrupt context. + * + * Copyright (c) 2015 Markus Stockhausen + */ + +#include +#include +#include +#include +#include + +/* + * MAX_BYTES defines the number of bytes that are allowed to be processed + * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000 + * operations per 64 bytes. e500 cores can issue two arithmetic instructions + * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2). + * Thus 1KB of input data will need an estimated maximum of 18,000 cycles. + * Headroom for cache misses included. Even with the low end model clocked + * at 667 MHz this equals to a critical time window of less than 27us. + * + */ +#define MAX_BYTES 1024 + +extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks); + +static void spe_begin(void) +{ + /* We just start SPE operations and will save SPE registers later. */ + preempt_disable(); + enable_kernel_spe(); +} + +static void spe_end(void) +{ + disable_kernel_spe(); + /* reenable preemption */ + preempt_enable(); +} + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + do { + /* cut input data into smaller blocks */ + u32 unit = min_t(size_t, nblocks, + MAX_BYTES / SHA256_BLOCK_SIZE); + + spe_begin(); + ppc_spe_sha256_transform(state, data, unit); + spe_end(); + + data += unit * SHA256_BLOCK_SIZE; + nblocks -= unit; + } while (nblocks); +} +EXPORT_SYMBOL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + return true; +} +EXPORT_SYMBOL(sha256_is_arch_optimized); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm, SPE optimized"); -- 2.51.0