From 982b72cd00b502b30abe8d48d07d7512204e1d73 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 17:24:08 -0700 Subject: [PATCH 01/16] crypto: x86/sm4 - stop using the SIMD helper Stop wrapping skcipher and aead algorithms with the crypto SIMD helper (crypto/simd.c). The only purpose of doing so was to work around x86 not always supporting kernel-mode FPU in softirqs. Specifically, if a hardirq interrupted a task context kernel-mode FPU section and then softirqs were run at the end of that hardirq, those softirqs could not use kernel-mode FPU. This has now been fixed. In combination with the fact that the skcipher and aead APIs only support task and softirq contexts, these can now just use kernel-mode FPU unconditionally on x86. This simplifies the code and improves performance. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/Kconfig | 2 -- arch/x86/crypto/sm4_aesni_avx2_glue.c | 31 ++++++++++----------------- arch/x86/crypto/sm4_aesni_avx_glue.c | 31 ++++++++++----------------- 3 files changed, 22 insertions(+), 42 deletions(-) diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 51c74a496126..afc1a05e663d 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -192,7 +192,6 @@ config CRYPTO_SM4_AESNI_AVX_X86_64 tristate "Ciphers: SM4 with modes: ECB, CBC, CTR (AES-NI/AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_SIMD select CRYPTO_ALGAPI select CRYPTO_SM4 help @@ -213,7 +212,6 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64 tristate "Ciphers: SM4 with modes: ECB, CBC, CTR (AES-NI/AVX2)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_SIMD select CRYPTO_ALGAPI select CRYPTO_SM4 select CRYPTO_SM4_AESNI_AVX_X86_64 diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c index 1148fd4cd57f..fec0ab7a63dd 100644 --- a/arch/x86/crypto/sm4_aesni_avx2_glue.c +++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c @@ -8,11 +8,10 @@ * Copyright (c) 2021 Tianjia Zhang */ +#include #include #include #include -#include -#include #include #include #include "sm4-avx.h" @@ -48,10 +47,9 @@ static int ctr_crypt(struct skcipher_request *req) static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { { .base = { - .cra_name = "__ecb(sm4)", - .cra_driver_name = "__ecb-sm4-aesni-avx2", + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-aesni-avx2", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -64,10 +62,9 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { .decrypt = sm4_avx_ecb_decrypt, }, { .base = { - .cra_name = "__cbc(sm4)", - .cra_driver_name = "__cbc-sm4-aesni-avx2", + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-aesni-avx2", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -81,10 +78,9 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { .decrypt = cbc_decrypt, }, { .base = { - .cra_name = "__ctr(sm4)", - .cra_driver_name = "__ctr-sm4-aesni-avx2", + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-aesni-avx2", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -100,9 +96,6 @@ static struct skcipher_alg sm4_aesni_avx2_skciphers[] = { } }; -static struct simd_skcipher_alg *
-simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)]; - static int __init sm4_init(void) { const char *feature_name; @@ -121,16 +114,14 @@ static int __init sm4_init(void) return -ENODEV; } - return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers, - ARRAY_SIZE(sm4_aesni_avx2_skciphers), - simd_sm4_aesni_avx2_skciphers); + return crypto_register_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers)); } static void __exit sm4_exit(void) { - simd_unregister_skciphers(sm4_aesni_avx2_skciphers, - ARRAY_SIZE(sm4_aesni_avx2_skciphers), - simd_sm4_aesni_avx2_skciphers); + crypto_unregister_skciphers(sm4_aesni_avx2_skciphers, + ARRAY_SIZE(sm4_aesni_avx2_skciphers)); } module_init(sm4_init); diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c index 85b4ca78b47b..72867fc49ce8 100644 --- a/arch/x86/crypto/sm4_aesni_avx_glue.c +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -8,11 +8,10 @@ * Copyright (c) 2021 Tianjia Zhang */ +#include #include #include #include -#include -#include #include #include #include "sm4-avx.h" @@ -263,10 +262,9 @@ static int ctr_crypt(struct skcipher_request *req) static struct skcipher_alg sm4_aesni_avx_skciphers[] = { { .base = { - .cra_name = "__ecb(sm4)", - .cra_driver_name = "__ecb-sm4-aesni-avx", + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-aesni-avx", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -279,10 +277,9 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { .decrypt = sm4_avx_ecb_decrypt, }, { .base = { - .cra_name = "__cbc(sm4)", - .cra_driver_name = "__cbc-sm4-aesni-avx", + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-aesni-avx", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -296,10 +293,9 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { .decrypt = cbc_decrypt, }, { .base = { - .cra_name = "__ctr(sm4)", - .cra_driver_name = "__ctr-sm4-aesni-avx", + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-aesni-avx", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, @@ -315,9 +311,6 @@ static struct skcipher_alg sm4_aesni_avx_skciphers[] = { } }; -static struct simd_skcipher_alg * -simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)]; - static int __init sm4_init(void) { const char *feature_name; @@ -335,16 +328,14 @@ static int __init sm4_init(void) return -ENODEV; } - return simd_register_skciphers_compat(sm4_aesni_avx_skciphers, - ARRAY_SIZE(sm4_aesni_avx_skciphers), - simd_sm4_aesni_avx_skciphers); + return crypto_register_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers)); } static void __exit sm4_exit(void) { - simd_unregister_skciphers(sm4_aesni_avx_skciphers, - ARRAY_SIZE(sm4_aesni_avx_skciphers), - simd_sm4_aesni_avx_skciphers); + crypto_unregister_skciphers(sm4_aesni_avx_skciphers, + ARRAY_SIZE(sm4_aesni_avx_skciphers)); } module_init(sm4_init); -- 2.51.0 From bda5cd6e29e82ff0071f801aed75d2480d74703f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 17:24:09 -0700 Subject: [PATCH 02/16] crypto: x86/twofish - stop using the SIMD helper Stop wrapping skcipher and aead algorithms with the crypto SIMD helper (crypto/simd.c). 
The only purpose of doing so was to work around x86 not always supporting kernel-mode FPU in softirqs. Specifically, if a hardirq interrupted a task context kernel-mode FPU section and then softirqs were run at the end of that hardirq, those softirqs could not use kernel-mode FPU. This has now been fixed. In combination with the fact that the skcipher and aead APIs only support task and softirq contexts, these can now just use kernel-mode FPU unconditionally on x86. This simplifies the code and improves performance. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/Kconfig | 1 - arch/x86/crypto/twofish_avx_glue.c | 21 +++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index afc1a05e663d..f9e46e83440f 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -270,7 +270,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 tristate "Ciphers: Twofish with modes: ECB, CBC (AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_SIMD select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 3eb3440b477a..9e20db013750 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "twofish.h" @@ -74,10 +73,9 @@ static int cbc_decrypt(struct skcipher_request *req) static struct skcipher_alg twofish_algs[] = { { - .base.cra_name = "__ecb(twofish)", - .base.cra_driver_name = "__ecb-twofish-avx", + .base.cra_name = "ecb(twofish)", + .base.cra_driver_name = "ecb-twofish-avx", .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, @@ -87,10 +85,9 @@ static struct skcipher_alg twofish_algs[] = { .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { - .base.cra_name = "__cbc(twofish)", - .base.cra_driver_name = "__cbc-twofish-avx", + .base.cra_name = "cbc(twofish)", + .base.cra_driver_name = "cbc-twofish-avx", .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = TF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct twofish_ctx), .base.cra_module = THIS_MODULE, @@ -103,8 +100,6 @@ static struct skcipher_alg twofish_algs[] = { }, }; -static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)]; - static int __init twofish_init(void) { const char *feature_name; @@ -114,15 +109,13 @@ static int __init twofish_init(void) return -ENODEV; } - return simd_register_skciphers_compat(twofish_algs, - ARRAY_SIZE(twofish_algs), - twofish_simd_algs); + return crypto_register_skciphers(twofish_algs, + ARRAY_SIZE(twofish_algs)); } static void __exit twofish_exit(void) { - simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs), - twofish_simd_algs); + crypto_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs)); } module_init(twofish_init); -- 2.51.0 From 83366bcc7cb9bb6570363bef7cf05948cb00fe4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 2 Apr 2025 12:13:47 +0100 Subject: [PATCH 03/16] crypto: eip93 - Make read-only arrays static const Don't populate the read-only arrays sha256_init, sha224_init, sha1_init and md5_init on the stack at run time; instead, make them static const.
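To illustrate the difference (a minimal sketch with hypothetical helpers and placeholder values, not the driver code): an automatic array is typically rebuilt on the stack on every call, while a static const array is emitted once into .rodata and merely referenced:

	#include <string.h>

	void fill_stack(unsigned int *digest)
	{
		/* placeholder values; materialized on the stack per call */
		unsigned int init[] = { 0x1, 0x2, 0x3, 0x4 };

		memcpy(digest, init, sizeof(init));
	}

	void fill_rodata(unsigned int *digest)
	{
		/* emitted once into .rodata; no per-call initialization */
		static const unsigned int init[] = { 0x1, 0x2, 0x3, 0x4 };

		memcpy(digest, init, sizeof(init));
	}

Besides saving stack space, this drops the instructions that would otherwise store the constants on each invocation.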
Signed-off-by: Colin Ian King Reviewed-by: Antoine Tenart Signed-off-by: Herbert Xu --- .../crypto/inside-secure/eip93/eip93-hash.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/inside-secure/eip93/eip93-hash.c b/drivers/crypto/inside-secure/eip93/eip93-hash.c index df1b05ac5a57..ac13d90a2b7c 100644 --- a/drivers/crypto/inside-secure/eip93/eip93-hash.c +++ b/drivers/crypto/inside-secure/eip93/eip93-hash.c @@ -97,12 +97,20 @@ void eip93_hash_handle_result(struct crypto_async_request *async, int err) static void eip93_hash_init_sa_state_digest(u32 hash, u8 *digest) { - u32 sha256_init[] = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 }; - u32 sha224_init[] = { SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7 }; - u32 sha1_init[] = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; - u32 md5_init[] = { MD5_H0, MD5_H1, MD5_H2, MD5_H3 }; + static const u32 sha256_init[] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 + }; + static const u32 sha224_init[] = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7 + }; + static const u32 sha1_init[] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 + }; + static const u32 md5_init[] = { + MD5_H0, MD5_H1, MD5_H2, MD5_H3 + }; /* Init HASH constant */ switch (hash) { -- 2.51.0 From f98ed0dd58d9eabe5d240743fc26d4d22e202a07 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 Apr 2025 10:33:32 +0800 Subject: [PATCH 04/16] crypto: hash - Do not use shash in hard IRQs Update the documentation to be consistent with the fact that shash may not be used in hard IRQs. Reported-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/hash.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec98..b987904afdba 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -834,7 +834,7 @@ static inline void *shash_desc_ctx(struct shash_desc *desc) * cipher handle must point to a keyed message digest cipher in order for this * function to succeed. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, @@ -851,7 +851,7 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, * crypto_shash_update and crypto_shash_final. The parameters have the same * meaning as discussed for those separate three functions. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ @@ -871,7 +871,7 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, * directly, and it allocates a hash descriptor on the stack internally. * Note that this stack allocation may be fairly large. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 on success; < 0 if an error occurred. */ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, @@ -886,7 +886,7 @@ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, * caller-allocated output buffer out which must have sufficient size (e.g. by * calling crypto_shash_descsize). * - * Context: Any context. + * Context: Softirq or process context. 
* Return: 0 if the export creation was successful; < 0 if an error occurred */ int crypto_shash_export(struct shash_desc *desc, void *out); @@ -900,7 +900,7 @@ int crypto_shash_export(struct shash_desc *desc, void *out); * the input buffer. That buffer should have been generated with the * crypto_ahash_export function. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the import was successful; < 0 if an error occurred */ int crypto_shash_import(struct shash_desc *desc, const void *in); @@ -913,7 +913,7 @@ int crypto_shash_import(struct shash_desc *desc, const void *in); * operational state handle. Any potentially existing state created by * previous operations is discarded. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest initialization was successful; < 0 if an * error occurred */ @@ -935,7 +935,7 @@ static inline int crypto_shash_init(struct shash_desc *desc) * * Updates the message digest state of the operational state handle. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest update was successful; < 0 if an error * occurred */ @@ -952,7 +952,7 @@ int crypto_shash_update(struct shash_desc *desc, const u8 *data, * into the output buffer. The caller must ensure that the output buffer is * large enough by using crypto_shash_digestsize. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ @@ -969,7 +969,7 @@ int crypto_shash_final(struct shash_desc *desc, u8 *out); * crypto_shash_update and crypto_shash_final. The parameters have the same * meaning as discussed for those separate functions. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ -- 2.51.0 From 9b4400215e0e0b3edad010398069fcc51659a8cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 Apr 2025 12:14:50 +0800 Subject: [PATCH 05/16] crypto: x86/chacha - Remove SIMD fallback path Get rid of the fallback path as SIMD is now always usable in softirq context. 
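Schematically, each FPU section loses its usability check and generic fallback; a simplified before/after of the pattern (names as in the driver):

	/* before: guard the FPU section and fall back to scalar code */
	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
	} else {
		kernel_fpu_begin();
		chacha_dosimd(state, dst, src, bytes, nrounds);
		kernel_fpu_end();
	}

	/* after: kernel-mode FPU is always usable in task/softirq context */
	kernel_fpu_begin();
	chacha_dosimd(state, dst, src, bytes, nrounds);
	kernel_fpu_end();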
Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha_glue.c | 46 +++++++++-------------------------- 1 file changed, 11 insertions(+), 35 deletions(-) diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 8bb74a272879..6a3d60cf3192 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -6,9 +6,7 @@ * Copyright (C) 2015 Martin Willi */ -#include #include -#include #include #include #include @@ -35,7 +33,6 @@ asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); @@ -123,23 +120,15 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) { - if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { - hchacha_block_generic(state, stream, nrounds); - } else { - kernel_fpu_begin(); - hchacha_block_ssse3(state, stream, nrounds); - kernel_fpu_end(); - } + kernel_fpu_begin(); + hchacha_block_ssse3(state, stream, nrounds); + kernel_fpu_end(); } EXPORT_SYMBOL(hchacha_block_arch); void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { - if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - do { unsigned int todo = min_t(unsigned int, bytes, SZ_4K); @@ -171,18 +160,11 @@ static int chacha_simd_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - if (!static_branch_likely(&chacha_use_simd) || - !crypto_simd_usable()) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - } else { - kernel_fpu_begin(); - chacha_dosimd(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - kernel_fpu_end(); - } + kernel_fpu_begin(); + chacha_dosimd(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + kernel_fpu_end(); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -207,13 +189,9 @@ static int xchacha_simd(struct skcipher_request *req) chacha_init(state, ctx->key, req->iv); - if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { - kernel_fpu_begin(); - hchacha_block_ssse3(state, subctx.key, ctx->nrounds); - kernel_fpu_end(); - } else { - hchacha_block_generic(state, subctx.key, ctx->nrounds); - } + kernel_fpu_begin(); + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + kernel_fpu_end(); subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); @@ -275,8 +253,6 @@ static int __init chacha_simd_mod_init(void) if (!boot_cpu_has(X86_FEATURE_SSSE3)) return 0; - static_branch_enable(&chacha_use_simd); - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { -- 2.51.0 From e77fe9cce31b5db4a6b3f5fac7d69352612229c5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Apr 2025 09:19:55 +0200 Subject: [PATCH 06/16] crypto: arm/aes-ce - stop using the SIMD helper Now that ARM permits use of the NEON unit in softirq context as well as task context, there is no longer a need to rely on the SIMD helper module to construct async skciphers wrapping 
the sync ones, as the latter can always be called directly. So remove these wrappers and the dependency on the SIMD helper. This permits the use of these algorithms by callers that only support synchronous use. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 1 - arch/arm/crypto/aes-ce-glue.c | 104 ++++------------------------------ 2 files changed, 11 insertions(+), 94 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 23e4ea067ddb..2e73200b930a 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -200,7 +200,6 @@ config CRYPTO_AES_ARM_CE depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES - select CRYPTO_SIMD help Length-preserving ciphers: AES cipher algorithms (FIPS-197) with block cipher modes: diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 1cf61f51e766..00591895d540 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -10,8 +10,6 @@ #include #include #include -#include -#include #include #include #include @@ -418,29 +416,6 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } -static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) -{ - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - unsigned long flags; - - /* - * Temporarily disable interrupts to avoid races where - * cachelines are evicted when the CPU is interrupted - * to do something else. - */ - local_irq_save(flags); - aes_encrypt(ctx, dst, src); - local_irq_restore(flags); -} - -static int ctr_encrypt_sync(struct skcipher_request *req) -{ - if (!crypto_simd_usable()) - return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); - - return ctr_encrypt(req); -} - static int xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -586,10 +561,9 @@ static int xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { - .base.cra_name = "__ecb(aes)", - .base.cra_driver_name = "__ecb-aes-ce", + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-ce", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), .base.cra_module = THIS_MODULE, @@ -600,10 +574,9 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { - .base.cra_name = "__cbc(aes)", - .base.cra_driver_name = "__cbc-aes-ce", + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-ce", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), .base.cra_module = THIS_MODULE, @@ -615,10 +588,9 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, { - .base.cra_name = "__cts(cbc(aes))", - .base.cra_driver_name = "__cts-cbc-aes-ce", + .base.cra_name = "cts(cbc(aes))", + .base.cra_driver_name = "cts-cbc-aes-ce", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), .base.cra_module = THIS_MODULE, @@ -631,10 +603,9 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = cts_cbc_encrypt, .decrypt = cts_cbc_decrypt, }, { - .base.cra_name = "__ctr(aes)", - .base.cra_driver_name = "__ctr-aes-ce", + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-ce", .base.cra_priority = 300, - 
.base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), .base.cra_module = THIS_MODULE, @@ -647,25 +618,9 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-ce-sync", - .base.cra_priority = 300 - 1, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = ce_aes_setkey, - .encrypt = ctr_encrypt_sync, - .decrypt = ctr_encrypt_sync, -}, { - .base.cra_name = "__xts(aes)", - .base.cra_driver_name = "__xts-aes-ce", + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-aes-ce", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), .base.cra_module = THIS_MODULE, @@ -679,51 +634,14 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = xts_decrypt, } }; -static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; - static void aes_exit(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) - simd_skcipher_free(aes_simd_algs[i]); - crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } static int __init aes_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - - err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) - continue; - - algname = aes_algs[i].base.cra_name + 2; - drvname = aes_algs[i].base.cra_driver_name + 2; - basename = aes_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aes_simd_algs[i] = simd; - } - - return 0; - -unregister_simds: - aes_exit(); - return err; + return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } module_cpu_feature_match(AES, aes_init); -- 2.51.0 From 7c79bdf97802290b5dbad043a0c67719797405cc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Apr 2025 09:19:56 +0200 Subject: [PATCH 07/16] crypto: arm/aes-neonbs - stop using the SIMD helper Now that ARM permits use of the NEON unit in softirq context as well as task context, there is no longer a need to rely on the SIMD helper module to construct async skciphers wrapping the sync ones, as the latter can always be called directly. So remove these wrappers and the dependency on the SIMD helper. This permits the use of these algorithms by callers that only support synchronous use. 
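In code terms, initialization collapses from registering the CRYPTO_ALG_INTERNAL algorithms and then wrapping each one in an async proxy to a single direct registration; an abridged sketch of the removed pattern:

	/* before (abridged): sync algs plus per-alg SIMD wrapper objects */
	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	if (err)
		return err;
	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		simd = simd_skcipher_create_compat(aes_algs + i, algname,
						   drvname, basename);
		if (IS_ERR(simd))
			goto unregister_simds;
		aes_simd_algs[i] = simd;
	}

	/* after: the sync algs are registered, and used, directly */
	return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));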
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 1 - arch/arm/crypto/aes-neonbs-glue.c | 116 +++--------------------------- 2 files changed, 9 insertions(+), 108 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2e73200b930a..be9c2e19f976 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -172,7 +172,6 @@ config CRYPTO_AES_ARM_BS select CRYPTO_AES_ARM select CRYPTO_SKCIPHER select CRYPTO_LIB_AES - select CRYPTO_SIMD help Length-preserving ciphers: AES cipher algorithms (FIPS-197) with block cipher modes: diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index f6be80b5938b..95418df97fb4 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include #include #include @@ -59,11 +57,6 @@ struct aesbs_xts_ctx { struct crypto_aes_ctx tweak_key; }; -struct aesbs_ctr_ctx { - struct aesbs_ctx key; /* must be first member */ - struct crypto_aes_ctx fallback; -}; - static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -200,25 +193,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = aes_expandkey(&ctx->fallback, in_key, key_len); - if (err) - return err; - - ctx->key.rounds = 6 + key_len / 4; - - kernel_neon_begin(); - aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds); - kernel_neon_end(); - - return 0; -} - static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -254,21 +228,6 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } -static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - - __aes_arm_encrypt(ctx->fallback.key_enc, ctx->key.rounds, src, dst); -} - -static int ctr_encrypt_sync(struct skcipher_request *req) -{ - if (!crypto_simd_usable()) - return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); - - return ctr_encrypt(req); -} - static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -374,13 +333,12 @@ static int xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { - .base.cra_name = "__ecb(aes)", - .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -389,13 +347,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { - .base.cra_name = "__cbc(aes)", - .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -405,13 +362,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = cbc_encrypt, .decrypt = 
cbc_decrypt, }, { - .base.cra_name = "__ctr(aes)", - .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct aesbs_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -422,29 +378,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-neonbs-sync", - .base.cra_priority = 250 - 1, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .chunksize = AES_BLOCK_SIZE, - .walksize = 8 * AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_setkey_sync, - .encrypt = ctr_encrypt_sync, - .decrypt = ctr_encrypt_sync, -}, { - .base.cra_name = "__xts(aes)", - .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, @@ -455,54 +394,17 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = xts_decrypt, } }; -static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; - static void aes_exit(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) - if (aes_simd_algs[i]) - simd_skcipher_free(aes_simd_algs[i]); - crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } static int __init aes_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - if (!(elf_hwcap & HWCAP_NEON)) return -ENODEV; - err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) - continue; - - algname = aes_algs[i].base.cra_name + 2; - drvname = aes_algs[i].base.cra_driver_name + 2; - basename = aes_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aes_simd_algs[i] = simd; - } - return 0; - -unregister_simds: - aes_exit(); - return err; + return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } late_initcall(aes_init); -- 2.51.0 From e0f860a1ca32e7206b5cb43812acec9ffe211a52 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Apr 2025 09:19:57 +0200 Subject: [PATCH 08/16] crypto: ctr - remove unused crypto_ctr_encrypt_walk() crypto_ctr_encrypt_walk() is no longer used so remove it. Note that some existing drivers currently rely on the transitive includes of some other crypto headers so retain those for the time being. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/ctr.h | 47 -------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h index da1ee73e9ce9..c41685874f00 100644 --- a/include/crypto/ctr.h +++ b/include/crypto/ctr.h @@ -10,56 +10,9 @@ #include #include -#include -#include #define CTR_RFC3686_NONCE_SIZE 4 #define CTR_RFC3686_IV_SIZE 8 #define CTR_RFC3686_BLOCK_SIZE 16 -static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req, - void (*fn)(struct crypto_skcipher *, - const u8 *, u8 *)) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - int blocksize = crypto_skcipher_chunksize(tfm); - u8 buf[MAX_CIPHER_BLOCKSIZE]; - struct skcipher_walk walk; - int err; - - /* avoid integer division due to variable blocksize parameter */ - if (WARN_ON_ONCE(!is_power_of_2(blocksize))) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes > 0) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int nbytes = walk.nbytes; - int tail = 0; - - if (nbytes < walk.total) { - tail = walk.nbytes & (blocksize - 1); - nbytes -= tail; - } - - do { - int bsize = min(nbytes, blocksize); - - fn(tfm, walk.iv, buf); - - crypto_xor_cpy(dst, src, buf, bsize); - crypto_inc(walk.iv, blocksize); - - dst += bsize; - src += bsize; - nbytes -= bsize; - } while (nbytes > 0); - - err = skcipher_walk_done(&walk, tail); - } - return err; -} - #endif /* _CRYPTO_CTR_H */ -- 2.51.0 From d0d9d00b092243b802dad6f23f8b2e8245e1c96b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 3 Apr 2025 21:07:28 +0100 Subject: [PATCH 09/16] crypto: qat - switch to standard pattern for PCI IDs Update the names of the defines for PCI IDs to follow the standard naming convention `PCI_DEVICE_ID_`. Also drop the unnecessary inner comma from the pci_device_id tables that use these definitions. This does not introduce any functional change. 
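The dropped comma was redundant because PCI_VDEVICE() already expands to a complete set of designated initializers, as currently defined in include/linux/pci.h (quoted here for illustration; verify against the tree):

	#define PCI_VDEVICE(vend, dev) \
		.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
		.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID

so an entry like { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_420XX) } is already a fully initialized struct pci_device_id; the extra comma before the closing brace was purely noise.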
Signed-off-by: Giovanni Cabiddu Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_420xx/adf_drv.c | 2 +- .../crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 4 ++-- drivers/crypto/intel/qat/qat_4xxx/adf_drv.c | 6 +++--- .../intel/qat/qat_common/adf_accel_devices.h | 16 ++++++++-------- drivers/crypto/intel/qat/qat_common/qat_hal.c | 10 +++++----- drivers/crypto/intel/qat/qat_common/qat_uclo.c | 8 ++++---- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c index b4731f02deb8..cfa00daeb4fb 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c @@ -14,7 +14,7 @@ #include "adf_420xx_hw_data.h" static const struct pci_device_id adf_pci_tbl[] = { - { PCI_VDEVICE(INTEL, ADF_420XX_PCI_DEVICE_ID), }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_420XX) }, { } }; MODULE_DEVICE_TABLE(pci, adf_pci_tbl); diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 63c7c8e90cca..7d4c366aa8b2 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -421,13 +421,13 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; hw_data->num_rps = ADF_GEN4_MAX_RPS; switch (dev_id) { - case ADF_402XX_PCI_DEVICE_ID: + case PCI_DEVICE_ID_INTEL_QAT_402XX: hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; hw_data->get_ena_thd_mask = get_ena_thd_mask; break; - case ADF_401XX_PCI_DEVICE_ID: + case PCI_DEVICE_ID_INTEL_QAT_401XX: hw_data->fw_name = ADF_4XXX_FW; hw_data->fw_mmp_name = ADF_4XXX_MMP; hw_data->uof_get_name = uof_get_name_4xxx; diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 1ac415ef3c31..c9be5dcddb27 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -14,9 +14,9 @@ #include "adf_4xxx_hw_data.h" static const struct pci_device_id adf_pci_tbl[] = { - { PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), }, - { PCI_VDEVICE(INTEL, ADF_401XX_PCI_DEVICE_ID), }, - { PCI_VDEVICE(INTEL, ADF_402XX_PCI_DEVICE_ID), }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_4XXX) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_401XX) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_402XX) }, { } }; MODULE_DEVICE_TABLE(pci, adf_pci_tbl); diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index dc21551153cb..1e301a20c244 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -25,14 +25,14 @@ #define ADF_C3XXXVF_DEVICE_NAME "c3xxxvf" #define ADF_4XXX_DEVICE_NAME "4xxx" #define ADF_420XX_DEVICE_NAME "420xx" -#define ADF_4XXX_PCI_DEVICE_ID 0x4940 -#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941 -#define ADF_401XX_PCI_DEVICE_ID 0x4942 -#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943 -#define ADF_402XX_PCI_DEVICE_ID 0x4944 -#define ADF_402XXIOV_PCI_DEVICE_ID 0x4945 -#define ADF_420XX_PCI_DEVICE_ID 0x4946 -#define ADF_420XXIOV_PCI_DEVICE_ID 0x4947 +#define PCI_DEVICE_ID_INTEL_QAT_4XXX 0x4940 +#define PCI_DEVICE_ID_INTEL_QAT_4XXXIOV 0x4941 +#define PCI_DEVICE_ID_INTEL_QAT_401XX 0x4942 +#define 
PCI_DEVICE_ID_INTEL_QAT_401XXIOV 0x4943 +#define PCI_DEVICE_ID_INTEL_QAT_402XX 0x4944 +#define PCI_DEVICE_ID_INTEL_QAT_402XXIOV 0x4945 +#define PCI_DEVICE_ID_INTEL_QAT_420XX 0x4946 +#define PCI_DEVICE_ID_INTEL_QAT_420XXIOV 0x4947 #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C #define ADF_DEVICE_FUSECTL_MASK 0x80000000 diff --git a/drivers/crypto/intel/qat/qat_common/qat_hal.c b/drivers/crypto/intel/qat/qat_common/qat_hal.c index ef8a9cf74f0c..841c1d7d3ffe 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_hal.c +++ b/drivers/crypto/intel/qat/qat_common/qat_hal.c @@ -694,16 +694,16 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, handle->pci_dev = pci_info->pci_dev; switch (handle->pci_dev->device) { - case ADF_4XXX_PCI_DEVICE_ID: - case ADF_401XX_PCI_DEVICE_ID: - case ADF_402XX_PCI_DEVICE_ID: - case ADF_420XX_PCI_DEVICE_ID: + case PCI_DEVICE_ID_INTEL_QAT_4XXX: + case PCI_DEVICE_ID_INTEL_QAT_401XX: + case PCI_DEVICE_ID_INTEL_QAT_402XX: + case PCI_DEVICE_ID_INTEL_QAT_420XX: handle->chip_info->mmp_sram_size = 0; handle->chip_info->nn = false; handle->chip_info->lm2lm3 = true; handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X; handle->chip_info->icp_rst_csr = ICP_RESET_CPP0; - if (handle->pci_dev->device == ADF_420XX_PCI_DEVICE_ID) + if (handle->pci_dev->device == PCI_DEVICE_ID_INTEL_QAT_420XX) handle->chip_info->icp_rst_mask = 0x100155; else handle->chip_info->icp_rst_mask = 0x100015; diff --git a/drivers/crypto/intel/qat/qat_common/qat_uclo.c b/drivers/crypto/intel/qat/qat_common/qat_uclo.c index 87e247ac1c9a..620300e70238 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/intel/qat/qat_common/qat_uclo.c @@ -732,10 +732,10 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle) return ICP_QAT_AC_C62X_DEV_TYPE; case PCI_DEVICE_ID_INTEL_QAT_C3XXX: return ICP_QAT_AC_C3XXX_DEV_TYPE; - case ADF_4XXX_PCI_DEVICE_ID: - case ADF_401XX_PCI_DEVICE_ID: - case ADF_402XX_PCI_DEVICE_ID: - case ADF_420XX_PCI_DEVICE_ID: + case PCI_DEVICE_ID_INTEL_QAT_4XXX: + case PCI_DEVICE_ID_INTEL_QAT_401XX: + case PCI_DEVICE_ID_INTEL_QAT_402XX: + case PCI_DEVICE_ID_INTEL_QAT_420XX: return ICP_QAT_AC_4XXX_A_DEV_TYPE; default: pr_err("QAT: unsupported device 0x%x\n", -- 2.51.0 From 984f835009d61316b7a76e547fbe636770c8342d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 4 Apr 2025 09:41:00 +0200 Subject: [PATCH 10/16] crypto: x86 - Remove CONFIG_AS_SHA1_NI Current minimum required version of binutils is 2.25, which supports SHA-1 instruction mnemonics. Remove check for assembler support of SHA-1 instructions and all relevant macros for conditional compilation. No functional change intended. Signed-off-by: Uros Bizjak Cc: Herbert Xu Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. 
Peter Anvin" Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/Kconfig.assembler | 5 ----- arch/x86/crypto/Makefile | 3 +-- arch/x86/crypto/sha1_ssse3_glue.c | 10 ---------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 6d20a6ce0507..70fe87bb1055 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -6,11 +6,6 @@ config AS_AVX512 help Supported by binutils >= 2.25 and LLVM integrated assembler -config AS_SHA1_NI - def_bool $(as-instr,sha1msg1 %xmm0$(comma)%xmm1) - help - Supported by binutils >= 2.24 and LLVM integrated assembler - config AS_SHA256_NI def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 5d19f41bde58..97c1dbc3b7d6 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -56,8 +56,7 @@ aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o endif obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o -sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o -sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o +sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ni_asm.o sha1_ssse3_glue.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index ab8bc54f254d..abb793cbad01 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -28,9 +28,7 @@ #include static const struct x86_cpu_id module_cpu_ids[] = { -#ifdef CONFIG_AS_SHA1_NI X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL), -#endif X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), @@ -256,7 +254,6 @@ static void unregister_sha1_avx2(void) crypto_unregister_shash(&sha1_avx2_alg); } -#ifdef CONFIG_AS_SHA1_NI asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, int rounds); @@ -306,11 +303,6 @@ static void unregister_sha1_ni(void) crypto_unregister_shash(&sha1_ni_alg); } -#else -static inline int register_sha1_ni(void) { return 0; } -static inline void unregister_sha1_ni(void) { } -#endif - static int __init sha1_ssse3_mod_init(void) { if (!x86_match_cpu(module_cpu_ids)) @@ -360,6 +352,4 @@ MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-ssse3"); MODULE_ALIAS_CRYPTO("sha1-avx"); MODULE_ALIAS_CRYPTO("sha1-avx2"); -#ifdef CONFIG_AS_SHA1_NI MODULE_ALIAS_CRYPTO("sha1-ni"); -#endif -- 2.51.0 From d032a27e8fe9e22ef8d70dd50ee36d0a5de12198 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 4 Apr 2025 09:41:01 +0200 Subject: [PATCH 11/16] crypto: x86 - Remove CONFIG_AS_SHA256_NI Current minimum required version of binutils is 2.25, which supports SHA-256 instruction mnemonics. Remove check for assembler support of SHA-256 instructions and all relevant macros for conditional compilation. No functional change intended. Signed-off-by: Uros Bizjak Cc: Herbert Xu Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. 
Peter Anvin" Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/Kconfig.assembler | 4 ---- arch/x86/crypto/Makefile | 3 +-- arch/x86/crypto/sha256_ssse3_glue.c | 10 ---------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 70fe87bb1055..4d06fd3c8dfe 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -6,10 +6,6 @@ config AS_AVX512 help Supported by binutils >= 2.25 and LLVM integrated assembler -config AS_SHA256_NI - def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) - help - Supported by binutils >= 2.24 and LLVM integrated assembler config AS_TPAUSE def_bool $(as-instr,tpause %ecx) help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 97c1dbc3b7d6..e06b739176c9 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -59,8 +59,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ni_asm.o sha1_ssse3_glue.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o -sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o -sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o +sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ni_asm.o sha256_ssse3_glue.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index e04a43d9f7d5..429a3cefbab4 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -45,9 +45,7 @@ asmlinkage void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, int blocks); static const struct x86_cpu_id module_cpu_ids[] = { -#ifdef CONFIG_AS_SHA256_NI X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL), -#endif X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), @@ -329,7 +327,6 @@ static void unregister_sha256_avx2(void) ARRAY_SIZE(sha256_avx2_algs)); } -#ifdef CONFIG_AS_SHA256_NI asmlinkage void sha256_ni_transform(struct sha256_state *digest, const u8 *data, int rounds); @@ -403,11 +400,6 @@ static void unregister_sha256_ni(void) ARRAY_SIZE(sha256_ni_algs)); } -#else -static inline int register_sha256_ni(void) { return 0; } -static inline void unregister_sha256_ni(void) { } -#endif - static int __init sha256_ssse3_mod_init(void) { if (!x86_match_cpu(module_cpu_ids)) @@ -461,7 +453,5 @@ MODULE_ALIAS_CRYPTO("sha224"); MODULE_ALIAS_CRYPTO("sha224-ssse3"); MODULE_ALIAS_CRYPTO("sha224-avx"); MODULE_ALIAS_CRYPTO("sha224-avx2"); -#ifdef CONFIG_AS_SHA256_NI MODULE_ALIAS_CRYPTO("sha256-ni"); MODULE_ALIAS_CRYPTO("sha224-ni"); -#endif -- 2.51.0 From bc23fe6dc172b09778ce3d75a9157decd153f4ef Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 4 Apr 2025 09:41:02 +0200 Subject: [PATCH 12/16] crypto: x86 - Remove CONFIG_AS_AVX512 handling Current minimum required version of binutils is 2.25, which supports AVX-512 instruction mnemonics. Remove check for assembler support of AVX-512 instructions and all relevant macros for conditional compilation. No functional change intended. Signed-off-by: Uros Bizjak Cc: Herbert Xu Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. 
Peter Anvin" Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/Kconfig | 2 +- arch/x86/crypto/Makefile | 3 +-- arch/x86/crypto/blake2s-core.S | 4 ---- arch/x86/crypto/blake2s-glue.c | 6 ++---- arch/x86/crypto/chacha_glue.c | 6 ++---- arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 -------- arch/x86/crypto/poly1305_glue.c | 6 +++--- 7 files changed, 9 insertions(+), 26 deletions(-) diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index f9e46e83440f..2fae1674f756 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -320,7 +320,7 @@ config CRYPTO_ARIA_AESNI_AVX2_X86_64 config CRYPTO_ARIA_GFNI_AVX512_X86_64 tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)" - depends on X86 && 64BIT && AS_AVX512 && AS_GFNI + depends on X86 && 64BIT && AS_GFNI select CRYPTO_SKCIPHER select CRYPTO_ALGAPI select CRYPTO_ARIA diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e06b739176c9..2f22b381f244 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -43,8 +43,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o -chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o -chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o +chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S index b50b35ff1fdb..ac1c845445a4 100644 --- a/arch/x86/crypto/blake2s-core.S +++ b/arch/x86/crypto/blake2s-core.S @@ -29,7 +29,6 @@ SIGMA: .byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 .byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 .byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 -#ifdef CONFIG_AS_AVX512 .section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 .align 64 SIGMA2: @@ -43,7 +42,6 @@ SIGMA2: .long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 .long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 .long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 -#endif /* CONFIG_AS_AVX512 */ .text SYM_FUNC_START(blake2s_compress_ssse3) @@ -174,7 +172,6 @@ SYM_FUNC_START(blake2s_compress_ssse3) RET SYM_FUNC_END(blake2s_compress_ssse3) -#ifdef CONFIG_AS_AVX512 SYM_FUNC_START(blake2s_compress_avx512) vmovdqu (%rdi),%xmm0 vmovdqu 0x10(%rdi),%xmm1 @@ -253,4 +250,3 @@ SYM_FUNC_START(blake2s_compress_avx512) vzeroupper RET SYM_FUNC_END(blake2s_compress_avx512) -#endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index 0313f9673f56..00f84f29cc8c 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -41,8 +41,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block, SZ_4K / BLAKE2S_BLOCK_SIZE); kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && - static_branch_likely(&blake2s_use_avx512)) + if (static_branch_likely(&blake2s_use_avx512)) blake2s_compress_avx512(state, block, blocks, inc); else blake2s_compress_ssse3(state, block, blocks, inc); @@ -59,8 +58,7 @@ static int __init blake2s_mod_init(void) if (boot_cpu_has(X86_FEATURE_SSSE3)) static_branch_enable(&blake2s_use_ssse3); - if (IS_ENABLED(CONFIG_AS_AVX512) && - boot_cpu_has(X86_FEATURE_AVX) && + if (boot_cpu_has(X86_FEATURE_AVX) && 
boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 6a3d60cf3192..946c306f60cd 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -45,8 +45,7 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { - if (IS_ENABLED(CONFIG_AS_AVX512) && - static_branch_likely(&chacha_use_avx512vl)) { + if (static_branch_likely(&chacha_use_avx512vl)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx512vl(state, dst, src, bytes, nrounds); @@ -258,8 +257,7 @@ static int __init chacha_simd_mod_init(void) cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { static_branch_enable(&chacha_use_avx2); - if (IS_ENABLED(CONFIG_AS_AVX512) && - boot_cpu_has(X86_FEATURE_AVX512VL) && + if (boot_cpu_has(X86_FEATURE_AVX512VL) && boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ static_branch_enable(&chacha_use_avx512vl); } diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index b9abcd79c1f4..409ec6955733 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -2811,18 +2811,10 @@ if ($avx>2) { # reason stack layout is kept identical to poly1305_blocks_avx2. If not # for this tail, we wouldn't have to even allocate stack frame... -if($kernel) { - $code .= "#ifdef CONFIG_AS_AVX512\n"; -} - &declare_function("poly1305_blocks_avx512", 32, 4); poly1305_blocks_avxN(1); &end_function("poly1305_blocks_avx512"); -if ($kernel) { - $code .= "#endif\n"; -} - if (!$kernel && $avx>3) { ######################################################################## # VPMADD52 version using 2^44 radix. diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 08ff4b489f7e..8b5593c46da7 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -107,7 +107,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, const size_t bytes = min_t(size_t, len, SZ_4K); kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) + if (static_branch_likely(&poly1305_use_avx512)) poly1305_blocks_avx512(ctx, inp, bytes, padbit); else if (static_branch_likely(&poly1305_use_avx2)) poly1305_blocks_avx2(ctx, inp, bytes, padbit); @@ -265,8 +265,8 @@ static int __init poly1305_simd_mod_init(void) if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx2); - if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. 
*/ boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X) -- 2.51.0 From 570ef50a15d7caa4d63d66f1e28d967065733f84 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Apr 2025 21:09:30 -0700 Subject: [PATCH 13/16] crypto: x86/aes-xts - optimize _compute_first_set_of_tweaks for AVX-512 Optimize the AVX-512 version of _compute_first_set_of_tweaks by using vectorized shifts to compute the first vector of tweak blocks, and by using byte-aligned shifts when multiplying by x^8. AES-XTS performance on AMD Ryzen 9 9950X (Zen 5) improves by about 2% for 4096-byte messages or 6% for 512-byte messages. AES-XTS performance on Intel Sapphire Rapids improves by about 1% for 4096-byte messages or 3% for 512-byte messages. Code size decreases by 75 bytes which outweighs the increase in rodata size of 16 bytes. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-xts-avx-x86_64.S | 90 +++++++++++++++++++--------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index bbeaccbd1c51..db79cdf81588 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -100,6 +100,17 @@ // exists when there's a carry out of the low 64 bits of the tweak. .quad 0x87, 1 + // These are the shift amounts that are needed when multiplying by [x^0, + // x^1, x^2, x^3] to compute the first vector of tweaks when VL=64. + // + // The right shifts by 64 are expected to zeroize the destination. + // 'vpsrlvq' is indeed defined to do that; i.e. it doesn't truncate the + // amount to 64 & 63 = 0 like the 'shr' scalar shift instruction would. +.Lrshift_amounts: + .byte 64, 64, 63, 63, 62, 62, 61, 61 +.Llshift_amounts: + .byte 0, 0, 1, 1, 2, 2, 3, 3 + // This table contains constants for vpshufb and vpblendvb, used to // handle variable byte shifts and blending during ciphertext stealing // on CPUs that don't support AVX512-style masking. @@ -294,52 +305,75 @@ // Given the first XTS tweak at (TWEAK), compute the first set of tweaks and // store them in the vector registers TWEAK0-TWEAK3. Clobbers V0-V5. .macro _compute_first_set_of_tweaks - vmovdqu (TWEAK), TWEAK0_XMM - _vbroadcast128 .Lgf_poly(%rip), GF_POLY .if VL == 16 - // With VL=16, multiplying by x serially is fastest. + vmovdqu (TWEAK), TWEAK0_XMM + vmovdqu .Lgf_poly(%rip), GF_POLY _next_tweak TWEAK0, %xmm0, TWEAK1 _next_tweak TWEAK1, %xmm0, TWEAK2 _next_tweak TWEAK2, %xmm0, TWEAK3 -.else -.if VL == 32 - // Compute the second block of TWEAK0. +.elseif VL == 32 + vmovdqu (TWEAK), TWEAK0_XMM + vbroadcasti128 .Lgf_poly(%rip), GF_POLY + + // Compute the first vector of tweaks. _next_tweak TWEAK0_XMM, %xmm0, %xmm1 vinserti128 $1, %xmm1, TWEAK0, TWEAK0 -.elseif VL == 64 - // Compute the remaining blocks of TWEAK0. - _next_tweak TWEAK0_XMM, %xmm0, %xmm1 - _next_tweak %xmm1, %xmm0, %xmm2 - _next_tweak %xmm2, %xmm0, %xmm3 - vinserti32x4 $1, %xmm1, TWEAK0, TWEAK0 - vinserti32x4 $2, %xmm2, TWEAK0, TWEAK0 - vinserti32x4 $3, %xmm3, TWEAK0, TWEAK0 -.endif - // Compute TWEAK[1-3] from TWEAK0. 
- vpsrlq $64 - 1*VL/16, TWEAK0, V0 - vpsrlq $64 - 2*VL/16, TWEAK0, V2 - vpsrlq $64 - 3*VL/16, TWEAK0, V4 + + // Compute the next three vectors of tweaks: + // TWEAK1 = TWEAK0 * [x^2, x^2] + // TWEAK2 = TWEAK0 * [x^4, x^4] + // TWEAK3 = TWEAK0 * [x^6, x^6] + vpsrlq $64 - 2, TWEAK0, V0 + vpsrlq $64 - 4, TWEAK0, V2 + vpsrlq $64 - 6, TWEAK0, V4 vpclmulqdq $0x01, GF_POLY, V0, V1 vpclmulqdq $0x01, GF_POLY, V2, V3 vpclmulqdq $0x01, GF_POLY, V4, V5 vpslldq $8, V0, V0 vpslldq $8, V2, V2 vpslldq $8, V4, V4 - vpsllq $1*VL/16, TWEAK0, TWEAK1 - vpsllq $2*VL/16, TWEAK0, TWEAK2 - vpsllq $3*VL/16, TWEAK0, TWEAK3 -.if USE_AVX512 - vpternlogd $0x96, V0, V1, TWEAK1 - vpternlogd $0x96, V2, V3, TWEAK2 - vpternlogd $0x96, V4, V5, TWEAK3 -.else + vpsllq $2, TWEAK0, TWEAK1 + vpsllq $4, TWEAK0, TWEAK2 + vpsllq $6, TWEAK0, TWEAK3 vpxor V0, TWEAK1, TWEAK1 vpxor V2, TWEAK2, TWEAK2 vpxor V4, TWEAK3, TWEAK3 vpxor V1, TWEAK1, TWEAK1 vpxor V3, TWEAK2, TWEAK2 vpxor V5, TWEAK3, TWEAK3 -.endif +.else + vbroadcasti32x4 (TWEAK), TWEAK0 + vbroadcasti32x4 .Lgf_poly(%rip), GF_POLY + + // Compute the first vector of tweaks: + // TWEAK0 = broadcast128(TWEAK) * [x^0, x^1, x^2, x^3] + vpmovzxbq .Lrshift_amounts(%rip), V4 + vpsrlvq V4, TWEAK0, V0 + vpclmulqdq $0x01, GF_POLY, V0, V1 + vpmovzxbq .Llshift_amounts(%rip), V4 + vpslldq $8, V0, V0 + vpsllvq V4, TWEAK0, TWEAK0 + vpternlogd $0x96, V0, V1, TWEAK0 + + // Compute the next three vectors of tweaks: + // TWEAK1 = TWEAK0 * [x^4, x^4, x^4, x^4] + // TWEAK2 = TWEAK0 * [x^8, x^8, x^8, x^8] + // TWEAK3 = TWEAK0 * [x^12, x^12, x^12, x^12] + // x^8 only needs byte-aligned shifts, so optimize accordingly. + vpsrlq $64 - 4, TWEAK0, V0 + vpsrldq $(64 - 8) / 8, TWEAK0, V2 + vpsrlq $64 - 12, TWEAK0, V4 + vpclmulqdq $0x01, GF_POLY, V0, V1 + vpclmulqdq $0x01, GF_POLY, V2, V3 + vpclmulqdq $0x01, GF_POLY, V4, V5 + vpslldq $8, V0, V0 + vpslldq $8, V4, V4 + vpsllq $4, TWEAK0, TWEAK1 + vpslldq $8 / 8, TWEAK0, TWEAK2 + vpsllq $12, TWEAK0, TWEAK3 + vpternlogd $0x96, V0, V1, TWEAK1 + vpxord V3, TWEAK2, TWEAK2 + vpternlogd $0x96, V4, V5, TWEAK3 .endif .endm -- 2.51.0 From ceba0eda831382864dc420fcab5f6079fbb26ba2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 5 Apr 2025 11:26:01 -0700 Subject: [PATCH 14/16] crypto: riscv/chacha - implement library instead of skcipher Currently the RISC-V optimized ChaCha20 is only wired up to the crypto_skcipher API, which makes it unavailable to users of the library API. The crypto_skcipher API for ChaCha20 is going to change to be implemented on top of the library API, so the library API needs to be supported. And of course it's needed anyway to serve the library users. Therefore, change the RISC-V ChaCha20 code to implement the library API instead of the crypto_skcipher API. The library functions take the ChaCha state matrix directly (instead of key and IV) and support both ChaCha20 and ChaCha12. To make the RISC-V code work properly for that, change the assembly code to take the state matrix directly and add a nrounds parameter. 
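For orientation, here is a minimal sketch of what a caller of the library
interface looks like (illustrative only: the function name is made up for
this example, and the 20-round count is just one valid choice; chacha_init()
and chacha_crypt() are the existing <crypto/chacha.h> helpers, and
chacha_crypt() reaches chacha_crypt_arch() underneath when
CRYPTO_ARCH_HAVE_LIB_CHACHA is enabled):

#include <crypto/chacha.h>

/* Encrypt or decrypt 'len' bytes with ChaCha20 via the library API.
 * chacha_init() expands the key and IV into the 16-word state matrix,
 * which chacha_crypt() then consumes and advances; with this patch the
 * RISC-V Zvkb code is what runs underneath on capable hardware. */
static void chacha20_example(u8 *dst, const u8 *src, unsigned int len,
			     const u32 key[CHACHA_KEY_SIZE / sizeof(u32)],
			     const u8 iv[CHACHA_IV_SIZE])
{
	u32 state[16];

	chacha_init(state, key, iv);
	chacha_crypt(state, dst, src, len, 20);	/* nrounds=20 -> ChaCha20 */
}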
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/riscv/crypto/Kconfig | 11 +-- arch/riscv/crypto/chacha-riscv64-glue.c | 106 ++++++++---------------- arch/riscv/crypto/chacha-riscv64-zvkb.S | 71 ++++++++-------- 3 files changed, 75 insertions(+), 113 deletions(-) diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig index c67095a3d669..6392e1e11bc9 100644 --- a/arch/riscv/crypto/Kconfig +++ b/arch/riscv/crypto/Kconfig @@ -19,14 +19,11 @@ config CRYPTO_AES_RISCV64 - Zvkg vector crypto extension (XTS) config CRYPTO_CHACHA_RISCV64 - tristate "Ciphers: ChaCha" + tristate depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO - select CRYPTO_SKCIPHER - help - Length-preserving ciphers: ChaCha20 stream cipher algorithm - - Architecture: riscv64 using: - - Zvkb vector crypto extension + select CRYPTO_ARCH_HAVE_LIB_CHACHA + select CRYPTO_LIB_CHACHA_GENERIC + default CRYPTO_LIB_CHACHA_INTERNAL config CRYPTO_GHASH_RISCV64 tristate "Hash functions: GHASH" diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c index 10b46f36375a..68caef7a3d50 100644 --- a/arch/riscv/crypto/chacha-riscv64-glue.c +++ b/arch/riscv/crypto/chacha-riscv64-glue.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ChaCha20 using the RISC-V vector crypto extensions + * ChaCha stream cipher (RISC-V optimized) * * Copyright (C) 2023 SiFive, Inc. * Author: Jerry Shih @@ -8,94 +8,56 @@ #include #include -#include -#include +#include +#include #include #include -asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, - size_t len, const u32 iv[4]); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); -static int riscv64_chacha20_crypt(struct skcipher_request *req) +asmlinkage void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, + size_t nblocks, int nrounds); + +void hchacha_block_arch(const u32 *state, u32 *out, int nrounds) { - u32 iv[CHACHA_IV_SIZE / sizeof(u32)]; - u8 block_buffer[CHACHA_BLOCK_SIZE]; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - unsigned int tail_bytes; - int err; + hchacha_block_generic(state, out, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); - iv[0] = get_unaligned_le32(req->iv); - iv[1] = get_unaligned_le32(req->iv + 4); - iv[2] = get_unaligned_le32(req->iv + 8); - iv[3] = get_unaligned_le32(req->iv + 12); +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + u8 block_buffer[CHACHA_BLOCK_SIZE]; + unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; + unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; - err = skcipher_walk_virt(&walk, req, false); - while (walk.nbytes) { - nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1); - tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1); - kernel_vector_begin(); - if (nbytes) { - chacha20_zvkb(ctx->key, walk.src.virt.addr, - walk.dst.virt.addr, nbytes, iv); - iv[0] += nbytes / CHACHA_BLOCK_SIZE; - } - if (walk.nbytes == walk.total && tail_bytes > 0) { - memcpy(block_buffer, walk.src.virt.addr + nbytes, - tail_bytes); - chacha20_zvkb(ctx->key, block_buffer, block_buffer, - CHACHA_BLOCK_SIZE, iv); - memcpy(walk.dst.virt.addr + nbytes, block_buffer, - tail_bytes); - tail_bytes = 0; - } - kernel_vector_end(); + if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); - err = skcipher_walk_done(&walk, tail_bytes); + 
kernel_vector_begin(); + if (full_blocks) { + chacha_zvkb(state, src, dst, full_blocks, nrounds); + src += full_blocks * CHACHA_BLOCK_SIZE; + dst += full_blocks * CHACHA_BLOCK_SIZE; } - - return err; + if (tail_bytes) { + memcpy(block_buffer, src, tail_bytes); + chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); + memcpy(dst, block_buffer, tail_bytes); + } + kernel_vector_end(); } - -static struct skcipher_alg riscv64_chacha_alg = { - .setkey = chacha20_setkey, - .encrypt = riscv64_chacha20_crypt, - .decrypt = riscv64_chacha20_crypt, - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .base = { - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct chacha_ctx), - .cra_priority = 300, - .cra_name = "chacha20", - .cra_driver_name = "chacha20-riscv64-zvkb", - .cra_module = THIS_MODULE, - }, -}; +EXPORT_SYMBOL(chacha_crypt_arch); static int __init riscv64_chacha_mod_init(void) { if (riscv_isa_extension_available(NULL, ZVKB) && riscv_vector_vlen() >= 128) - return crypto_register_skcipher(&riscv64_chacha_alg); - - return -ENODEV; -} - -static void __exit riscv64_chacha_mod_exit(void) -{ - crypto_unregister_skcipher(&riscv64_chacha_alg); + static_branch_enable(&use_zvkb); + return 0; } - module_init(riscv64_chacha_mod_init); -module_exit(riscv64_chacha_mod_exit); -MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)"); +MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)"); MODULE_AUTHOR("Jerry Shih "); MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/crypto/chacha-riscv64-zvkb.S index bf057737ac69..ab4423b3880e 100644 --- a/arch/riscv/crypto/chacha-riscv64-zvkb.S +++ b/arch/riscv/crypto/chacha-riscv64-zvkb.S @@ -46,11 +46,11 @@ .text .option arch, +zvkb -#define KEYP a0 +#define STATEP a0 #define INP a1 #define OUTP a2 -#define LEN a3 -#define IVP a4 +#define NBLOCKS a3 +#define NROUNDS a4 #define CONSTS0 a5 #define CONSTS1 a6 @@ -59,7 +59,7 @@ #define TMP t1 #define VL t2 #define STRIDE t3 -#define NROUNDS t4 +#define ROUND_CTR t4 #define KEY0 s0 #define KEY1 s1 #define KEY2 s2 @@ -132,14 +132,16 @@ vror.vi \b3, \b3, 32 - 7 .endm -// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len, -// const u32 iv[4]); +// void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, size_t nblocks, +// int nrounds); // -// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE). -// The counter is treated as 32-bit, following the RFC7539 convention. -SYM_FUNC_START(chacha20_zvkb) - srli LEN, LEN, 6 // Bytes to blocks - +// |nblocks| is the number of 64-byte blocks to process, and must be nonzero. +// +// |state| gives the ChaCha state matrix, including the 32-bit counter in +// state[12] following the RFC7539 convention; note that this differs from the +// original Salsa20 paper which uses a 64-bit counter in state[12..13]. The +// updated 32-bit counter is written back to state[12] before returning. +SYM_FUNC_START(chacha_zvkb) addi sp, sp, -96 sd s0, 0(sp) sd s1, 8(sp) @@ -157,26 +159,26 @@ SYM_FUNC_START(chacha20_zvkb) li STRIDE, 64 // Set up the initial state matrix in scalar registers. 
- li CONSTS0, 0x61707865 // "expa" little endian - li CONSTS1, 0x3320646e // "nd 3" little endian - li CONSTS2, 0x79622d32 // "2-by" little endian - li CONSTS3, 0x6b206574 // "te k" little endian - lw KEY0, 0(KEYP) - lw KEY1, 4(KEYP) - lw KEY2, 8(KEYP) - lw KEY3, 12(KEYP) - lw KEY4, 16(KEYP) - lw KEY5, 20(KEYP) - lw KEY6, 24(KEYP) - lw KEY7, 28(KEYP) - lw COUNTER, 0(IVP) - lw NONCE0, 4(IVP) - lw NONCE1, 8(IVP) - lw NONCE2, 12(IVP) + lw CONSTS0, 0(STATEP) + lw CONSTS1, 4(STATEP) + lw CONSTS2, 8(STATEP) + lw CONSTS3, 12(STATEP) + lw KEY0, 16(STATEP) + lw KEY1, 20(STATEP) + lw KEY2, 24(STATEP) + lw KEY3, 28(STATEP) + lw KEY4, 32(STATEP) + lw KEY5, 36(STATEP) + lw KEY6, 40(STATEP) + lw KEY7, 44(STATEP) + lw COUNTER, 48(STATEP) + lw NONCE0, 52(STATEP) + lw NONCE1, 56(STATEP) + lw NONCE2, 60(STATEP) .Lblock_loop: // Set vl to the number of blocks to process in this iteration. - vsetvli VL, LEN, e32, m1, ta, ma + vsetvli VL, NBLOCKS, e32, m1, ta, ma // Set up the initial state matrix for the next VL blocks in v0-v15. // v{i} holds the i'th 32-bit word of the state matrix for all blocks. @@ -203,16 +205,16 @@ SYM_FUNC_START(chacha20_zvkb) // v{16+i} holds the i'th 32-bit word for all blocks. vlsseg8e32.v v16, (INP), STRIDE - li NROUNDS, 20 + mv ROUND_CTR, NROUNDS .Lnext_doubleround: - addi NROUNDS, NROUNDS, -2 + addi ROUND_CTR, ROUND_CTR, -2 // column round chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \ v2, v6, v10, v14, v3, v7, v11, v15 // diagonal round chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \ v2, v7, v8, v13, v3, v4, v9, v14 - bnez NROUNDS, .Lnext_doubleround + bnez ROUND_CTR, .Lnext_doubleround // Load the second half of the input data for each block into v24-v31. // v{24+i} holds the {8+i}'th 32-bit word for all blocks. @@ -271,12 +273,13 @@ SYM_FUNC_START(chacha20_zvkb) // Update the counter, the remaining number of blocks, and the input and // output pointers according to the number of blocks processed (VL). add COUNTER, COUNTER, VL - sub LEN, LEN, VL + sub NBLOCKS, NBLOCKS, VL slli TMP, VL, 6 add OUTP, OUTP, TMP add INP, INP, TMP - bnez LEN, .Lblock_loop + bnez NBLOCKS, .Lblock_loop + sw COUNTER, 48(STATEP) ld s0, 0(sp) ld s1, 8(sp) ld s2, 16(sp) @@ -291,4 +294,4 @@ SYM_FUNC_START(chacha20_zvkb) ld s11, 88(sp) addi sp, sp, 96 ret -SYM_FUNC_END(chacha20_zvkb) +SYM_FUNC_END(chacha_zvkb) -- 2.51.0 From 4aa6dc909e400b247bd3abf29f805d49a9c140bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 5 Apr 2025 11:26:02 -0700 Subject: [PATCH 15/16] crypto: chacha - centralize the skcipher wrappers for arch code Following the example of the crc32 and crc32c code, make the crypto subsystem register both generic and architecture-optimized chacha20, xchacha20, and xchacha12 skcipher algorithms, all implemented on top of the appropriate library functions. This eliminates the need for every architecture to implement the same skcipher glue code. To register the architecture-optimized skciphers only when architecture-optimized code is actually being used, add a function chacha_is_arch_optimized() and make each arch implement it. Change each architecture's ChaCha module_init function to arch_initcall so that the CPU feature detection is guaranteed to run before chacha_is_arch_optimized() gets called by crypto/chacha.c. In the case of s390, remove the CPU feature based module autoloading, which is no longer needed since the module just gets pulled in via function linkage. 
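To make the initcall ordering concrete, this is the shape each architecture
ends up with (a sketch, not a literal hunk from this patch: the static key
name 'have_feature' is a placeholder, and the SSSE3 test is borrowed from
the x86 glue below; each arch substitutes its own feature detection):

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_feature);

bool chacha_is_arch_optimized(void)
{
	return static_key_enabled(&have_feature);
}
EXPORT_SYMBOL(chacha_is_arch_optimized);

static int __init chacha_arch_mod_init(void)
{
	if (boot_cpu_has(X86_FEATURE_SSSE3))	/* placeholder feature test */
		static_branch_enable(&have_feature);
	return 0;	/* never fails; the generic code is always usable */
}
/* arch_initcall is level 3 and subsys_initcall is level 4, so this is
 * guaranteed to run before crypto/chacha.c registers its algorithms and
 * queries chacha_is_arch_optimized(). */
arch_initcall(chacha_arch_mod_init);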
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-glue.c | 9 +- arch/arm64/crypto/chacha-neon-glue.c | 8 +- arch/mips/crypto/chacha-glue.c | 8 +- arch/powerpc/crypto/chacha-p10-glue.c | 8 +- arch/riscv/crypto/chacha-riscv64-glue.c | 8 +- arch/s390/crypto/chacha-glue.c | 8 +- arch/x86/crypto/chacha_glue.c | 8 +- crypto/Makefile | 3 +- crypto/chacha.c | 227 ++++++++++++++++++++++++ crypto/chacha_generic.c | 139 --------------- include/crypto/chacha.h | 9 + 11 files changed, 288 insertions(+), 147 deletions(-) create mode 100644 crypto/chacha.c delete mode 100644 crypto/chacha_generic.c diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 50e635512046..e1cb34d31771 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -287,6 +287,13 @@ static struct skcipher_alg neon_algs[] = { } }; +bool chacha_is_arch_optimized(void) +{ + /* We always can use at least the ARM scalar implementation. */ + return true; +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_simd_mod_init(void) { int err = 0; @@ -333,7 +340,7 @@ static void __exit chacha_simd_mod_fini(void) } } -module_init(chacha_simd_mod_init); +arch_initcall(chacha_simd_mod_init); module_exit(chacha_simd_mod_fini); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 229876acfc58..bb9b52321bda 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -206,6 +206,12 @@ static struct skcipher_alg algs[] = { } }; +bool chacha_is_arch_optimized(void) +{ + return static_key_enabled(&have_neon); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_simd_mod_init(void) { if (!cpu_have_named_feature(ASIMD)) @@ -223,7 +229,7 @@ static void __exit chacha_simd_mod_fini(void) crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha_simd_mod_init); +arch_initcall(chacha_simd_mod_init); module_exit(chacha_simd_mod_fini); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c index f6fc2e1079a1..64ccaeaeaa1e 100644 --- a/arch/mips/crypto/chacha-glue.c +++ b/arch/mips/crypto/chacha-glue.c @@ -120,6 +120,12 @@ static struct skcipher_alg algs[] = { } }; +bool chacha_is_arch_optimized(void) +{ + return true; +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_simd_mod_init(void) { return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? 
@@ -132,7 +138,7 @@ static void __exit chacha_simd_mod_fini(void) crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha_simd_mod_init); +arch_initcall(chacha_simd_mod_init); module_exit(chacha_simd_mod_fini); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c index d8796decc1fb..3355305b6c7f 100644 --- a/arch/powerpc/crypto/chacha-p10-glue.c +++ b/arch/powerpc/crypto/chacha-p10-glue.c @@ -189,6 +189,12 @@ static struct skcipher_alg algs[] = { } }; +bool chacha_is_arch_optimized(void) +{ + return static_key_enabled(&have_p10); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_p10_init(void) { if (!cpu_has_feature(CPU_FTR_ARCH_31)) @@ -207,7 +213,7 @@ static void __exit chacha_p10_exit(void) crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha_p10_init); +arch_initcall(chacha_p10_init); module_exit(chacha_p10_exit); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)"); diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c index 68caef7a3d50..ccaab0dea383 100644 --- a/arch/riscv/crypto/chacha-riscv64-glue.c +++ b/arch/riscv/crypto/chacha-riscv64-glue.c @@ -49,6 +49,12 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, } EXPORT_SYMBOL(chacha_crypt_arch); +bool chacha_is_arch_optimized(void) +{ + return static_key_enabled(&use_zvkb); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init riscv64_chacha_mod_init(void) { if (riscv_isa_extension_available(NULL, ZVKB) && @@ -56,7 +62,7 @@ static int __init riscv64_chacha_mod_init(void) static_branch_enable(&use_zvkb); return 0; } -module_init(riscv64_chacha_mod_init); +arch_initcall(riscv64_chacha_mod_init); MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)"); MODULE_AUTHOR("Jerry Shih "); diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index 920e9f0941e7..0c68191f2aa4 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -103,6 +103,12 @@ static struct skcipher_alg chacha_algs[] = { } }; +bool chacha_is_arch_optimized(void) +{ + return cpu_has_vx(); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_mod_init(void) { return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? 
@@ -115,7 +121,7 @@ static void __exit chacha_mod_fini(void) crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); } -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init); +arch_initcall(chacha_mod_init); module_exit(chacha_mod_fini); MODULE_DESCRIPTION("ChaCha20 stream cipher"); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 946c306f60cd..8858de7b33e3 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -247,6 +247,12 @@ static struct skcipher_alg algs[] = { }, }; +bool chacha_is_arch_optimized(void) +{ + return static_key_enabled(&chacha_use_simd); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) @@ -271,7 +277,7 @@ static void __exit chacha_simd_mod_fini(void) crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha_simd_mod_init); +arch_initcall(chacha_simd_mod_init); module_exit(chacha_simd_mod_fini); MODULE_LICENSE("GPL"); diff --git a/crypto/Makefile b/crypto/Makefile index 0e6ab5ffd3f7..98510a2aa0b1 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -148,7 +148,8 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_ARIA) += aria_generic.o -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha.o +CFLAGS_chacha.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o diff --git a/crypto/chacha.c b/crypto/chacha.c new file mode 100644 index 000000000000..2009038c5e56 --- /dev/null +++ b/crypto/chacha.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Crypto API wrappers for the ChaCha20, XChaCha20, and XChaCha12 stream ciphers + * + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2018 Google LLC + */ + +#include +#include +#include +#include +#include + +static int chacha_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv, + bool arch) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); + + if (arch) + chacha_crypt(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + else + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int crypto_chacha_crypt_generic(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv, false); +} + +static int crypto_chacha_crypt_arch(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv, true); +} + +static int crypto_xchacha_crypt(struct skcipher_request *req, bool arch) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + /* Compute the subkey given the original key and 
first 128 nonce bits */ + chacha_init(state, ctx->key, req->iv); + if (arch) + hchacha_block(state, subctx.key, ctx->nrounds); + else + hchacha_block_generic(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + /* Build the real IV */ + memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */ + memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */ + + /* Generate the stream and XOR it with the data */ + return chacha_stream_xor(req, &subctx, real_iv, arch); +} + +static int crypto_xchacha_crypt_generic(struct skcipher_request *req) +{ + return crypto_xchacha_crypt(req, false); +} + +static int crypto_xchacha_crypt_arch(struct skcipher_request *req) +{ + return crypto_xchacha_crypt(req, true); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = crypto_chacha_crypt_generic, + .decrypt = crypto_chacha_crypt_generic, + }, + { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = crypto_xchacha_crypt_generic, + .decrypt = crypto_xchacha_crypt_generic, + }, + { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = crypto_xchacha_crypt_generic, + .decrypt = crypto_xchacha_crypt_generic, + }, + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = crypto_chacha_crypt_arch, + .decrypt = crypto_chacha_crypt_arch, + }, + { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = crypto_xchacha_crypt_arch, + .decrypt = crypto_xchacha_crypt_arch, + }, + { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-" __stringify(ARCH), + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + 
.encrypt = crypto_xchacha_crypt_arch, + .decrypt = crypto_xchacha_crypt_arch, + } +}; + +static unsigned int num_algs; + +static int __init crypto_chacha_mod_init(void) +{ + /* register the arch flavours only if they differ from generic */ + num_algs = ARRAY_SIZE(algs); + BUILD_BUG_ON(ARRAY_SIZE(algs) % 2 != 0); + if (!chacha_is_arch_optimized()) + num_algs /= 2; + + return crypto_register_skciphers(algs, num_algs); +} + +static void __exit crypto_chacha_mod_fini(void) +{ + crypto_unregister_skciphers(algs, num_algs); +} + +subsys_initcall(crypto_chacha_mod_init); +module_exit(crypto_chacha_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi "); +MODULE_DESCRIPTION("Crypto API wrappers for the ChaCha20, XChaCha20, and XChaCha12 stream ciphers"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-generic"); +MODULE_ALIAS_CRYPTO("chacha20-" __stringify(ARCH)); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-generic"); +MODULE_ALIAS_CRYPTO("xchacha20-" __stringify(ARCH)); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-generic"); +MODULE_ALIAS_CRYPTO("xchacha12-" __stringify(ARCH)); diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c deleted file mode 100644 index 1fb9fbd302c6..000000000000 --- a/crypto/chacha_generic.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539) - * - * Copyright (C) 2015 Martin Willi - * Copyright (C) 2018 Google LLC - */ - -#include -#include -#include -#include -#include - -static int chacha_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - chacha_init(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); - - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, ctx->nrounds); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int crypto_chacha_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - return chacha_stream_xor(req, ctx, req->iv); -} - -static int crypto_xchacha_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - /* Compute the subkey given the original key and first 128 nonce bits */ - chacha_init(state, ctx->key, req->iv); - hchacha_block_generic(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - - /* Build the real IV */ - memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */ - memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */ - - /* Generate the stream and XOR it with the data */ - return chacha_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - 
.encrypt = crypto_chacha_crypt, - .decrypt = crypto_chacha_crypt, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = crypto_xchacha_crypt, - .decrypt = crypto_xchacha_crypt, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha12_setkey, - .encrypt = crypto_xchacha_crypt, - .decrypt = crypto_xchacha_crypt, - } -}; - -static int __init chacha_generic_mod_init(void) -{ - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -} - -static void __exit chacha_generic_mod_fini(void) -{ - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -subsys_initcall(chacha_generic_mod_init); -module_exit(chacha_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-generic"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-generic"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-generic"); diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index f8cc073bba41..58129e18cc31 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -99,4 +99,13 @@ static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, chacha_crypt(state, dst, src, bytes, 20); } +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA) +bool chacha_is_arch_optimized(void); +#else +static inline bool chacha_is_arch_optimized(void) +{ + return false; +} +#endif + #endif /* _CRYPTO_CHACHA_H */ -- 2.51.0 From 08820553f33ae0d9143ff33910dbb5874b742455 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 5 Apr 2025 11:26:03 -0700 Subject: [PATCH 16/16] crypto: arm/chacha - remove the redundant skcipher algorithms Since crypto/chacha.c now registers chacha20-$(ARCH), xchacha20-$(ARCH), and xchacha12-$(ARCH) skcipher algorithms that use the architecture's ChaCha and HChaCha library functions, individual architectures no longer need to do the same. Therefore, remove the redundant skcipher algorithms and leave just the library functions. 
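After this removal the ARM module's entire external interface is the ChaCha
library hooks; for reference, these are the prototypes involved, as used
elsewhere in this series:

void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds);
void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
bool chacha_is_arch_optimized(void);

Callers do not normally invoke these directly; they go through
chacha_crypt() and hchacha_block() in <crypto/chacha.h>, which fall back to
the generic implementations when no arch hooks are built in.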
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 7 - arch/arm/crypto/chacha-glue.c | 242 +---------------------------- arch/arm/crypto/chacha-neon-core.S | 2 +- 3 files changed, 7 insertions(+), 244 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index be9c2e19f976..332f2430beec 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -214,15 +214,8 @@ config CRYPTO_AES_ARM_CE config CRYPTO_CHACHA20_NEON tristate - select CRYPTO_SKCIPHER select CRYPTO_ARCH_HAVE_LIB_CHACHA default CRYPTO_LIB_CHACHA_INTERNAL - help - Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 - stream cipher algorithms - - Architecture: arm using: - - NEON (Advanced SIMD) extensions endmenu diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index e1cb34d31771..3a5c75c95d43 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -1,16 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) + * ChaCha and HChaCha functions (ARM optimized) * * Copyright (C) 2016-2019 Linaro, Ltd. * Copyright (C) 2015 Martin Willi */ -#include -#include +#include #include -#include #include #include #include @@ -100,193 +97,6 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, } EXPORT_SYMBOL(chacha_crypt_arch); -static int chacha_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv, - bool neon) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - chacha_init(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { - chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, - nbytes, state, ctx->nrounds); - state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); - } else { - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, ctx->nrounds); - kernel_neon_end(); - } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int do_chacha(struct skcipher_request *req, bool neon) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - return chacha_stream_xor(req, ctx, req->iv, neon); -} - -static int chacha_arm(struct skcipher_request *req) -{ - return do_chacha(req, false); -} - -static int chacha_neon(struct skcipher_request *req) -{ - return do_chacha(req, neon_usable()); -} - -static int do_xchacha(struct skcipher_request *req, bool neon) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - chacha_init(state, ctx->key, req->iv); - - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { - hchacha_block_arm(state, subctx.key, ctx->nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); - } - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_stream_xor(req, &subctx, real_iv, neon); -} - -static int xchacha_arm(struct skcipher_request *req) -{ - return do_xchacha(req, false); -} - -static int xchacha_neon(struct skcipher_request *req) -{ - 
return do_xchacha(req, neon_usable()); -} - -static struct skcipher_alg arm_algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-arm", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha_arm, - .decrypt = chacha_arm, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-arm", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = xchacha_arm, - .decrypt = xchacha_arm, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-arm", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha12_setkey, - .encrypt = xchacha_arm, - .decrypt = xchacha_arm, - }, -}; - -static struct skcipher_alg neon_algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } -}; - bool chacha_is_arch_optimized(void) { /* We always can use at least the ARM scalar implementation. 
*/ @@ -294,19 +104,9 @@ bool chacha_is_arch_optimized(void) } EXPORT_SYMBOL(chacha_is_arch_optimized); -static int __init chacha_simd_mod_init(void) +static int __init chacha_arm_mod_init(void) { - int err = 0; - - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { - err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); - if (err) - return err; - } - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { - int i; - switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: case ARM_CPU_PART_CORTEX_A5: @@ -315,45 +115,15 @@ static int __init chacha_simd_mod_init(void) * the NEON implementation but do incredibly with the * scalar one and use less power. */ - for (i = 0; i < ARRAY_SIZE(neon_algs); i++) - neon_algs[i].base.cra_priority = 0; break; default: static_branch_enable(&use_neon); } - - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { - err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); - if (err) - crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); - } } - return err; + return 0; } +arch_initcall(chacha_arm_mod_init); -static void __exit chacha_simd_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { - crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) - crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); - } -} - -arch_initcall(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); +MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-arm"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-arm"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-arm"); -#ifdef CONFIG_KERNEL_MODE_NEON -MODULE_ALIAS_CRYPTO("chacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha12-neon"); -#endif diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S index 13d12f672656..ddd62b6294a5 100644 --- a/arch/arm/crypto/chacha-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -1,5 +1,5 @@ /* - * ChaCha/XChaCha NEON helper functions + * ChaCha/HChaCha NEON helper functions * * Copyright (C) 2016 Linaro, Ltd. * -- 2.51.0