From cdc3ed3035d0fe934aa1d9b78ce256752fd3bb7d Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Wed, 2 Apr 2025 09:56:27 +0300 Subject: [PATCH 01/16] ocfs2: fix possible memory leak in ocfs2_finish_quota_recovery If ocfs2_finish_quota_recovery() exits due to an error before passing all rc_list elements to ocfs2_recover_local_quota_file() then it can lead to a memory leak as rc_list may still contain elements that have to be freed. Release all memory allocated by ocfs2_add_recovery_chunk() using ocfs2_free_quota_recovery() instead of kfree(). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Link: https://lkml.kernel.org/r/20250402065628.706359-2-m.masimov@mt-integration.ru Fixes: 2205363dce74 ("ocfs2: Implement quota recovery") Signed-off-by: Murad Masimov Reviewed-by: Jan Kara Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/quota_local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index e272429da3db..de7f12858729 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -674,7 +674,7 @@ out_put: break; } out: - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } -- 2.51.0 From 1785c67e2adc4d1899685b36dd10f7ebbf117178 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 22 Apr 2025 15:30:51 +0800 Subject: [PATCH 02/16] ocfs2: remove unnecessary NULL check before unregister_sysctl_table() unregister_sysctl_table() checks for NULL pointers internally. Remove unneeded NULL check here. Link: https://lkml.kernel.org/r/20250422073051.1334310-1-nichen@iscas.ac.cn Signed-off-by: Chen Ni Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/stackglue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index ddd761cf44c8..a28c127b9934 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void) memset(&locking_max_version, 0, sizeof(struct ocfs2_protocol_version)); ocfs2_sysfs_exit(); - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); -- 2.51.0 From f3def8270c67217efb0e23dcd54714f74de3b6b2 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 27 Apr 2025 23:14:49 +0300 Subject: [PATCH 03/16] sort.h: hoist cmp_int() into generic header file Deduplicate the same functionality implemented in several places by moving the cmp_int() helper macro into linux/sort.h. The macro performs a three-way comparison of the arguments mostly useful in different sorting strategies and algorithms. Link: https://lkml.kernel.org/r/20250427201451.900730-1-pchelkin@ispras.ru Signed-off-by: Fedor Pchelkin Suggested-by: Darrick J. Wong Acked-by: Kent Overstreet Acked-by: Coly Li Cc: Al Viro Cc: Carlos Maiolino Cc: Christian Brauner Cc: Coly Li Cc: Jan Kara Signed-off-by: Andrew Morton --- drivers/md/bcache/btree.c | 3 +-- fs/bcachefs/util.h | 3 +-- fs/pipe.c | 3 +-- fs/xfs/xfs_zone_gc.c | 2 -- include/linux/sort.h | 10 ++++++++++ 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ed40d8600656..2cc2eb24dc8a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -36,6 +36,7 @@ #include #include #include +#include #include /* @@ -559,8 +560,6 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) } } -#define cmp_int(l, r) ((l > r) - (l < r)) - #ifdef CONFIG_PROVE_LOCKING static int btree_lock_cmp_fn(const struct lockdep_map *_a, const struct lockdep_map *_b) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 3e52c7f8ddd2..7ec1fc8b46f9 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -669,8 +670,6 @@ static inline void percpu_memset(void __percpu *p, int c, size_t bytes) u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -#define cmp_int(l, r) ((l > r) - (l < r)) - static inline int u8_cmp(u8 l, u8 r) { return cmp_int(l, r); diff --git a/fs/pipe.c b/fs/pipe.c index da45edd68c41..45077c37bad1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -76,8 +77,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; * -- Manfred Spraul 2002-05-09 */ -#define cmp_int(l, r) ((l > r) - (l < r)) - #ifdef CONFIG_PROVE_LOCKING static int pipe_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 81c94dd1d596..2f9caa3eb828 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -290,8 +290,6 @@ xfs_zone_gc_query_cb( return 0; } -#define cmp_int(l, r) ((l > r) - (l < r)) - static int xfs_zone_gc_rmap_rec_cmp( const void *a, diff --git a/include/linux/sort.h b/include/linux/sort.h index 8e5603b10941..c01ef804a0eb 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -4,6 +4,16 @@ #include +/** + * cmp_int - perform a three-way comparison of the arguments + * @l: the left argument + * @r: the right argument + * + * Return: 1 if the left argument is greater than the right one; 0 if the + * arguments are equal; -1 if the left argument is less than the right one. + */ +#define cmp_int(l, r) (((l) > (r)) - ((l) < (r))) + void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, -- 2.51.0 From c91d78622e16f628176d7248044106b03818af6d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Apr 2025 10:27:37 +0300 Subject: [PATCH 04/16] util_macros.h: fix the reference in kernel-doc In PTR_IF() description the text refers to the parameter as (ptr) while the kernel-doc format asks for @ptr. Fix this accordingly. Link: https://lkml.kernel.org/r/20250428072737.3265239-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexandru Ardelean Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 7b64fb597f85..8183ac610cc5 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -85,7 +85,7 @@ * @ptr: A pointer to assign if @cond is true. * * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set - * to 'y' or 'm', or to NULL otherwise. The (ptr) argument must be a pointer. + * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer. * * The macro can be very useful to help compiler dropping dead code. * -- 2.51.0 From f7a667a046cf9962b0b3eb763b1fbe3eec925a2c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 11:57:20 -0700 Subject: [PATCH 05/16] kexec_file: use SHA-256 library API instead of crypto_shash API This user of SHA-256 does not support any other algorithm, so the crypto_shash abstraction provides no value. Just use the SHA-256 library API instead, which is much simpler and easier to use. Tested with '/sbin/kexec --kexec-file-syscall'. Link: https://lkml.kernel.org/r/20250428185721.844686-1-ebiggers@kernel.org Signed-off-by: Eric Biggers Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Young Signed-off-by: Andrew Morton --- kernel/Kconfig.kexec | 3 +- kernel/kexec_file.c | 78 +++++++++----------------------------------- 2 files changed, 16 insertions(+), 65 deletions(-) diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 4d111f871951..9fea9dca15bd 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -38,8 +38,7 @@ config KEXEC config KEXEC_FILE bool "Enable kexec file based system call" depends on ARCH_SUPPORTS_KEXEC_FILE - select CRYPTO - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 select KEXEC_CORE help This is new version of kexec system call. This system call is diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index fba686487e3b..14e5087b4277 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -712,11 +711,10 @@ int kexec_add_buffer(struct kexec_buf *kbuf) /* Calculate and store the digest of segments */ static int kexec_calculate_store_digests(struct kimage *image) { - struct crypto_shash *tfm; - struct shash_desc *desc; + struct sha256_state state; int ret = 0, i, j, zero_buf_sz, sha_region_sz; - size_t desc_size, nullsz; - char *digest; + size_t nullsz; + u8 digest[SHA256_DIGEST_SIZE]; void *zero_buf; struct kexec_sha_region *sha_regions; struct purgatory_info *pi = &image->purgatory_info; @@ -727,37 +725,12 @@ static int kexec_calculate_store_digests(struct kimage *image) zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); zero_buf_sz = PAGE_SIZE; - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - ret = PTR_ERR(tfm); - goto out; - } - - desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); - desc = kzalloc(desc_size, GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out_free_tfm; - } - sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); sha_regions = vzalloc(sha_region_sz); - if (!sha_regions) { - ret = -ENOMEM; - goto out_free_desc; - } - - desc->tfm = tfm; - - ret = crypto_shash_init(desc); - if (ret < 0) - goto out_free_sha_regions; + if (!sha_regions) + return -ENOMEM; - digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); - if (!digest) { - ret = -ENOMEM; - goto out_free_sha_regions; - } + sha256_init(&state); for (j = i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; @@ -776,10 +749,7 @@ static int kexec_calculate_store_digests(struct kimage *image) if (ksegment->kbuf == pi->purgatory_buf) continue; - ret = crypto_shash_update(desc, ksegment->kbuf, - ksegment->bufsz); - if (ret) - break; + sha256_update(&state, ksegment->kbuf, ksegment->bufsz); /* * Assume rest of the buffer is filled with zero and @@ -791,44 +761,26 @@ static int kexec_calculate_store_digests(struct kimage *image) if (bytes > zero_buf_sz) bytes = zero_buf_sz; - ret = crypto_shash_update(desc, zero_buf, bytes); - if (ret) - break; + sha256_update(&state, zero_buf, bytes); nullsz -= bytes; } - if (ret) - break; - sha_regions[j].start = ksegment->mem; sha_regions[j].len = ksegment->memsz; j++; } - if (!ret) { - ret = crypto_shash_final(desc, digest); - if (ret) - goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", - sha_regions, sha_region_sz, 0); - if (ret) - goto out_free_digest; + sha256_final(&state, digest); - ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); - if (ret) - goto out_free_digest; - } + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); + if (ret) + goto out_free_sha_regions; -out_free_digest: - kfree(digest); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); out_free_sha_regions: vfree(sha_regions); -out_free_desc: - kfree(desc); -out_free_tfm: - kfree(tfm); -out: return ret; } -- 2.51.0 From f43f02429295486059605997bc43803527d69791 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 29 Apr 2025 02:37:07 +0900 Subject: [PATCH 06/16] nilfs2: add pointer check for nilfs_direct_propagate() Patch series "nilfs2: improve sanity checks in dirty state propagation". This fixes one missed check for block mapping anomalies and one improper return of an error code during a preparation step for log writing, thereby improving checking for filesystem corruption on writeback. This patch (of 2): In nilfs_direct_propagate(), the printer get from nilfs_direct_get_ptr() need to be checked to ensure it is not an invalid pointer. If the pointer value obtained by nilfs_direct_get_ptr() is NILFS_BMAP_INVALID_PTR, means that the metadata (in this case, i_bmap in the nilfs_inode_info struct) that should point to the data block at the buffer head of the argument is corrupted and the data block is orphaned, meaning that the file system has lost consistency. Add a value check and return -EINVAL when it is an invalid pointer. Link: https://lkml.kernel.org/r/20250428173808.6452-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20250428173808.6452-2-konishi.ryusuke@gmail.com Fixes: 36a580eb489f ("nilfs2: direct block mapping") Signed-off-by: Wentao Liang Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/direct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; -- 2.51.0 From 8e39fbb1edbb4ec9d7c1124f403877fc167fcecd Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 29 Apr 2025 02:37:08 +0900 Subject: [PATCH 07/16] nilfs2: do not propagate ENOENT error from nilfs_btree_propagate() In preparation for writing logs, in nilfs_btree_propagate(), which makes parent and ancestor node blocks dirty starting from a modified data block or b-tree node block, if the starting block does not belong to the b-tree, i.e. is isolated, nilfs_btree_do_lookup() called within the function fails with -ENOENT. In this case, even though -ENOENT is an internal code, it is propagated to the log writer via nilfs_bmap_propagate() and may be erroneously returned to system calls such as fsync(). Fix this issue by changing the error code to -EINVAL in this case, and having the bmap layer detect metadata corruption and convert the error code appropriately. Link: https://lkml.kernel.org/r/20250428173808.6452-3-konishi.ryusuke@gmail.com Fixes: 1f5abe7e7dbc ("nilfs2: replace BUG_ON and BUG calls triggerable from ioctl") Signed-off-by: Ryusuke Konishi Cc: Wentao Liang Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 0d8f7fb15c2e..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } -- 2.51.0 From 479d26ee013c5388ad6855e512ab20d81e43750d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 1 May 2025 02:05:02 +0100 Subject: [PATCH 08/16] lib/oid_registry.c: remove unused sprint_OID sprint_OID() was added as part of 2012's commit 4f73175d0375 ("X.509: Add utility functions to render OIDs as strings") but it hasn't been used. Remove it. Note that there's also 'sprint_oid' (lower case) which is used in a lot of places; that's left as is except for fixing its case in a comment. Link: https://lkml.kernel.org/r/20250501010502.326472-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Andrew Morton --- include/linux/oid_registry.h | 1 - lib/oid_registry.c | 25 +------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6f9242259edc..6de479ebbe5d 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -151,6 +151,5 @@ enum OID { extern enum OID look_up_OID(const void *data, size_t datasize); extern int parse_OID(const void *data, size_t datasize, enum OID *oid); extern int sprint_oid(const void *, size_t, char *, size_t); -extern int sprint_OID(enum OID, char *, size_t); #endif /* _LINUX_OID_REGISTRY_H */ diff --git a/lib/oid_registry.c b/lib/oid_registry.c index fe6705cfd780..9b757a117f09 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -117,7 +117,7 @@ int parse_OID(const void *data, size_t datasize, enum OID *oid) EXPORT_SYMBOL_GPL(parse_OID); /* - * sprint_OID - Print an Object Identifier into a buffer + * sprint_oid - Print an Object Identifier into a buffer * @data: The encoded OID to print * @datasize: The size of the encoded OID * @buffer: The buffer to render into @@ -173,26 +173,3 @@ bad: return -EBADMSG; } EXPORT_SYMBOL_GPL(sprint_oid); - -/** - * sprint_OID - Print an Object Identifier into a buffer - * @oid: The OID to print - * @buffer: The buffer to render into - * @bufsize: The size of the buffer - * - * The OID is rendered into the buffer in "a.b.c.d" format and the number of - * bytes is returned. - */ -int sprint_OID(enum OID oid, char *buffer, size_t bufsize) -{ - int ret; - - BUG_ON(oid >= OID__NR); - - ret = sprint_oid(oid_data + oid_index[oid], - oid_index[oid + 1] - oid_index[oid], - buffer, bufsize); - BUG_ON(ret == -EBADMSG); - return ret; -} -EXPORT_SYMBOL_GPL(sprint_OID); -- 2.51.0 From f11c1efe46ad84555a0948401c7bdb63d711088d Mon Sep 17 00:00:00 2001 From: Chelsy Ratnawat Date: Sat, 3 May 2025 14:19:59 -0700 Subject: [PATCH 09/16] selftests: fix some typos in tools/testing/selftests Fix multiple spelling errors: - "rougly" -> "roughly" - "fielesystems" -> "filesystems" - "Can'" -> "Can't" Link: https://lkml.kernel.org/r/20250503211959.507815-1-chelsyratnawat2001@gmail.com Signed-off-by: Chelsy Ratnawat Cc: Christian Brauner Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/filesystems/file_stressor.c | 2 +- tools/testing/selftests/mm/gup_longterm.c | 2 +- .../selftests/thermal/intel/power_floor/power_floor_test.c | 2 +- .../thermal/intel/workload_hint/workload_hint_test.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c index 1136f93a9977..01dd89f8e52f 100644 --- a/tools/testing/selftests/filesystems/file_stressor.c +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) ssize_t nr_read; /* - * Concurrently read /proc//fd/ which rougly does: + * Concurrently read /proc//fd/ which roughly does: * * f = fget_task_next(p, &fd); * if (!f) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 21595b20bbc3..d50ada0c7dbf 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -158,7 +158,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* * R/O pinning or pinning in a private mapping is always * expected to work. Otherwise, we expect long-term R/W pinning - * to only succeed for special fielesystems. + * to only succeed for special filesystems. */ should_work = !shared || !rw || fs_supports_writable_longterm_pinning(fs_type); diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c index 0326b39a11b9..30cab5d425d2 100644 --- a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c +++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable power floor notifications\n"); + perror("Can't enable power floor notifications\n"); exit(1); } diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index 217c3a641c53..a40097232967 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -37,7 +37,7 @@ void workload_hint_exit(int signum) } if (write(fd, "0\n", 2) < 0) { - perror("Can' disable workload hints\n"); + perror("Can't disable workload hints\n"); exit(1); } @@ -99,7 +99,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable workload hints\n"); + perror("Can't enable workload hints\n"); exit(1); } -- 2.51.0 From 6be7045c7756caf2d445dc66473e45ac5779cd55 Mon Sep 17 00:00:00 2001 From: Illia Ostapyshyn Date: Sat, 3 May 2025 14:32:31 +0200 Subject: [PATCH 10/16] scripts/gdb: fix kgdb probing on single-core systems Patch series "scripts/gdb: Fixes related to lx_per_cpu()". These patches (1) fix kgdb detection on systems featuring a single CPU and (2) update the documentation to reflect the current usage of lx_per_cpu() and update an outdated example of its usage. This patch (of 2): When requested the list of threads via qfThreadInfo, gdb_cmd_query in kernel/debug/gdbstub.c first returns "shadow" threads for CPUs followed by the actual tasks in the system. Extended qThreadExtraInfo queries yield "shadowCPU%d" as the name for the CPU core threads. This behavior is used by get_gdbserver_type() to probe for KGDB by matching the name for the thread 2 against "shadowCPU". This breaks down on single-core systems, where thread 2 is the first nonshadow thread. Request the name for thread 1 instead. As GDB assigns thread IDs in the order of their appearance, it is safe to assume shadowCPU0 at ID 1 as long as CPU0 is not hotplugged. Before: (gdb) info threads Id Target Id Frame 1 Thread 4294967294 (shadowCPU0) kgdb_breakpoint () * 2 Thread 1 (swapper/0) kgdb_breakpoint () 3 Thread 2 (kthreadd) 0x0000000000000000 in ?? () ... (gdb) p $lx_current().comm Sorry, obtaining the current CPU is not yet supported with this gdb server. After: (gdb) info threads Id Target Id Frame 1 Thread 4294967294 (shadowCPU0) kgdb_breakpoint () * 2 Thread 1 (swapper/0) kgdb_breakpoint () 3 Thread 2 (kthreadd) 0x0000000000000000 in ?? () ... (gdb) p $lx_current().comm $1 = "swapper/0\000\000\000\000\000\000" Link: https://lkml.kernel.org/r/20250503123234.2407184-1-illia@yshyn.com Link: https://lkml.kernel.org/r/20250503123234.2407184-2-illia@yshyn.com Signed-off-by: Illia Ostapyshyn Cc: Alex Shi Cc: Brendan Jackman Cc: Dongliang Mu Cc: Florian Rommel Cc: Hu Haowen <2023002089@link.tyut.edu.cn> Cc: Jan Kiszka Cc: Jonathan Corbet Cc: Kieran Bingham Cc: Yanteng Si Signed-off-by: Andrew Morton --- scripts/gdb/linux/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index 03ebdccf5f69..877404e92dbb 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -200,7 +200,7 @@ def get_gdbserver_type(): def probe_kgdb(): try: - thread_info = gdb.execute("info thread 2", to_string=True) + thread_info = gdb.execute("info thread 1", to_string=True) return "shadowCPU" in thread_info except gdb.error: return False -- 2.51.0 From 09e1d93a421fa5f6699a9dbdbfc09c1008de4b9a Mon Sep 17 00:00:00 2001 From: Illia Ostapyshyn Date: Sat, 3 May 2025 14:32:32 +0200 Subject: [PATCH 11/16] scripts/gdb: update documentation for lx_per_cpu Commit db08c53fdd542bb7f83b ("scripts/gdb: fix parameter handling in $lx_per_cpu") changed the parameter handling of lx_per_cpu to use GdbValue instead of parsing the variable name. Update the documentation to reflect the new lx_per_cpu usage. Update the hrtimer_bases example to use rb_tree instead of the timerqueue_head.next pointer removed in commit 511885d7061eda3eb1fa ("lib/timerqueue: Rely on rbtree semantics for next timer"). Link: https://lkml.kernel.org/r/20250503123234.2407184-3-illia@yshyn.com Signed-off-by: Illia Ostapyshyn Cc: Alex Shi Cc: Brendan Jackman Cc: Dongliang Mu Cc: Florian Rommel Cc: Hu Haowen <2023002089@link.tyut.edu.cn> Cc: Jan Kiszka Cc: Jonathan Corbet Cc: Kieran Bingham Cc: Yanteng Si Signed-off-by: Andrew Morton --- .../debugging/gdb-kernel-debugging.rst | 34 ++++++++----------- .../zh_CN/dev-tools/gdb-kernel-debugging.rst | 34 ++++++++----------- .../zh_TW/dev-tools/gdb-kernel-debugging.rst | 34 ++++++++----------- scripts/gdb/linux/cpus.py | 4 +-- 4 files changed, 47 insertions(+), 59 deletions(-) diff --git a/Documentation/process/debugging/gdb-kernel-debugging.rst b/Documentation/process/debugging/gdb-kernel-debugging.rst index 895285c037c7..9475c759c722 100644 --- a/Documentation/process/debugging/gdb-kernel-debugging.rst +++ b/Documentation/process/debugging/gdb-kernel-debugging.rst @@ -127,35 +127,31 @@ Examples of using the Linux-provided gdb helpers - Make use of the per-cpu function for the current or a specified CPU:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - Dig into hrtimers using the container_of helper:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 , - rb_left = 0x0 + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 , - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f , - start_comm = "swapper/2\000\000\000\000\000\000" + _softexpires = 1228461000000, + function = 0xffffffff8137ab20 , + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst index 3c133a918f30..282aacd33442 100644 --- a/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst @@ -120,35 +120,31 @@ Kgdb内核调试器、QEMU等虚拟机管理程序或基于JTAG的硬件接口 - 对当前或指定的CPU使用per-cpu函数:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - 使用container_of查看更多hrtimers信息:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 , - rb_left = 0x0 + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 , - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f , - start_comm = "swapper/2\000\000\000\000\000\000" + _softexpires = 1228461000000, + function = 0xffffffff8137ab20 , + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst b/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst index c881e8872b19..b595af59ba78 100644 --- a/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst @@ -116,35 +116,31 @@ Kgdb內核調試器、QEMU等虛擬機管理程序或基於JTAG的硬件接口 - 對當前或指定的CPU使用per-cpu函數:: - (gdb) p $lx_per_cpu("runqueues").nr_running + (gdb) p $lx_per_cpu(runqueues).nr_running $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running + (gdb) p $lx_per_cpu(runqueues, 2).nr_running $4 = 0 - 使用container_of查看更多hrtimers信息:: - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") + (gdb) set $leftmost = $lx_per_cpu(hrtimer_bases).clock_base[0].active.rb_root.rb_leftmost + (gdb) p *$container_of($leftmost, "struct hrtimer", "node") $5 = { node = { node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 , - rb_left = 0x0 + __rb_parent_color = 18446612686384860673, + rb_right = 0xffff888231da8b00, + rb_left = 0x0 }, - expires = { - tv64 = 1835268000000 - } + expires = 1228461000000 }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 , - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f , - start_comm = "swapper/2\000\000\000\000\000\000" + _softexpires = 1228461000000, + function = 0xffffffff8137ab20 , + base = 0xffff888231d9b4c0, + state = 1 '\001', + is_rel = 0 '\000', + is_soft = 0 '\000', + is_hard = 1 '\001' } diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index f506965ea759..6edf4ef61636 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -141,7 +141,7 @@ LxCpus() class PerCpu(gdb.Function): """Return per-cpu variable. -$lx_per_cpu("VAR"[, CPU]): Return the per-cpu variable called VAR for the +$lx_per_cpu(VAR[, CPU]): Return the per-cpu variable called VAR for the given CPU number. If CPU is omitted, the CPU of the current context is used. Note that VAR has to be quoted as string.""" @@ -158,7 +158,7 @@ PerCpu() class PerCpuPtr(gdb.Function): """Return per-cpu pointer. -$lx_per_cpu_ptr("VAR"[, CPU]): Return the per-cpu pointer called VAR for the +$lx_per_cpu_ptr(VAR[, CPU]): Return the per-cpu pointer called VAR for the given CPU number. If CPU is omitted, the CPU of the current context is used. Note that VAR has to be quoted as string.""" -- 2.51.0 From cf80fdbc0a5512feabbc56280b2abbb51d4e5b4e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 May 2025 15:17:42 +0300 Subject: [PATCH 12/16] list: remove redundant 'extern' for function prototypes The 'extern' keyword is redundant for function prototypes. list.h never used them and new code in general is better without them. Link: https://lkml.kernel.org/r/20250502121742.3997529-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/list.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/list.h b/include/linux/list.h index 29a375889fb8..e7e28afd28f8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a @@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new, * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); +bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a -- 2.51.0 From bf454ec31add6790f6cdc88328e38901fcbbade6 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:35 +0800 Subject: [PATCH 13/16] kexec_file: allow to place kexec_buf randomly Patch series "Support kdump with LUKS encryption by reusing LUKS volume keys", v9. LUKS is the standard for Linux disk encryption, widely adopted by users, and in some cases, such as Confidential VMs, it is a requirement. With kdump enabled, when the first kernel crashes, the system can boot into the kdump/crash kernel to dump the memory image (i.e., /proc/vmcore) to a specified target. However, there are two challenges when dumping vmcore to a LUKS-encrypted device: - Kdump kernel may not be able to decrypt the LUKS partition. For some machines, a system administrator may not have a chance to enter the password to decrypt the device in kdump initramfs after the 1st kernel crashes; For cloud confidential VMs, depending on the policy the kdump kernel may not be able to unseal the keys with TPM and the console virtual keyboard is untrusted. - LUKS2 by default use the memory-hard Argon2 key derivation function which is quite memory-consuming compared to the limited memory reserved for kdump. Take Fedora example, by default, only 256M is reserved for systems having memory between 4G-64G. With LUKS enabled, ~1300M needs to be reserved for kdump. Note if the memory reserved for kdump can't be used by 1st kernel i.e. an user sees ~1300M memory missing in the 1st kernel. Besides users (at least for Fedora) usually expect kdump to work out of the box i.e. no manual password input or custom crashkernel value is needed. And it doesn't make sense to derivate the keys again in kdump kernel which seems to be redundant work. This patchset addresses the above issues by making the LUKS volume keys persistent for kdump kernel with the help of cryptsetup's new APIs (--link-vk-to-keyring/--volume-key-keyring). Here is the life cycle of the kdump copies of LUKS volume keys, 1. After the 1st kernel loads the initramfs during boot, systemd use an user-input passphrase to de-crypt the LUKS volume keys or TPM-sealed key and then save the volume keys to specified keyring (using the --link-vk-to-keyring API) and the key will expire within specified time. 2. A user space tool (kdump initramfs loader like kdump-utils) create key items inside /sys/kernel/config/crash_dm_crypt_keys to inform the 1st kernel which keys are needed. 3. When the kdump initramfs is loaded by the kexec_file_load syscall, the 1st kernel will iterate created key items, save the keys to kdump reserved memory. 4. When the 1st kernel crashes and the kdump initramfs is booted, the kdump initramfs asks the kdump kernel to create a user key using the key stored in kdump reserved memory by writing yes to /sys/kernel/crash_dm_crypt_keys/restore. Then the LUKS encrypted device is unlocked with libcryptsetup's --volume-key-keyring API. 5. The system gets rebooted to the 1st kernel after dumping vmcore to the LUKS encrypted device is finished After libcryptsetup saving the LUKS volume keys to specified keyring, whoever takes this should be responsible for the safety of these copies of keys. The keys will be saved in the memory area exclusively reserved for kdump where even the 1st kernel has no direct access. And further more, two additional protections are added, - save the copy randomly in kdump reserved memory as suggested by Jan - clear the _PAGE_PRESENT flag of the page that stores the copy as suggested by Pingfan This patchset only supports x86. There will be patches to support other architectures once this patch set gets merged. This patch (of 9): Currently, kexec_buf is placed in order which means for the same machine, the info in the kexec_buf is always located at the same position each time the machine is booted. This may cause a risk for sensitive information like LUKS volume key. Now struct kexec_buf has a new field random which indicates it's supposed to be placed in a random position. Note this feature is enabled only when CONFIG_CRASH_DUMP is enabled. So it only takes effect for kdump and won't impact kexec reboot. Link: https://lkml.kernel.org/r/20250502011246.99238-1-coxu@redhat.com Link: https://lkml.kernel.org/r/20250502011246.99238-2-coxu@redhat.com Signed-off-by: Coiby Xu Suggested-by: Jan Pazdziora Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- include/linux/kexec.h | 30 ++++++++++++++++++++++++++++++ kernel/kexec_file.c | 3 +++ 2 files changed, 33 insertions(+) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8971861521a..1871eaa95432 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -25,6 +25,10 @@ extern note_buf_t __percpu *crash_notes; +#ifdef CONFIG_CRASH_DUMP +#include +#endif + #ifdef CONFIG_KEXEC_CORE #include #include @@ -169,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. * @top_down: Allocate from top of memory. + * @random: Place the buffer at a random position. */ struct kexec_buf { struct kimage *image; @@ -180,8 +185,33 @@ struct kexec_buf { unsigned long buf_min; unsigned long buf_max; bool top_down; +#ifdef CONFIG_CRASH_DUMP + bool random; +#endif }; + +#ifdef CONFIG_CRASH_DUMP +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{ + unsigned short i; + + if (kbuf->random) { + get_random_bytes(&i, sizeof(unsigned short)); + *temp_start = start + (end - start) / USHRT_MAX * i; + } +} +#else +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{} +#endif + int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf); int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 14e5087b4277..4a88bfc33824 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -444,6 +444,7 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end, temp_end = min(end, kbuf->buf_max); temp_start = temp_end - kbuf->memsz + 1; + kexec_random_range_start(temp_start, temp_end, kbuf, &temp_start); do { /* align down start */ @@ -488,6 +489,8 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, temp_start = max(start, kbuf->buf_min); + kexec_random_range_start(temp_start, end, kbuf, &temp_start); + do { temp_start = ALIGN(temp_start, kbuf->buf_align); temp_end = temp_start + kbuf->memsz - 1; -- 2.51.0 From 180cf31af7c313790f1e0fba1c7aa42512144dd5 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:36 +0800 Subject: [PATCH 14/16] crash_dump: make dm crypt keys persist for the kdump kernel MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A configfs /sys/kernel/config/crash_dm_crypt_keys is provided for user space to make the dm crypt keys persist for the kdump kernel. Take the case of dumping to a LUKS-encrypted target as an example, here is the life cycle of the kdump copies of LUKS volume keys, 1. After the 1st kernel loads the initramfs during boot, systemd uses an user-input passphrase to de-crypt the LUKS volume keys or simply TPM-sealed volume keys and then save the volume keys to specified keyring (using the --link-vk-to-keyring API) and the keys will expire within specified time. 2. A user space tool (kdump initramfs loader like kdump-utils) create key items inside /sys/kernel/config/crash_dm_crypt_keys to inform the 1st kernel which keys are needed. 3. When the kdump initramfs is loaded by the kexec_file_load syscall, the 1st kernel will iterate created key items, save the keys to kdump reserved memory. 4. When the 1st kernel crashes and the kdump initramfs is booted, the kdump initramfs asks the kdump kernel to create a user key using the key stored in kdump reserved memory by writing yes to /sys/kernel/crash_dm_crypt_keys/restore. Then the LUKS encrypted device is unlocked with libcryptsetup's --volume-key-keyring API. 5. The system gets rebooted to the 1st kernel after dumping vmcore to the LUKS encrypted device is finished Eventually the keys have to stay in the kdump reserved memory for the kdump kernel to unlock encrypted volumes. During this process, some measures like letting the keys expire within specified time are desirable to reduce security risk. This patch assumes, 1) there are 128 LUKS devices at maximum to be unlocked thus MAX_KEY_NUM=128. 2) a key description won't exceed 128 bytes thus KEY_DESC_MAX_LEN=128. And here is a demo on how to interact with /sys/kernel/config/crash_dm_crypt_keys, # Add key #1 mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720 # Add key #1's description echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/description # how many keys do we have now? cat /sys/kernel/config/crash_dm_crypt_keys/count 1 # Add key# 2 in the same way # how many keys do we have now? cat /sys/kernel/config/crash_dm_crypt_keys/count 2 # the tree structure of /crash_dm_crypt_keys configfs tree /sys/kernel/config/crash_dm_crypt_keys/ /sys/kernel/config/crash_dm_crypt_keys/ ├── 7d26b7b4-e342-4d2d-b660-7426b0996720 │   └── description ├── count ├── fce2cd38-4d59-4317-8ce2-1fd24d52c46a │   └── description Link: https://lkml.kernel.org/r/20250502011246.99238-3-coxu@redhat.com Signed-off-by: Coiby Xu Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Jan Pazdziora Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/kdump.rst | 28 ++++ kernel/Kconfig.kexec | 11 ++ kernel/Makefile | 1 + kernel/crash_dump_dm_crypt.c | 154 ++++++++++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 kernel/crash_dump_dm_crypt.c diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 1f7f14c6e184..b74d3bed8fff 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -547,6 +547,34 @@ from within add_taint() whenever the value set in this bitmask matches with the bit flag being set by add_taint(). This will cause a kdump to occur at the add_taint()->panic() call. +Write the dump file to encrypted disk volume +============================================ + +CONFIG_CRASH_DM_CRYPT can be enabled to support saving the dump file to an +encrypted disk volume. User space can interact with +/sys/kernel/config/crash_dm_crypt_keys for setup, + +1. Tell the first kernel what logon keys are needed to unlock the disk volumes, + # Add key #1 + mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720 + # Add key #1's description + echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/description + + # how many keys do we have now? + cat /sys/kernel/config/crash_dm_crypt_keys/count + 1 + + # Add key #2 in the same way + + # how many keys do we have now? + cat /sys/kernel/config/crash_dm_crypt_keys/count + 2 + +2. Load the dump-capture kernel + +3. After the dump-capture kerne get booted, restore the keys to user keyring + echo yes > /sys/kernel/crash_dm_crypt_keys/restore + Contact ======= diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 9fea9dca15bd..c3e180d5e4fd 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -115,6 +115,17 @@ config CRASH_DUMP For s390, this option also enables zfcpdump. See also +config CRASH_DM_CRYPT + bool "Support saving crash dump to dm-crypt encrypted volume" + depends on KEXEC_FILE + depends on CRASH_DUMP + depends on DM_CRYPT + depends on CONFIGFS_FS + help + With this option enabled, user space can intereact with + /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys + persistent for the dump-capture kernel. + config CRASH_HOTPLUG bool "Update the crash elfcorehdr on system configuration changes" default y diff --git a/kernel/Makefile b/kernel/Makefile index 434929de17ef..c7d3793107e5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o obj-$(CONFIG_KEXEC_CORE) += kexec_core.o obj-$(CONFIG_CRASH_DUMP) += crash_core.o +obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c new file mode 100644 index 000000000000..62a3c47d8b3b --- /dev/null +++ b/kernel/crash_dump_dm_crypt.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include + +#define KEY_NUM_MAX 128 /* maximum dm crypt keys */ +#define KEY_DESC_MAX_LEN 128 /* maximum dm crypt key description size */ + +static unsigned int key_count; + +struct config_key { + struct config_item item; + const char *description; +}; + +static inline struct config_key *to_config_key(struct config_item *item) +{ + return container_of(item, struct config_key, item); +} + +static ssize_t config_key_description_show(struct config_item *item, char *page) +{ + return sprintf(page, "%s\n", to_config_key(item)->description); +} + +static ssize_t config_key_description_store(struct config_item *item, + const char *page, size_t count) +{ + struct config_key *config_key = to_config_key(item); + size_t len; + int ret; + + ret = -EINVAL; + len = strcspn(page, "\n"); + + if (len > KEY_DESC_MAX_LEN) { + pr_err("The key description shouldn't exceed %u characters", KEY_DESC_MAX_LEN); + return ret; + } + + if (!len) + return ret; + + kfree(config_key->description); + ret = -ENOMEM; + config_key->description = kmemdup_nul(page, len, GFP_KERNEL); + if (!config_key->description) + return ret; + + return count; +} + +CONFIGFS_ATTR(config_key_, description); + +static struct configfs_attribute *config_key_attrs[] = { + &config_key_attr_description, + NULL, +}; + +static void config_key_release(struct config_item *item) +{ + kfree(to_config_key(item)); + key_count--; +} + +static struct configfs_item_operations config_key_item_ops = { + .release = config_key_release, +}; + +static const struct config_item_type config_key_type = { + .ct_item_ops = &config_key_item_ops, + .ct_attrs = config_key_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_item *config_keys_make_item(struct config_group *group, + const char *name) +{ + struct config_key *config_key; + + if (key_count > KEY_NUM_MAX) { + pr_err("Only %u keys at maximum to be created\n", KEY_NUM_MAX); + return ERR_PTR(-EINVAL); + } + + config_key = kzalloc(sizeof(struct config_key), GFP_KERNEL); + if (!config_key) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&config_key->item, name, &config_key_type); + + key_count++; + + return &config_key->item; +} + +static ssize_t config_keys_count_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", key_count); +} + +CONFIGFS_ATTR_RO(config_keys_, count); + +static struct configfs_attribute *config_keys_attrs[] = { + &config_keys_attr_count, + NULL, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations config_keys_group_ops = { + .make_item = config_keys_make_item, +}; + +static const struct config_item_type config_keys_type = { + .ct_group_ops = &config_keys_group_ops, + .ct_attrs = config_keys_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem config_keys_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "crash_dm_crypt_keys", + .ci_type = &config_keys_type, + }, + }, +}; + +static int __init configfs_dmcrypt_keys_init(void) +{ + int ret; + + config_group_init(&config_keys_subsys.su_group); + mutex_init(&config_keys_subsys.su_mutex); + ret = configfs_register_subsystem(&config_keys_subsys); + if (ret) { + pr_err("Error %d while registering subsystem %s\n", ret, + config_keys_subsys.su_group.cg_item.ci_namebuf); + goto out_unregister; + } + + return 0; + +out_unregister: + configfs_unregister_subsystem(&config_keys_subsys); + + return ret; +} + +module_init(configfs_dmcrypt_keys_init); -- 2.51.0 From 479e58549b0fa7e80f1e0b9e69e0a2a8e6711132 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:37 +0800 Subject: [PATCH 15/16] crash_dump: store dm crypt keys in kdump reserved memory When the kdump kernel image and initrd are loaded, the dm crypts keys will be read from keyring and then stored in kdump reserved memory. Assume a key won't exceed 256 bytes thus MAX_KEY_SIZE=256 according to "cryptsetup benchmark". Link: https://lkml.kernel.org/r/20250502011246.99238-4-coxu@redhat.com Signed-off-by: Coiby Xu Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Jan Pazdziora Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 6 +- include/linux/kexec.h | 4 ++ kernel/crash_dump_dm_crypt.c | 133 +++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 44305336314e..2e6782239034 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,7 +34,11 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif - +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif #ifndef arch_crash_handle_hotplug_event static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1871eaa95432..6e688c5d8e4d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -405,6 +405,10 @@ struct kimage { void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; + + /* dm crypt keys buffer */ + unsigned long dm_crypt_keys_addr; + unsigned long dm_crypt_keys_sz; }; /* kexec interface functions */ diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 62a3c47d8b3b..fb25f55f1512 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -1,14 +1,62 @@ // SPDX-License-Identifier: GPL-2.0-only +#include +#include #include #include #include #include #define KEY_NUM_MAX 128 /* maximum dm crypt keys */ +#define KEY_SIZE_MAX 256 /* maximum dm crypt key size */ #define KEY_DESC_MAX_LEN 128 /* maximum dm crypt key description size */ static unsigned int key_count; +struct dm_crypt_key { + unsigned int key_size; + char key_desc[KEY_DESC_MAX_LEN]; + u8 data[KEY_SIZE_MAX]; +}; + +static struct keys_header { + unsigned int total_keys; + struct dm_crypt_key keys[] __counted_by(total_keys); +} *keys_header; + +static size_t get_keys_header_size(size_t total_keys) +{ + return struct_size(keys_header, keys, total_keys); +} + +static int read_key_from_user_keying(struct dm_crypt_key *dm_key) +{ + const struct user_key_payload *ukp; + struct key *key; + + kexec_dprintk("Requesting logon key %s", dm_key->key_desc); + key = request_key(&key_type_logon, dm_key->key_desc, NULL); + + if (IS_ERR(key)) { + pr_warn("No such logon key %s\n", dm_key->key_desc); + return PTR_ERR(key); + } + + ukp = user_key_payload_locked(key); + if (!ukp) + return -EKEYREVOKED; + + if (ukp->datalen > KEY_SIZE_MAX) { + pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX); + return -EINVAL; + } + + memcpy(dm_key->data, ukp->data, ukp->datalen); + dm_key->key_size = ukp->datalen; + kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, + dm_key->key_desc, dm_key->data); + return 0; +} + struct config_key { struct config_item item; const char *description; @@ -130,6 +178,91 @@ static struct configfs_subsystem config_keys_subsys = { }, }; +static int build_keys_header(void) +{ + struct config_item *item = NULL; + struct config_key *key; + int i, r; + + if (keys_header != NULL) + kvfree(keys_header); + + keys_header = kzalloc(get_keys_header_size(key_count), GFP_KERNEL); + if (!keys_header) + return -ENOMEM; + + keys_header->total_keys = key_count; + + i = 0; + list_for_each_entry(item, &config_keys_subsys.su_group.cg_children, + ci_entry) { + if (item->ci_type != &config_key_type) + continue; + + key = to_config_key(item); + + if (!key->description) { + pr_warn("No key description for key %s\n", item->ci_name); + return -EINVAL; + } + + strscpy(keys_header->keys[i].key_desc, key->description, + KEY_DESC_MAX_LEN); + r = read_key_from_user_keying(&keys_header->keys[i]); + if (r != 0) { + kexec_dprintk("Failed to read key %s\n", + keys_header->keys[i].key_desc); + return r; + } + i++; + kexec_dprintk("Found key: %s\n", item->ci_name); + } + + return 0; +} + +int crash_load_dm_crypt_keys(struct kimage *image) +{ + struct kexec_buf kbuf = { + .image = image, + .buf_min = 0, + .buf_max = ULONG_MAX, + .top_down = false, + .random = true, + }; + int r; + + + if (key_count <= 0) { + kexec_dprintk("No dm-crypt keys\n"); + return -ENOENT; + } + + image->dm_crypt_keys_addr = 0; + r = build_keys_header(); + if (r) + return r; + + kbuf.buffer = keys_header; + kbuf.bufsz = get_keys_header_size(key_count); + + kbuf.memsz = kbuf.bufsz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + r = kexec_add_buffer(&kbuf); + if (r) { + kvfree((void *)kbuf.buffer); + return r; + } + image->dm_crypt_keys_addr = kbuf.mem; + image->dm_crypt_keys_sz = kbuf.bufsz; + kexec_dprintk( + "Loaded dm crypt keys to kexec_buffer bufsz=0x%lx memsz=0x%lx\n", + kbuf.bufsz, kbuf.memsz); + + return r; +} + static int __init configfs_dmcrypt_keys_init(void) { int ret; -- 2.51.0 From 9ebfa8dcaea77a8ef02d0f9478717a138b0ad828 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:38 +0800 Subject: [PATCH 16/16] crash_dump: reuse saved dm crypt keys for CPU/memory hot-plugging When there are CPU and memory hot un/plugs, the dm crypt keys may need to be reloaded again depending on the solution for crash hotplug support. Currently, there are two solutions. One is to utilizes udev to instruct user space to reload the kdump kernel image and initrd, elfcorehdr and etc again. The other is to only update the elfcorehdr segment introduced in commit 247262756121 ("crash: add generic infrastructure for crash hotplug support"). For the 1st solution, the dm crypt keys need to be reloaded again. The user space can write true to /sys/kernel/config/crash_dm_crypt_key/reuse so the stored keys can be re-used. For the 2nd solution, the dm crypt keys don't need to be reloaded. Currently, only x86 supports the 2nd solution. If the 2nd solution gets extended to all arches, this patch can be dropped. Link: https://lkml.kernel.org/r/20250502011246.99238-5-coxu@redhat.com Signed-off-by: Coiby Xu Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Jan Pazdziora Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/kdump.rst | 4 ++ kernel/crash_dump_dm_crypt.c | 52 +++++++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index b74d3bed8fff..e25edaa8e533 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -570,6 +570,10 @@ encrypted disk volume. User space can interact with cat /sys/kernel/config/crash_dm_crypt_keys/count 2 + # To support CPU/memory hot-plugging, re-use keys already saved to reserved + # memory + echo true > /sys/kernel/config/crash_dm_crypt_key/reuse + 2. Load the dump-capture kernel 3. After the dump-capture kerne get booted, restore the keys to user keyring diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index fb25f55f1512..5f4a62389150 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -28,6 +28,20 @@ static size_t get_keys_header_size(size_t total_keys) return struct_size(keys_header, keys, total_keys); } +static void get_keys_from_kdump_reserved_memory(void) +{ + struct keys_header *keys_header_loaded; + + arch_kexec_unprotect_crashkres(); + + keys_header_loaded = kmap_local_page(pfn_to_page( + kexec_crash_image->dm_crypt_keys_addr >> PAGE_SHIFT)); + + memcpy(keys_header, keys_header_loaded, get_keys_header_size(key_count)); + kunmap_local(keys_header_loaded); + arch_kexec_protect_crashkres(); +} + static int read_key_from_user_keying(struct dm_crypt_key *dm_key) { const struct user_key_payload *ukp; @@ -150,8 +164,36 @@ static ssize_t config_keys_count_show(struct config_item *item, char *page) CONFIGFS_ATTR_RO(config_keys_, count); +static bool is_dm_key_reused; + +static ssize_t config_keys_reuse_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", is_dm_key_reused); +} + +static ssize_t config_keys_reuse_store(struct config_item *item, + const char *page, size_t count) +{ + if (!kexec_crash_image || !kexec_crash_image->dm_crypt_keys_addr) { + kexec_dprintk( + "dm-crypt keys haven't be saved to crash-reserved memory\n"); + return -EINVAL; + } + + if (kstrtobool(page, &is_dm_key_reused)) + return -EINVAL; + + if (is_dm_key_reused) + get_keys_from_kdump_reserved_memory(); + + return count; +} + +CONFIGFS_ATTR(config_keys_, reuse); + static struct configfs_attribute *config_keys_attrs[] = { &config_keys_attr_count, + &config_keys_attr_reuse, NULL, }; @@ -238,10 +280,12 @@ int crash_load_dm_crypt_keys(struct kimage *image) return -ENOENT; } - image->dm_crypt_keys_addr = 0; - r = build_keys_header(); - if (r) - return r; + if (!is_dm_key_reused) { + image->dm_crypt_keys_addr = 0; + r = build_keys_header(); + if (r) + return r; + } kbuf.buffer = keys_header; kbuf.bufsz = get_keys_header_size(key_count); -- 2.51.0