From 2cc02059fbc79306b53a44b1f1a4444aa3c76598 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 29 Jan 2025 17:06:41 +0100 Subject: [PATCH 01/16] selftests: always check mask returned by statmount(2) STATMOUNT_MNT_OPTS can actually be missing if there are no options. This is a change of behavior since 75ead69a7173 ("fs: don't let statmount return empty strings"). The other checks shouldn't actually trigger, but add them for correctness and for easier debugging if the test fails. Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250129160641.35485-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- .../filesystems/statmount/statmount_test.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 8eb6aa606a0d..46d289611ce8 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void) return; } + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n"); + return; + } if (strcmp(sm->str + sm->mnt_point, "/") != 0) { ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", sm->str + sm->mnt_point); @@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n"); + return; + } mnt_root = sm->str + sm->mnt_root; last_root = strrchr(mnt_root, '/'); if (last_root) @@ -437,6 +445,10 @@ static void test_statmount_fs_type(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_FS_TYPE)) { + ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n"); + return; + } fs_type = sm->str + sm->fs_type; for (s = known_fs; s != NULL; s++) { if (strcmp(fs_type, *s) == 0) @@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void) return; } + if (!(sm->mask & STATMOUNT_MNT_BASIC)) { + ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n"); + return; + } + while (getline(&line, &len, f_mountinfo) != -1) { int i; char *p, *p2; @@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void) if (p2) *p2 = '\0'; - statmount_opts = sm->str + sm->mnt_opts; + if (sm->mask & STATMOUNT_MNT_OPTS) + statmount_opts = sm->str + sm->mnt_opts; + else + statmount_opts = ""; if (strcmp(statmount_opts, p) != 0) ksft_test_result_fail( "unexpected mount options: '%s' != '%s'\n", -- 2.51.0 From 711f9b8fbe4f4936302804e246e206f0829f628f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2025 23:32:05 +0100 Subject: [PATCH 02/16] fsnotify: disable pre-content and permission events by default After introducing pre-content events, we had a regression related to disabling huge faults on files that should never have pre-content events enabled. This happened because the default f_mode of allocated files (0) does not disable pre-content events. Pre-content events are disabled in file_set_fsnotify_mode_by_watchers() but internal files may not get to call this helper. Initialize f_mode to disable permission and pre-content events for all files and if needed they will be enabled for the callers of file_set_fsnotify_mode_by_watchers(). Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/ Tested-by: Alex Williamson Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250203223205.861346-4-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/file_table.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/file_table.c b/fs/file_table.c index 35b93da6c5cb..5c00dc38558d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); + /* + * Disable permission and pre-content events for all files by default. + * They may be enabled later by file_set_fsnotify_mode_from_watchers(). + */ + file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } -- 2.51.0 From 091ee63e36e8289f9067f659a48d497911e49d6f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Feb 2025 14:51:20 +0100 Subject: [PATCH 03/16] pidfs: improve ioctl handling Pidfs supports extensible and non-extensible ioctls. The extensible ioctls need to check for the ioctl number itself not just the ioctl command otherwise both backward- and forward compatibility are broken. The pidfs ioctl handler also needs to look at the type of the ioctl command to guard against cases where "[...] a daemon receives some random file descriptor from a (potentially less privileged) client and expects the FD to be of some specific type, it might call ioctl() on this FD with some type-specific command and expect the call to fail if the FD is of the wrong type; but due to the missing type check, the kernel instead performs some action that userspace didn't expect." (cf. [1]] Link: https://lore.kernel.org/r/20250204-work-pidfs-ioctl-v1-1-04987d239575@kernel.org Link: https://lore.kernel.org/r/CAG48ez2K9A5GwtgqO31u9ZL292we8ZwAA=TJwwEv7wRuJ3j4Lw@mail.gmail.com [1] Fixes: 8ce352818820 ("pidfs: check for valid ioctl commands") Acked-by: Luca Boccassi Reported-by: Jann Horn Cc: stable@vger.kernel.org # v6.13; please backport with 8ce352818820 ("pidfs: check for valid ioctl commands") Signed-off-by: Christian Brauner --- fs/pidfs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 049352f973de..63f9699ebac3 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd) switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: - case PIDFD_GET_INFO: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: @@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd) return true; } + /* Extensible ioctls require some more careful checks. */ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PIDFD_GET_INFO): + /* + * Try to prevent performing a pidfd ioctl when someone + * erronously mistook the file descriptor for a pidfd. + * This is not perfect but will catch most cases. + */ + return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); + } + return false; } -- 2.51.0 From 37d11cfc63604b3886308e2111d845d148ced8bc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 4 Feb 2025 22:32:07 +0100 Subject: [PATCH 04/16] vfs: sanity check the length passed to inode_set_cached_link() This costs a strlen() call when instatianating a symlink. Preferably it would be hidden behind VFS_WARN_ON (or compatible), but there is no such facility at the moment. With the facility in place the call can be patched out in production kernels. In the meantime, since the cost is being paid unconditionally, use the result to a fixup the bad caller. This is not expected to persist in the long run (tm). Sample splat: bad length passed for symlink [/tmp/syz-imagegen43743633/file0/file0] (got 131109, expected 37) [rest of WARN blurp goes here] Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250204213207.337980-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7620547432a8..2c3b2f8a621f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -790,6 +790,19 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + int testlen; + + /* + * TODO: patch it into a debug-only check if relevant macros show up. + * In the meantime, since we are suffering strlen even on production kernels + * to find the right length, do a fixup if the wrong value got passed. + */ + testlen = strlen(link); + if (testlen != linklen) { + WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", + link, linklen, testlen); + linklen = testlen; + } inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; -- 2.51.0 From 511121a48bbd12df4ae50a099a8936e833df8c46 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 5 Feb 2025 19:42:01 +0100 Subject: [PATCH 05/16] MAINTAINERS: Move Pavel to kernel.org address I need to filter my emails better, switch to pavel@kernel.org address to help with that. Signed-off-by: Pavel Machek Signed-off-by: Linus Torvalds --- CREDITS | 6 ++---- MAINTAINERS | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/CREDITS b/CREDITS index 1f9f0f078b4a..53d11a46fd69 100644 --- a/CREDITS +++ b/CREDITS @@ -2515,11 +2515,9 @@ D: SLS distribution D: Initial implementation of VC's, pty's and select() N: Pavel Machek -E: pavel@ucw.cz +E: pavel@kernel.org P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96 -D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd, -D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, -D: work on suspend-to-ram/disk, killing duplicates from ioctl32, +D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk, D: Altera SoCFPGA and Nokia N900 support. S: Czech Republic diff --git a/MAINTAINERS b/MAINTAINERS index 873aa2cce4d7..157818de0b55 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9418,7 +9418,7 @@ F: fs/freevxfs/ FREEZER M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.rst @@ -10253,7 +10253,7 @@ F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -13124,8 +13124,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Pavel Machek M: Lee Jones +M: Pavel Machek L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git @@ -16823,7 +16823,7 @@ F: include/linux/tick.h F: kernel/time/tick*.* NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS) -M: Pavel Machek +M: Pavel Machek M: Sakari Ailus L: linux-media@vger.kernel.org S: Maintained @@ -22849,7 +22849,7 @@ F: drivers/sh/ SUSPEND TO RAM M: "Rafael J. Wysocki" M: Len Brown -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org -- 2.51.0 From 1b3291f00013c86a9bb349d6158a9a7a4f0334fe Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 7 Feb 2025 03:21:46 +0900 Subject: [PATCH 06/16] MAINTAINERS: Remove myself I no longer have any faith left in the kernel development process or community management approach. Apple/ARM platform development will continue downstream. If I feel like sending some patches upstream in the future myself for whatever subtree I may, or I may not. Anyone who feels like fighting the upstreaming fight themselves is welcome to do so. Signed-off-by: Hector Martin Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 157818de0b55..20c9e0871215 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.* F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT -M: Hector Martin M: Sven Peter R: Alyssa Rosenzweig L: asahi@lists.linux.dev -- 2.51.0 From f354fc88a72ae83dacd68370f6fa040e5733bcfe Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 7 Feb 2025 15:08:55 +0800 Subject: [PATCH 07/16] kbuild: install-extmod-build: add missing quotation marks for CC variable While attempting to build a Debian packages with CC="ccache gcc", I saw the following error as builddeb builds linux-headers-$KERNELVERSION: make HOSTCC=ccache gcc VPATH= srcroot=. -f ./scripts/Makefile.build obj=debian/linux-headers-6.14.0-rc1/usr/src/linux-headers-6.14.0-rc1/scripts make[6]: *** No rule to make target 'gcc'. Stop. Upon investigation, it seems that one instance of $(CC) variable reference in ./scripts/package/install-extmod-build was missing quotation marks, causing the above error. Add the missing quotation marks around $(CC) to fix build. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Co-developed-by: Mingcong Bai Signed-off-by: Mingcong Bai Tested-by: WangYuli Signed-off-by: WangYuli Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index bb6e23c1174e..b724626ea0ca 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- 2.51.0 From 8f6629c004b193d23612641c3607e785819e97ab Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 17 Oct 2024 10:09:22 -0700 Subject: [PATCH 08/16] kbuild: Move -Wenum-enum-conversion to W=2 -Wenum-enum-conversion was strengthened in clang-19 to warn for C, which caused the kernel to move it to W=1 in commit 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") because there were numerous instances that would break builds with -Werror. Unfortunately, this is not a full solution, as more and more developers, subsystems, and distributors are building with W=1 as well, so they continue to see the numerous instances of this warning. Since the move to W=1, there have not been many new instances that have appeared through various build reports and the ones that have appeared seem to be following similar existing patterns, suggesting that most instances of this warning will not be real issues. The only alternatives for silencing this warning are adding casts (which is generally seen as an ugly practice) or refactoring the enums to macro defines or a unified enum (which may be undesirable because of type safety in other parts of the code). Move the warning to W=2, where warnings that occur frequently but may be relevant should reside. Cc: stable@vger.kernel.org Fixes: 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") Link: https://lore.kernel.org/ZwRA9SOcOjjLJcpi@google.com/ Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d53..a7003c1e66c7 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += -Wno-enum-compare-conditional -KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif @@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif -- 2.51.0 From c8c9b1d2d5b4377c72a979f5a26e842a869aefc9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 8 Feb 2025 00:15:11 -0500 Subject: [PATCH 09/16] fgraph: Fix set_graph_notrace with setting TRACE_GRAPH_NOTRACE_BIT The code was restructured where the function graph notrace code, that would not trace a function and all its children is done by setting a NOTRACE flag when the function that is not to be traced is hit. There's a TRACE_GRAPH_NOTRACE_BIT which defines the bit in the flags and a TRACE_GRAPH_NOTRACE which is the mask with that bit set. But the restructuring used TRACE_GRAPH_NOTRACE_BIT when it should have used TRACE_GRAPH_NOTRACE. For example: # cd /sys/kernel/tracing # echo set_track_prepare stack_trace_save > set_graph_notrace # echo function_graph > current_tracer # cat trace [..] 0) | __slab_free() { 0) | free_to_partial_list() { 0) | arch_stack_walk() { 0) | __unwind_start() { 0) 0.501 us | get_stack_info(); Where a non filter trace looks like: # echo > set_graph_notrace # cat trace 0) | free_to_partial_list() { 0) | set_track_prepare() { 0) | stack_trace_save() { 0) | arch_stack_walk() { 0) | __unwind_start() { Where the filter should look like: # cat trace 0) | free_to_partial_list() { 0) | _raw_spin_lock_irqsave() { 0) 0.350 us | preempt_count_add(); 0) 0.351 us | do_raw_spin_lock(); 0) 2.440 us | } Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250208001511.535be150@batman.local.home Fixes: b84214890a9bc ("function_graph: Move graph notrace bit to shadow stack global var") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 54d850997c0a..136c750b0b4d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - *task_var |= TRACE_GRAPH_NOTRACE_BIT; + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. -- 2.51.0 From 7585946243d614bd2cd4e13377be2c711c9539e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Feb 2025 18:54:28 +0100 Subject: [PATCH 10/16] PM: sleep: core: Restrict power.set_active propagation Commit 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") exposed an issue related to simple_pm_bus_pm_ops that uses pm_runtime_force_suspend() and pm_runtime_force_resume() as bus type PM callbacks for the noirq phases of system-wide suspend and resume. The problem is that pm_runtime_force_suspend() does not distinguish runtime-suspended devices from devices for which runtime PM has never been enabled, so if it sees a device with runtime PM status set to RPM_ACTIVE, it will assume that runtime PM is enabled for that device and so it will attempt to suspend it with the help of its runtime PM callbacks which may not be ready for that. As it turns out, this causes simple_pm_bus_runtime_suspend() to crash due to a NULL pointer dereference. Another problem related to the above commit and simple_pm_bus_pm_ops is that setting runtime PM status of a device handled by the latter to RPM_ACTIVE will actually prevent it from being resumed because pm_runtime_force_resume() only resumes devices with runtime PM status set to RPM_SUSPENDED. To mitigate these issues, do not allow power.set_active to propagate beyond the parent of the device with DPM_FLAG_SMART_SUSPEND set that will need to be resumed, which should be a sufficient stop-gap for the time being, but they will need to be properly addressed in the future because in general during system-wide resume it is necessary to resume all devices in a dependency chain in which at least one device is going to be resumed. Fixes: 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") Closes: https://lore.kernel.org/linux-pm/1c2433d4-7e0f-4395-b841-b8eac7c25651@nvidia.com/ Reported-by: Jon Hunter Tested-by: Johan Hovold Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/6137505.lOV4Wx5bFT@rjwysocki.net --- drivers/base/power/main.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d497d448e4b2..40e1d8d8a589 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev, bool set_active) +static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; - if (dev->parent) { + if (dev->parent) dev->parent->power.must_resume = true; - if (set_active) - dev->parent->power.set_active = true; - } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; - if (set_active) - link->supplier->power.set_active = true; - } device_links_read_unlock(idx); } @@ -1287,9 +1281,12 @@ Skip: dev->power.must_resume = true; if (dev->power.must_resume) { - dev->power.set_active = dev->power.set_active || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - dpm_superior_set_must_resume(dev, dev->power.set_active); + if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { + dev->power.set_active = true; + if (dev->parent && !dev->parent->power.ignore_children) + dev->parent->power.set_active = true; + } + dpm_superior_set_must_resume(dev); } Complete: -- 2.51.0 From a64dcfb451e254085a7daee5fe51bf22959d52d3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2025 12:45:03 -0800 Subject: [PATCH 11/16] Linux 6.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e0d63d9d94b..89628e354ca7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From ebc4176551cdd021d02f4d2ed734e7b65e44442a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Feb 2025 22:00:35 -0800 Subject: [PATCH 12/16] blk-crypto: add basic hardware-wrapped key support To prevent keys from being compromised if an attacker acquires read access to kernel memory, some inline encryption hardware can accept keys which are wrapped by a per-boot hardware-internal key. This avoids needing to keep the raw keys in kernel memory, without limiting the number of keys that can be used. Such hardware also supports deriving a "software secret" for cryptographic tasks that can't be handled by inline encryption; this is needed for fscrypt to work properly. To support this hardware, allow struct blk_crypto_key to represent a hardware-wrapped key as an alternative to a raw key, and make drivers set flags in struct blk_crypto_profile to indicate which types of keys they support. Also add the ->derive_sw_secret() low-level operation, which drivers supporting wrapped keys must implement. For more information, see the detailed documentation which this patch adds to Documentation/block/inline-encryption.rst. Signed-off-by: Eric Biggers Tested-by: Bartosz Golaszewski # sm8650 Link: https://lore.kernel.org/r/20250204060041.409950-2-ebiggers@kernel.org Signed-off-by: Jens Axboe --- Documentation/block/inline-encryption.rst | 219 +++++++++++++++++++++- block/blk-crypto-fallback.c | 7 +- block/blk-crypto-internal.h | 1 + block/blk-crypto-profile.c | 46 +++++ block/blk-crypto.c | 61 ++++-- drivers/md/dm-table.c | 1 + drivers/mmc/host/cqhci-crypto.c | 8 +- drivers/mmc/host/sdhci-msm.c | 3 +- drivers/ufs/core/ufshcd-crypto.c | 7 +- drivers/ufs/host/ufs-exynos.c | 3 +- drivers/ufs/host/ufs-qcom.c | 3 +- fs/crypto/inline_crypt.c | 4 +- include/linux/blk-crypto-profile.h | 20 ++ include/linux/blk-crypto.h | 72 ++++++- 14 files changed, 417 insertions(+), 38 deletions(-) diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index 90b733422ed4..f03bd5b090d8 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -77,10 +77,10 @@ Basic design ============ We introduce ``struct blk_crypto_key`` to represent an inline encryption key and -how it will be used. This includes the actual bytes of the key; the size of the -key; the algorithm and data unit size the key will be used with; and the number -of bytes needed to represent the maximum data unit number the key will be used -with. +how it will be used. This includes the type of the key (raw or +hardware-wrapped); the actual bytes of the key; the size of the key; the +algorithm and data unit size the key will be used with; and the number of bytes +needed to represent the maximum data unit number the key will be used with. We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It contains a data unit number and a pointer to a blk_crypto_key. We add pointers @@ -301,3 +301,214 @@ kernel will pretend that the device does not support hardware inline encryption When the crypto API fallback is enabled, this means that all bios with and encryption context will use the fallback, and IO will complete as usual. When the fallback is disabled, a bio with an encryption context will be failed. + +.. _hardware_wrapped_keys: + +Hardware-wrapped keys +===================== + +Motivation and threat model +--------------------------- + +Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally +relies on the raw encryption key(s) being present in kernel memory so that the +encryption can be performed. This traditionally isn't seen as a problem because +the key(s) won't be present during an offline attack, which is the main type of +attack that storage encryption is intended to protect from. + +However, there is an increasing desire to also protect users' data from other +types of attacks (to the extent possible), including: + +- Cold boot attacks, where an attacker with physical access to a system suddenly + powers it off, then immediately dumps the system memory to extract recently + in-use encryption keys, then uses these keys to decrypt user data on-disk. + +- Online attacks where the attacker is able to read kernel memory without fully + compromising the system, followed by an offline attack where any extracted + keys can be used to decrypt user data on-disk. An example of such an online + attack would be if the attacker is able to run some code on the system that + exploits a Meltdown-like vulnerability but is unable to escalate privileges. + +- Online attacks where the attacker fully compromises the system, but their data + exfiltration is significantly time-limited and/or bandwidth-limited, so in + order to completely exfiltrate the data they need to extract the encryption + keys to use in a later offline attack. + +Hardware-wrapped keys are a feature of inline encryption hardware that is +designed to protect users' data from the above attacks (to the extent possible), +without introducing limitations such as a maximum number of keys. + +Note that it is impossible to **fully** protect users' data from these attacks. +Even in the attacks where the attacker "just" gets read access to kernel memory, +they can still extract any user data that is present in memory, including +plaintext pagecache pages of encrypted files. The focus here is just on +protecting the encryption keys, as those instantly give access to **all** user +data in any following offline attack, rather than just some of it (where which +data is included in that "some" might not be controlled by the attacker). + +Solution overview +----------------- + +Inline encryption hardware typically has "keyslots" into which software can +program keys for the hardware to use; the contents of keyslots typically can't +be read back by software. As such, the above security goals could be achieved +if the kernel simply erased its copy of the key(s) after programming them into +keyslot(s) and thereafter only referred to them via keyslot number. + +However, that naive approach runs into a couple problems: + +- It limits the number of unlocked keys to the number of keyslots, which + typically is a small number. In cases where there is only one encryption key + system-wide (e.g., a full-disk encryption key), that can be tolerable. + However, in general there can be many logged-in users with many different + keys, and/or many running applications with application-specific encrypted + storage areas. This is especially true if file-based encryption (e.g. + fscrypt) is being used. + +- Inline crypto engines typically lose the contents of their keyslots if the + storage controller (usually UFS or eMMC) is reset. Resetting the storage + controller is a standard error recovery procedure that is executed if certain + types of storage errors occur, and such errors can occur at any time. + Therefore, when inline crypto is being used, the operating system must always + be ready to reprogram the keyslots without user intervention. + +Thus, it is important for the kernel to still have a way to "remind" the +hardware about a key, without actually having the raw key itself. + +Somewhat less importantly, it is also desirable that the raw keys are never +visible to software at all, even while being initially unlocked. This would +ensure that a read-only compromise of system memory will never allow a key to be +extracted to be used off-system, even if it occurs when a key is being unlocked. + +To solve all these problems, some vendors of inline encryption hardware have +made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys +are encrypted keys that can only be unwrapped (decrypted) and used by hardware +-- either by the inline encryption hardware itself, or by a dedicated hardware +block that can directly provision keys to the inline encryption hardware. + +(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys" +to add some clarity in cases where there could be other types of wrapped keys, +such as in file-based encryption. Key wrapping is a commonly used technique.) + +The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key +that is never exposed to software; it is either a persistent key (a "long-term +wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term +wrapped form of the key is what is initially unlocked, but it is erased from +memory as soon as it is converted into an ephemerally-wrapped key. In-use +hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped. + +As inline encryption hardware can only be used to encrypt/decrypt data on-disk, +the hardware also includes a level of indirection; it doesn't use the unwrapped +key directly for inline encryption, but rather derives both an inline encryption +key and a "software secret" from it. Software can use the "software secret" for +tasks that can't use the inline encryption hardware, such as filenames +encryption. The software secret is not protected from memory compromise. + +Key hierarchy +------------- + +Here is the key hierarchy for a hardware-wrapped key:: + + Hardware-wrapped key + | + | + + | + ----------------------------- + | | + Inline encryption key Software secret + +The components are: + +- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation + Function), in ephemerally-wrapped form. The key wrapping algorithm is a + hardware implementation detail that doesn't impact kernel operation, but a + strong authenticated encryption algorithm such as AES-256-GCM is recommended. + +- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to + derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF + doesn't impact kernel operation, but it does need to be known for testing + purposes, and it's also assumed to have at least a 256-bit security strength. + All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC, + with a particular choice of labels and contexts; new hardware should use this + already-vetted KDF. + +- *Inline encryption key*: a derived key which the hardware directly provisions + to a keyslot of the inline encryption hardware, without exposing it to + software. In all known hardware, this will always be an AES-256-XTS key. + However, in principle other encryption algorithms could be supported too. + Hardware must derive distinct subkeys for each supported encryption algorithm. + +- *Software secret*: a derived key which the hardware returns to software so + that software can use it for cryptographic tasks that can't use inline + encryption. This value is cryptographically isolated from the inline + encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures + this.) Currently, the software secret is always 32 bytes and thus is suitable + for cryptographic applications that require up to a 256-bit security strength. + Some use cases (e.g. full-disk encryption) won't require the software secret. + +Example: in the case of fscrypt, the fscrypt master key (the key that protects a +particular set of encrypted directories) is made hardware-wrapped. The inline +encryption key is used as the file contents encryption key, while the software +secret (rather than the master key directly) is used to key fscrypt's KDF +(HKDF-SHA512) to derive other subkeys such as filenames encryption keys. + +Note that currently this design assumes a single inline encryption key per +hardware-wrapped key, without any further key derivation. Thus, in the case of +fscrypt, currently hardware-wrapped keys are only compatible with the "inline +encryption optimized" settings, which use one file contents encryption key per +encryption policy rather than one per file. This design could be extended to +make the hardware derive per-file keys using per-file nonces passed down the +storage stack, and in fact some hardware already supports this; future work is +planned to remove this limitation by adding the corresponding kernel support. + +Kernel support +-------------- + +The inline encryption support of the kernel's block layer ("blk-crypto") has +been extended to support hardware-wrapped keys as an alternative to raw keys, +when hardware support is available. This works in the following way: + +- A ``key_types_supported`` field is added to the crypto capabilities in + ``struct blk_crypto_profile``. This allows device drivers to declare that + they support raw keys, hardware-wrapped keys, or both. + +- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an + alternative to a raw key; a ``key_type`` field is added to + ``struct blk_crypto_config`` to distinguish between the different key types. + This allows users of blk-crypto to en/decrypt data using a hardware-wrapped + key in a way very similar to using a raw key. + +- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers + that support hardware-wrapped keys must implement this method. Users of + blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method. + +- The programming and eviction of hardware-wrapped keys happens via + ``blk_crypto_ll_ops::keyslot_program`` and + ``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys. If a + driver supports hardware-wrapped keys, then it must handle hardware-wrapped + keys being passed to these methods. + +blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore, +hardware-wrapped keys can only be used with actual inline encryption hardware. + +Testability +----------- + +Both the hardware KDF and the inline encryption itself are well-defined +algorithms that don't depend on any secrets other than the unwrapped key. +Therefore, if the unwrapped key is known to software, these algorithms can be +reproduced in software in order to verify the ciphertext that is written to disk +by the inline encryption hardware. + +However, the unwrapped key will only be known to software for testing if the +"import" functionality is used. Proper testing is not possible in the +"generate" case where the hardware generates the key itself. The correct +operation of the "generate" mode thus relies on the security and correctness of +the hardware RNG and its use to generate the key, as well as the testing of the +"import" mode as that should cover all parts other than the key generation. + +For an example of a test that verifies the ciphertext written to disk in the +"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or +`Android's vts_kernel_encryption_test +`_. diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 29a205482617..f154be0b575a 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -87,7 +87,7 @@ static struct bio_set crypto_bio_split; * This is the key we set when evicting a keyslot. This *should* be the all 0's * key, but AES-XTS rejects that key, so we use some random bytes instead. */ -static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; +static u8 blank_key[BLK_CRYPTO_MAX_RAW_KEY_SIZE]; static void blk_crypto_fallback_evict_keyslot(unsigned int slot) { @@ -119,7 +119,7 @@ blk_crypto_fallback_keyslot_program(struct blk_crypto_profile *profile, blk_crypto_fallback_evict_keyslot(slot); slotp->crypto_mode = crypto_mode; - err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw, + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->bytes, key->size); if (err) { blk_crypto_fallback_evict_keyslot(slot); @@ -539,7 +539,7 @@ static int blk_crypto_fallback_init(void) if (blk_crypto_fallback_inited) return 0; - get_random_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); + get_random_bytes(blank_key, sizeof(blank_key)); err = bioset_init(&crypto_bio_split, 64, 0, 0); if (err) @@ -561,6 +561,7 @@ static int blk_crypto_fallback_init(void) blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops; blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE; + blk_crypto_fallback_profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; /* All blk-crypto modes have a crypto API fallback. */ for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 93a141979694..1893df9a8f06 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -14,6 +14,7 @@ struct blk_crypto_mode { const char *name; /* name of this mode, shown in sysfs */ const char *cipher_str; /* crypto API name (for fallback case) */ unsigned int keysize; /* key size in bytes */ + unsigned int security_strength; /* security strength in bytes */ unsigned int ivsize; /* iv size in bytes */ }; diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index 7fabc883e39f..a990d9026c83 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -352,6 +352,8 @@ bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, return false; if (profile->max_dun_bytes_supported < cfg->dun_bytes) return false; + if (!(profile->key_types_supported & cfg->key_type)) + return false; return true; } @@ -462,6 +464,44 @@ bool blk_crypto_register(struct blk_crypto_profile *profile, } EXPORT_SYMBOL_GPL(blk_crypto_register); +/** + * blk_crypto_derive_sw_secret() - Derive software secret from wrapped key + * @bdev: a block device that supports hardware-wrapped keys + * @eph_key: a hardware-wrapped key in ephemerally-wrapped form + * @eph_key_size: size of @eph_key in bytes + * @sw_secret: (output) the software secret + * + * Given a hardware-wrapped key in ephemerally-wrapped form (the same form that + * it is used for I/O), ask the hardware to derive the secret which software can + * use for cryptographic tasks other than inline encryption. This secret is + * guaranteed to be cryptographically isolated from the inline encryption key, + * i.e. derived with a different KDF context. + * + * Return: 0 on success, -EOPNOTSUPP if the block device doesn't support + * hardware-wrapped keys, -EBADMSG if the key isn't a valid + * ephemerally-wrapped key, or another -errno code. + */ +int blk_crypto_derive_sw_secret(struct block_device *bdev, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]) +{ + struct blk_crypto_profile *profile = + bdev_get_queue(bdev)->crypto_profile; + int err; + + if (!profile) + return -EOPNOTSUPP; + if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED)) + return -EOPNOTSUPP; + if (!profile->ll_ops.derive_sw_secret) + return -EOPNOTSUPP; + blk_crypto_hw_enter(profile); + err = profile->ll_ops.derive_sw_secret(profile, eph_key, eph_key_size, + sw_secret); + blk_crypto_hw_exit(profile); + return err; +} + /** * blk_crypto_intersect_capabilities() - restrict supported crypto capabilities * by child device @@ -485,10 +525,12 @@ void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, child->max_dun_bytes_supported); for (i = 0; i < ARRAY_SIZE(child->modes_supported); i++) parent->modes_supported[i] &= child->modes_supported[i]; + parent->key_types_supported &= child->key_types_supported; } else { parent->max_dun_bytes_supported = 0; memset(parent->modes_supported, 0, sizeof(parent->modes_supported)); + parent->key_types_supported = 0; } } EXPORT_SYMBOL_GPL(blk_crypto_intersect_capabilities); @@ -521,6 +563,9 @@ bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target, target->max_dun_bytes_supported) return false; + if (reference->key_types_supported & ~target->key_types_supported) + return false; + return true; } EXPORT_SYMBOL_GPL(blk_crypto_has_capabilities); @@ -555,5 +600,6 @@ void blk_crypto_update_capabilities(struct blk_crypto_profile *dst, sizeof(dst->modes_supported)); dst->max_dun_bytes_supported = src->max_dun_bytes_supported; + dst->key_types_supported = src->key_types_supported; } EXPORT_SYMBOL_GPL(blk_crypto_update_capabilities); diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 4d760b092deb..72975a980fbc 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -23,24 +23,28 @@ const struct blk_crypto_mode blk_crypto_modes[] = { .name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, + .security_strength = 32, .ivsize = 16, }, [BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = { .name = "AES-128-CBC-ESSIV", .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, + .security_strength = 16, .ivsize = 16, }, [BLK_ENCRYPTION_MODE_ADIANTUM] = { .name = "Adiantum", .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, + .security_strength = 32, .ivsize = 32, }, [BLK_ENCRYPTION_MODE_SM4_XTS] = { .name = "SM4-XTS", .cipher_str = "xts(sm4)", .keysize = 32, + .security_strength = 16, .ivsize = 16, }, }; @@ -76,9 +80,15 @@ static int __init bio_crypt_ctx_init(void) /* This is assumed in various places. */ BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0); - /* Sanity check that no algorithm exceeds the defined limits. */ + /* + * Validate the crypto mode properties. This ideally would be done with + * static assertions, but boot-time checks are the next best thing. + */ for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) { - BUG_ON(blk_crypto_modes[i].keysize > BLK_CRYPTO_MAX_KEY_SIZE); + BUG_ON(blk_crypto_modes[i].keysize > + BLK_CRYPTO_MAX_RAW_KEY_SIZE); + BUG_ON(blk_crypto_modes[i].security_strength > + blk_crypto_modes[i].keysize); BUG_ON(blk_crypto_modes[i].ivsize > BLK_CRYPTO_MAX_IV_SIZE); } @@ -315,17 +325,20 @@ int __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio, /** * blk_crypto_init_key() - Prepare a key for use with blk-crypto * @blk_key: Pointer to the blk_crypto_key to initialize. - * @raw_key: Pointer to the raw key. Must be the correct length for the chosen - * @crypto_mode; see blk_crypto_modes[]. + * @key_bytes: the bytes of the key + * @key_size: size of the key in bytes + * @key_type: type of the key -- either raw or hardware-wrapped * @crypto_mode: identifier for the encryption algorithm to use * @dun_bytes: number of bytes that will be used to specify the DUN when this * key is used * @data_unit_size: the data unit size to use for en/decryption * * Return: 0 on success, -errno on failure. The caller is responsible for - * zeroizing both blk_key and raw_key when done with them. + * zeroizing both blk_key and key_bytes when done with them. */ -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *key_bytes, size_t key_size, + enum blk_crypto_key_type key_type, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size) @@ -338,8 +351,19 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, return -EINVAL; mode = &blk_crypto_modes[crypto_mode]; - if (mode->keysize == 0) + switch (key_type) { + case BLK_CRYPTO_KEY_TYPE_RAW: + if (key_size != mode->keysize) + return -EINVAL; + break; + case BLK_CRYPTO_KEY_TYPE_HW_WRAPPED: + if (key_size < mode->security_strength || + key_size > BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE) + return -EINVAL; + break; + default: return -EINVAL; + } if (dun_bytes == 0 || dun_bytes > mode->ivsize) return -EINVAL; @@ -350,9 +374,10 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, blk_key->crypto_cfg.crypto_mode = crypto_mode; blk_key->crypto_cfg.dun_bytes = dun_bytes; blk_key->crypto_cfg.data_unit_size = data_unit_size; + blk_key->crypto_cfg.key_type = key_type; blk_key->data_unit_size_bits = ilog2(data_unit_size); - blk_key->size = mode->keysize; - memcpy(blk_key->raw, raw_key, mode->keysize); + blk_key->size = key_size; + memcpy(blk_key->bytes, key_bytes, key_size); return 0; } @@ -372,8 +397,10 @@ bool blk_crypto_config_supported_natively(struct block_device *bdev, bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg) { - return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || - blk_crypto_config_supported_natively(bdev, cfg); + if (IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) && + cfg->key_type == BLK_CRYPTO_KEY_TYPE_RAW) + return true; + return blk_crypto_config_supported_natively(bdev, cfg); } /** @@ -387,15 +414,21 @@ bool blk_crypto_config_supported(struct block_device *bdev, * an skcipher, and *should not* be called from the data path, since that might * cause a deadlock * - * Return: 0 on success; -ENOPKG if the hardware doesn't support the key and - * blk-crypto-fallback is either disabled or the needed algorithm - * is disabled in the crypto API; or another -errno code. + * Return: 0 on success; -EOPNOTSUPP if the key is wrapped but the hardware does + * not support wrapped keys; -ENOPKG if the key is a raw key but the + * hardware does not support raw keys and blk-crypto-fallback is either + * disabled or the needed algorithm is disabled in the crypto API; or + * another -errno code if something else went wrong. */ int blk_crypto_start_using_key(struct block_device *bdev, const struct blk_crypto_key *key) { if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg)) return 0; + if (key->crypto_cfg.key_type != BLK_CRYPTO_KEY_TYPE_RAW) { + pr_warn_ratelimited("%pg: no support for wrapped keys\n", bdev); + return -EOPNOTSUPP; + } return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode); } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0ef5203387b2..bf9a61191e9a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1250,6 +1250,7 @@ static int dm_table_construct_crypto_profile(struct dm_table *t) profile->max_dun_bytes_supported = UINT_MAX; memset(profile->modes_supported, 0xFF, sizeof(profile->modes_supported)); + profile->key_types_supported = ~0; for (i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); diff --git a/drivers/mmc/host/cqhci-crypto.c b/drivers/mmc/host/cqhci-crypto.c index cb8044093402..5a467098a0d6 100644 --- a/drivers/mmc/host/cqhci-crypto.c +++ b/drivers/mmc/host/cqhci-crypto.c @@ -84,11 +84,11 @@ static int cqhci_crypto_keyslot_program(struct blk_crypto_profile *profile, if (ccap_array[cap_idx].algorithm_id == CQHCI_CRYPTO_ALG_AES_XTS) { /* In XTS mode, the blk_crypto_key's size is already doubled */ - memcpy(cfg.crypto_key, key->raw, key->size/2); + memcpy(cfg.crypto_key, key->bytes, key->size/2); memcpy(cfg.crypto_key + CQHCI_CRYPTO_KEY_MAX_SIZE/2, - key->raw + key->size/2, key->size/2); + key->bytes + key->size/2, key->size/2); } else { - memcpy(cfg.crypto_key, key->raw, key->size); + memcpy(cfg.crypto_key, key->bytes, key->size); } cqhci_crypto_program_key(cq_host, &cfg, slot); @@ -204,6 +204,8 @@ int cqhci_crypto_init(struct cqhci_host *cq_host) /* Unfortunately, CQHCI crypto only supports 32 DUN bits. */ profile->max_dun_bytes_supported = 4; + profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; + /* * Cache all the crypto capabilities and advertise the supported crypto * modes and data unit sizes to the block layer. diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index e3d39311fdc7..3c383bce4928 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1895,6 +1895,7 @@ static int sdhci_msm_ice_init(struct sdhci_msm_host *msm_host, profile->ll_ops = sdhci_msm_crypto_ops; profile->max_dun_bytes_supported = 4; + profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; profile->dev = dev; /* @@ -1968,7 +1969,7 @@ static int sdhci_msm_ice_keyslot_program(struct blk_crypto_profile *profile, return qcom_ice_program_key(msm_host->ice, QCOM_ICE_CRYPTO_ALG_AES_XTS, QCOM_ICE_CRYPTO_KEY_SIZE_256, - key->raw, + key->bytes, key->crypto_cfg.data_unit_size / 512, slot); } diff --git a/drivers/ufs/core/ufshcd-crypto.c b/drivers/ufs/core/ufshcd-crypto.c index 694ff7578fc1..9e63a9d3cb7e 100644 --- a/drivers/ufs/core/ufshcd-crypto.c +++ b/drivers/ufs/core/ufshcd-crypto.c @@ -72,11 +72,11 @@ static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile, if (ccap_array[cap_idx].algorithm_id == UFS_CRYPTO_ALG_AES_XTS) { /* In XTS mode, the blk_crypto_key's size is already doubled */ - memcpy(cfg.crypto_key, key->raw, key->size/2); + memcpy(cfg.crypto_key, key->bytes, key->size/2); memcpy(cfg.crypto_key + UFS_CRYPTO_KEY_MAX_SIZE/2, - key->raw + key->size/2, key->size/2); + key->bytes + key->size/2, key->size/2); } else { - memcpy(cfg.crypto_key, key->raw, key->size); + memcpy(cfg.crypto_key, key->bytes, key->size); } ufshcd_program_key(hba, &cfg, slot); @@ -185,6 +185,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) hba->crypto_profile.ll_ops = ufshcd_crypto_ops; /* UFS only supports 8 bytes for any DUN */ hba->crypto_profile.max_dun_bytes_supported = 8; + hba->crypto_profile.key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; hba->crypto_profile.dev = hba->dev; /* diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 13dd5dfc03eb..6a415d9bdc85 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1320,6 +1320,7 @@ static void exynos_ufs_fmp_init(struct ufs_hba *hba, struct exynos_ufs *ufs) return; } profile->max_dun_bytes_supported = AES_BLOCK_SIZE; + profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; profile->dev = hba->dev; profile->modes_supported[BLK_ENCRYPTION_MODE_AES_256_XTS] = DATA_UNIT_SIZE; @@ -1366,7 +1367,7 @@ static int exynos_ufs_fmp_fill_prdt(struct ufs_hba *hba, void *prdt, unsigned int num_segments) { struct fmp_sg_entry *fmp_prdt = prdt; - const u8 *enckey = crypt_ctx->bc_key->raw; + const u8 *enckey = crypt_ctx->bc_key->bytes; const u8 *twkey = enckey + AES_KEYSIZE_256; u64 dun_lo = crypt_ctx->bc_dun[0]; u64 dun_hi = crypt_ctx->bc_dun[1]; diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 23b9f6efa047..c3f0aa81ff98 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -147,6 +147,7 @@ static int ufs_qcom_ice_init(struct ufs_qcom_host *host) profile->ll_ops = ufs_qcom_crypto_ops; profile->max_dun_bytes_supported = 8; + profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW; profile->dev = dev; /* @@ -202,7 +203,7 @@ static int ufs_qcom_ice_keyslot_program(struct blk_crypto_profile *profile, err = qcom_ice_program_key(host->ice, QCOM_ICE_CRYPTO_ALG_AES_XTS, QCOM_ICE_CRYPTO_KEY_SIZE_256, - key->raw, + key->bytes, key->crypto_cfg.data_unit_size / 512, slot); ufshcd_release(hba); diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 40de69860dcf..7fa53d30aec3 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -130,6 +130,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci) crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode; crypto_cfg.data_unit_size = 1U << ci->ci_data_unit_bits; crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci); + crypto_cfg.key_type = BLK_CRYPTO_KEY_TYPE_RAW; devs = fscrypt_get_devices(sb, &num_devs); if (IS_ERR(devs)) @@ -166,7 +167,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, if (!blk_key) return -ENOMEM; - err = blk_crypto_init_key(blk_key, raw_key, crypto_mode, + err = blk_crypto_init_key(blk_key, raw_key, ci->ci_mode->keysize, + BLK_CRYPTO_KEY_TYPE_RAW, crypto_mode, fscrypt_get_dun_bytes(ci), 1U << ci->ci_data_unit_bits); if (err) { diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index 90ab33cb5d0e..7764b4f7b45b 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -57,6 +57,20 @@ struct blk_crypto_ll_ops { int (*keyslot_evict)(struct blk_crypto_profile *profile, const struct blk_crypto_key *key, unsigned int slot); + + /** + * @derive_sw_secret: Derive the software secret from a hardware-wrapped + * key in ephemerally-wrapped form. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * Must return 0 on success, -EBADMSG if the key is invalid, or another + * -errno code on other errors. + */ + int (*derive_sw_secret)(struct blk_crypto_profile *profile, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); }; /** @@ -84,6 +98,12 @@ struct blk_crypto_profile { */ unsigned int max_dun_bytes_supported; + /** + * @key_types_supported: A bitmask of the supported key types: + * BLK_CRYPTO_KEY_TYPE_RAW and/or BLK_CRYPTO_KEY_TYPE_HW_WRAPPED. + */ + unsigned int key_types_supported; + /** * @modes_supported: Array of bitmasks that specifies whether each * combination of crypto mode and data unit size is supported. diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 5e5822c18ee4..81f932b3ea37 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -6,6 +6,7 @@ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H +#include #include enum blk_crypto_mode_num { @@ -17,7 +18,55 @@ enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_MAX, }; -#define BLK_CRYPTO_MAX_KEY_SIZE 64 +/* + * Supported types of keys. Must be bitflags due to their use in + * blk_crypto_profile::key_types_supported. + */ +enum blk_crypto_key_type { + /* + * Raw keys (i.e. "software keys"). These keys are simply kept in raw, + * plaintext form in kernel memory. + */ + BLK_CRYPTO_KEY_TYPE_RAW = 0x1, + + /* + * Hardware-wrapped keys. These keys are only present in kernel memory + * in ephemerally-wrapped form, and they can only be unwrapped by + * dedicated hardware. For details, see the "Hardware-wrapped keys" + * section of Documentation/block/inline-encryption.rst. + */ + BLK_CRYPTO_KEY_TYPE_HW_WRAPPED = 0x2, +}; + +/* + * Currently the maximum raw key size is 64 bytes, as that is the key size of + * BLK_ENCRYPTION_MODE_AES_256_XTS which takes the longest key. + * + * The maximum hardware-wrapped key size depends on the hardware's key wrapping + * algorithm, which is a hardware implementation detail, so it isn't precisely + * specified. But currently 128 bytes is plenty in practice. Implementations + * are recommended to wrap a 32-byte key for the hardware KDF with AES-256-GCM, + * which should result in a size closer to 64 bytes than 128. + * + * Both of these values can trivially be increased if ever needed. + */ +#define BLK_CRYPTO_MAX_RAW_KEY_SIZE 64 +#define BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE 128 + +#define BLK_CRYPTO_MAX_ANY_KEY_SIZE \ + MAX(BLK_CRYPTO_MAX_RAW_KEY_SIZE, BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE) + +/* + * Size of the "software secret" which can be derived from a hardware-wrapped + * key. This is currently always 32 bytes. Note, the choice of 32 bytes + * assumes that the software secret is only used directly for algorithms that + * don't require more than a 256-bit key to get the desired security strength. + * If it were to be used e.g. directly as an AES-256-XTS key, then this would + * need to be increased (which is possible if hardware supports it, but care + * would need to be taken to avoid breaking users who need exactly 32 bytes). + */ +#define BLK_CRYPTO_SW_SECRET_SIZE 32 + /** * struct blk_crypto_config - an inline encryption key's crypto configuration * @crypto_mode: encryption algorithm this key is for @@ -26,20 +75,23 @@ enum blk_crypto_mode_num { * ciphertext. This is always a power of 2. It might be e.g. the * filesystem block size or the disk sector size. * @dun_bytes: the maximum number of bytes of DUN used when using this key + * @key_type: the type of this key -- either raw or hardware-wrapped */ struct blk_crypto_config { enum blk_crypto_mode_num crypto_mode; unsigned int data_unit_size; unsigned int dun_bytes; + enum blk_crypto_key_type key_type; }; /** * struct blk_crypto_key - an inline encryption key - * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this - * key + * @crypto_cfg: the crypto mode, data unit size, key type, and other + * characteristics of this key and how it will be used * @data_unit_size_bits: log2 of data_unit_size - * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) - * @raw: the raw bytes of this key. Only the first @size bytes are used. + * @size: size of this key in bytes. The size of a raw key is fixed for a given + * crypto mode, but the size of a hardware-wrapped key can vary. + * @bytes: the bytes of this key. Only the first @size bytes are significant. * * A blk_crypto_key is immutable once created, and many bios can reference it at * the same time. It must not be freed until all bios using it have completed @@ -49,7 +101,7 @@ struct blk_crypto_key { struct blk_crypto_config crypto_cfg; unsigned int data_unit_size_bits; unsigned int size; - u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; + u8 bytes[BLK_CRYPTO_MAX_ANY_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 @@ -87,7 +139,9 @@ bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, unsigned int bytes, const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *key_bytes, size_t key_size, + enum blk_crypto_key_type key_type, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size); @@ -103,6 +157,10 @@ bool blk_crypto_config_supported_natively(struct block_device *bdev, bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); +int blk_crypto_derive_sw_secret(struct block_device *bdev, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_has_crypt_ctx(struct bio *bio) -- 2.51.0 From e35fde43e25ad725d27315992fba8088d1210b01 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Feb 2025 22:00:36 -0800 Subject: [PATCH 13/16] blk-crypto: show supported key types in sysfs Add sysfs files that indicate which type(s) of keys are supported by the inline encryption hardware associated with a particular request queue: /sys/block/$disk/queue/crypto/hw_wrapped_keys /sys/block/$disk/queue/crypto/raw_keys Userspace can use the presence or absence of these files to decide what encyption settings to use. Don't use a single key_type file, as devices might support both key types at the same time. Signed-off-by: Eric Biggers Tested-by: Bartosz Golaszewski # sm8650 Link: https://lore.kernel.org/r/20250204060041.409950-3-ebiggers@kernel.org Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 20 ++++++++++++++++ block/blk-crypto-sysfs.c | 35 ++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 0cceb2badc83..890cde28bf90 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -229,6 +229,17 @@ Description: encryption, refer to Documentation/block/inline-encryption.rst. +What: /sys/block//queue/crypto/hw_wrapped_keys +Date: February 2025 +Contact: linux-block@vger.kernel.org +Description: + [RO] The presence of this file indicates that the device + supports hardware-wrapped inline encryption keys, i.e. key blobs + that can only be unwrapped and used by dedicated hardware. For + more information about hardware-wrapped inline encryption keys, + see Documentation/block/inline-encryption.rst. + + What: /sys/block//queue/crypto/max_dun_bits Date: February 2022 Contact: linux-block@vger.kernel.org @@ -267,6 +278,15 @@ Description: use with inline encryption. +What: /sys/block//queue/crypto/raw_keys +Date: February 2025 +Contact: linux-block@vger.kernel.org +Description: + [RO] The presence of this file indicates that the device + supports raw inline encryption keys, i.e. keys that are managed + in raw, plaintext form in software. + + What: /sys/block//queue/dax Date: June 2016 Contact: linux-block@vger.kernel.org diff --git a/block/blk-crypto-sysfs.c b/block/blk-crypto-sysfs.c index a304434489ba..e832f403f200 100644 --- a/block/blk-crypto-sysfs.c +++ b/block/blk-crypto-sysfs.c @@ -31,6 +31,13 @@ static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr) return container_of(attr, struct blk_crypto_attr, attr); } +static ssize_t hw_wrapped_keys_show(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page) +{ + /* Always show supported, since the file doesn't exist otherwise. */ + return sysfs_emit(page, "supported\n"); +} + static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile, struct blk_crypto_attr *attr, char *page) { @@ -43,20 +50,48 @@ static ssize_t num_keyslots_show(struct blk_crypto_profile *profile, return sysfs_emit(page, "%u\n", profile->num_slots); } +static ssize_t raw_keys_show(struct blk_crypto_profile *profile, + struct blk_crypto_attr *attr, char *page) +{ + /* Always show supported, since the file doesn't exist otherwise. */ + return sysfs_emit(page, "supported\n"); +} + #define BLK_CRYPTO_RO_ATTR(_name) \ static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name) +BLK_CRYPTO_RO_ATTR(hw_wrapped_keys); BLK_CRYPTO_RO_ATTR(max_dun_bits); BLK_CRYPTO_RO_ATTR(num_keyslots); +BLK_CRYPTO_RO_ATTR(raw_keys); + +static umode_t blk_crypto_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj); + struct blk_crypto_attr *a = attr_to_crypto_attr(attr); + + if (a == &hw_wrapped_keys_attr && + !(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED)) + return 0; + if (a == &raw_keys_attr && + !(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_RAW)) + return 0; + + return 0444; +} static struct attribute *blk_crypto_attrs[] = { + &hw_wrapped_keys_attr.attr, &max_dun_bits_attr.attr, &num_keyslots_attr.attr, + &raw_keys_attr.attr, NULL, }; static const struct attribute_group blk_crypto_attr_group = { .attrs = blk_crypto_attrs, + .is_visible = blk_crypto_is_visible, }; /* -- 2.51.0 From 1ebd4a3c095cd538d3c1c7c12738ef47d8e71f96 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Feb 2025 22:00:37 -0800 Subject: [PATCH 14/16] blk-crypto: add ioctls to create and prepare hardware-wrapped keys Until this point, the kernel can use hardware-wrapped keys to do encryption if userspace provides one -- specifically a key in ephemerally-wrapped form. However, no generic way has been provided for userspace to get such a key in the first place. Getting such a key is a two-step process. First, the key needs to be imported from a raw key or generated by the hardware, producing a key in long-term wrapped form. This happens once in the whole lifetime of the key. Second, the long-term wrapped key needs to be converted into ephemerally-wrapped form. This happens each time the key is "unlocked". In Android, these operations are supported in a generic way through KeyMint, a userspace abstraction layer. However, that method is Android-specific and can't be used on other Linux systems, may rely on proprietary libraries, and also misleads people into supporting KeyMint features like rollback resistance that make sense for other KeyMint keys but don't make sense for hardware-wrapped inline encryption keys. Therefore, this patch provides a generic kernel interface for these operations by introducing new block device ioctls: - BLKCRYPTOIMPORTKEY: convert a raw key to long-term wrapped form. - BLKCRYPTOGENERATEKEY: have the hardware generate a new key, then return it in long-term wrapped form. - BLKCRYPTOPREPAREKEY: convert a key from long-term wrapped form to ephemerally-wrapped form. These ioctls are implemented using new operations in blk_crypto_ll_ops. Signed-off-by: Eric Biggers Tested-by: Bartosz Golaszewski # sm8650 Link: https://lore.kernel.org/r/20250204060041.409950-4-ebiggers@kernel.org Signed-off-by: Jens Axboe --- Documentation/block/inline-encryption.rst | 36 +++++ .../userspace-api/ioctl/ioctl-number.rst | 2 + block/blk-crypto-internal.h | 9 ++ block/blk-crypto-profile.c | 55 +++++++ block/blk-crypto.c | 143 ++++++++++++++++++ block/ioctl.c | 5 + include/linux/blk-crypto-profile.h | 53 +++++++ include/linux/blk-crypto.h | 1 + include/uapi/linux/blk-crypto.h | 44 ++++++ include/uapi/linux/fs.h | 6 +- 10 files changed, 350 insertions(+), 4 deletions(-) create mode 100644 include/uapi/linux/blk-crypto.h diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index f03bd5b090d8..6380e6ab492b 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -492,6 +492,42 @@ when hardware support is available. This works in the following way: blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore, hardware-wrapped keys can only be used with actual inline encryption hardware. +All the above deals with hardware-wrapped keys in ephemerally-wrapped form only. +To get such keys in the first place, new block device ioctls have been added to +provide a generic interface to creating and preparing such keys: + +- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form. It takes + in a pointer to a ``struct blk_crypto_import_key_arg``. The caller must set + ``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the + raw key to import. On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes + the resulting long-term wrapped key blob to the buffer pointed to by + ``lt_key_ptr``, which is of maximum size ``lt_key_size``. It also updates + ``lt_key_size`` to be the actual size of the key. On failure, it returns -1 + and sets errno. An errno of ``EOPNOTSUPP`` indicates that the block device + does not support hardware-wrapped keys. An errno of ``EOVERFLOW`` indicates + that the output buffer did not have enough space for the key blob. + +- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the + hardware generate the key instead of importing one. It takes in a pointer to + a ``struct blk_crypto_generate_key_arg``. + +- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to + ephemerally-wrapped form. It takes in a pointer to a ``struct + blk_crypto_prepare_key_arg``. The caller must set ``lt_key_ptr`` and + ``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped + key blob to convert. On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes + the resulting ephemerally-wrapped key blob to the buffer pointed to by + ``eph_key_ptr``, which is of maximum size ``eph_key_size``. It also updates + ``eph_key_size`` to be the actual size of the key. On failure, it returns -1 + and sets errno. Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the + same as they do for ``BLKCRYPTOIMPORTKEY``. An errno of ``EBADMSG`` indicates + that the long-term wrapped key is invalid. + +Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY`` +once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is +unlocked and added to the kernel. Note that these ioctls have no relevance for +raw keys; they are only for hardware-wrapped keys. + Testability ----------- diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 6d1465315df3..d448c010d307 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -85,6 +85,8 @@ Code Seq# Include File Comments 0x10 20-2F arch/s390/include/uapi/asm/hypfs.h 0x12 all linux/fs.h BLK* ioctls linux/blkpg.h + linux/blkzoned.h + linux/blk-crypto.h 0x15 all linux/fs.h FS_IOC_* ioctls 0x1b all InfiniBand Subsystem diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 1893df9a8f06..ccf6dff6ff6b 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -83,6 +83,9 @@ int __blk_crypto_evict_key(struct blk_crypto_profile *profile, bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, const struct blk_crypto_config *cfg); +int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd, + void __user *argp); + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline int blk_crypto_sysfs_register(struct gendisk *disk) @@ -130,6 +133,12 @@ static inline bool blk_crypto_rq_has_keyslot(struct request *rq) return false; } +static inline int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd, + void __user *argp) +{ + return -ENOTTY; +} + #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ void __bio_crypt_advance(struct bio *bio, unsigned int bytes); diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index a990d9026c83..94a155912bf1 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -502,6 +502,61 @@ int blk_crypto_derive_sw_secret(struct block_device *bdev, return err; } +int blk_crypto_import_key(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]) +{ + int ret; + + if (!profile) + return -EOPNOTSUPP; + if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED)) + return -EOPNOTSUPP; + if (!profile->ll_ops.import_key) + return -EOPNOTSUPP; + blk_crypto_hw_enter(profile); + ret = profile->ll_ops.import_key(profile, raw_key, raw_key_size, + lt_key); + blk_crypto_hw_exit(profile); + return ret; +} + +int blk_crypto_generate_key(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]) +{ + int ret; + + if (!profile) + return -EOPNOTSUPP; + if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED)) + return -EOPNOTSUPP; + if (!profile->ll_ops.generate_key) + return -EOPNOTSUPP; + blk_crypto_hw_enter(profile); + ret = profile->ll_ops.generate_key(profile, lt_key); + blk_crypto_hw_exit(profile); + return ret; +} + +int blk_crypto_prepare_key(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]) +{ + int ret; + + if (!profile) + return -EOPNOTSUPP; + if (!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED)) + return -EOPNOTSUPP; + if (!profile->ll_ops.prepare_key) + return -EOPNOTSUPP; + blk_crypto_hw_enter(profile); + ret = profile->ll_ops.prepare_key(profile, lt_key, lt_key_size, + eph_key); + blk_crypto_hw_exit(profile); + return ret; +} + /** * blk_crypto_intersect_capabilities() - restrict supported crypto capabilities * by child device diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 72975a980fbc..4b1ad84d1b5a 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -469,3 +469,146 @@ void blk_crypto_evict_key(struct block_device *bdev, pr_warn_ratelimited("%pg: error %d evicting key\n", bdev, err); } EXPORT_SYMBOL_GPL(blk_crypto_evict_key); + +static int blk_crypto_ioctl_import_key(struct blk_crypto_profile *profile, + void __user *argp) +{ + struct blk_crypto_import_key_arg arg; + u8 raw_key[BLK_CRYPTO_MAX_RAW_KEY_SIZE]; + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]; + int ret; + + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + + if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved))) + return -EINVAL; + + if (arg.raw_key_size < 16 || arg.raw_key_size > sizeof(raw_key)) + return -EINVAL; + + if (copy_from_user(raw_key, u64_to_user_ptr(arg.raw_key_ptr), + arg.raw_key_size)) { + ret = -EFAULT; + goto out; + } + ret = blk_crypto_import_key(profile, raw_key, arg.raw_key_size, lt_key); + if (ret < 0) + goto out; + if (ret > arg.lt_key_size) { + ret = -EOVERFLOW; + goto out; + } + arg.lt_key_size = ret; + if (copy_to_user(u64_to_user_ptr(arg.lt_key_ptr), lt_key, + arg.lt_key_size) || + copy_to_user(argp, &arg, sizeof(arg))) { + ret = -EFAULT; + goto out; + } + ret = 0; + +out: + memzero_explicit(raw_key, sizeof(raw_key)); + memzero_explicit(lt_key, sizeof(lt_key)); + return ret; +} + +static int blk_crypto_ioctl_generate_key(struct blk_crypto_profile *profile, + void __user *argp) +{ + struct blk_crypto_generate_key_arg arg; + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]; + int ret; + + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + + if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved))) + return -EINVAL; + + ret = blk_crypto_generate_key(profile, lt_key); + if (ret < 0) + goto out; + if (ret > arg.lt_key_size) { + ret = -EOVERFLOW; + goto out; + } + arg.lt_key_size = ret; + if (copy_to_user(u64_to_user_ptr(arg.lt_key_ptr), lt_key, + arg.lt_key_size) || + copy_to_user(argp, &arg, sizeof(arg))) { + ret = -EFAULT; + goto out; + } + ret = 0; + +out: + memzero_explicit(lt_key, sizeof(lt_key)); + return ret; +} + +static int blk_crypto_ioctl_prepare_key(struct blk_crypto_profile *profile, + void __user *argp) +{ + struct blk_crypto_prepare_key_arg arg; + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]; + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]; + int ret; + + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + + if (memchr_inv(arg.reserved, 0, sizeof(arg.reserved))) + return -EINVAL; + + if (arg.lt_key_size > sizeof(lt_key)) + return -EINVAL; + + if (copy_from_user(lt_key, u64_to_user_ptr(arg.lt_key_ptr), + arg.lt_key_size)) { + ret = -EFAULT; + goto out; + } + ret = blk_crypto_prepare_key(profile, lt_key, arg.lt_key_size, eph_key); + if (ret < 0) + goto out; + if (ret > arg.eph_key_size) { + ret = -EOVERFLOW; + goto out; + } + arg.eph_key_size = ret; + if (copy_to_user(u64_to_user_ptr(arg.eph_key_ptr), eph_key, + arg.eph_key_size) || + copy_to_user(argp, &arg, sizeof(arg))) { + ret = -EFAULT; + goto out; + } + ret = 0; + +out: + memzero_explicit(lt_key, sizeof(lt_key)); + memzero_explicit(eph_key, sizeof(eph_key)); + return ret; +} + +int blk_crypto_ioctl(struct block_device *bdev, unsigned int cmd, + void __user *argp) +{ + struct blk_crypto_profile *profile = + bdev_get_queue(bdev)->crypto_profile; + + if (!profile) + return -EOPNOTSUPP; + + switch (cmd) { + case BLKCRYPTOIMPORTKEY: + return blk_crypto_ioctl_import_key(profile, argp); + case BLKCRYPTOGENERATEKEY: + return blk_crypto_ioctl_generate_key(profile, argp); + case BLKCRYPTOPREPAREKEY: + return blk_crypto_ioctl_prepare_key(profile, argp); + default: + return -ENOTTY; + } +} diff --git a/block/ioctl.c b/block/ioctl.c index 6554b728bae6..faa40f383e27 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -15,6 +15,7 @@ #include #include #include "blk.h" +#include "blk-crypto-internal.h" static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition __user *upart, int op) @@ -620,6 +621,10 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, case BLKTRACESTOP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); + case BLKCRYPTOIMPORTKEY: + case BLKCRYPTOGENERATEKEY: + case BLKCRYPTOPREPAREKEY: + return blk_crypto_ioctl(bdev, cmd, argp); case IOC_PR_REGISTER: return blkdev_pr_register(bdev, mode, argp); case IOC_PR_RESERVE: diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index 7764b4f7b45b..4f39e9cd7576 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -71,6 +71,48 @@ struct blk_crypto_ll_ops { int (*derive_sw_secret)(struct blk_crypto_profile *profile, const u8 *eph_key, size_t eph_key_size, u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + + /** + * @import_key: Create a hardware-wrapped key by importing a raw key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*import_key)(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @generate_key: Generate a hardware-wrapped key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*generate_key)(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @prepare_key: Prepare a hardware-wrapped key to be used. + * + * Prepare a hardware-wrapped key to be used by converting it from + * long-term wrapped form to ephemerally-wrapped form. This only needs + * to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED is supported. + * + * On success, must write the key in ephemerally-wrapped form to + * @eph_key and return its size in bytes. On failure, must return + * -EBADMSG if the key is invalid, or another -errno on other error. + */ + int (*prepare_key)(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); }; /** @@ -163,6 +205,17 @@ void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); +int blk_crypto_import_key(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_generate_key(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_prepare_key(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, const struct blk_crypto_profile *child); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 81f932b3ea37..58b0c5254a67 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -8,6 +8,7 @@ #include #include +#include enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_INVALID, diff --git a/include/uapi/linux/blk-crypto.h b/include/uapi/linux/blk-crypto.h new file mode 100644 index 000000000000..97302c6eb6af --- /dev/null +++ b/include/uapi/linux/blk-crypto.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_BLK_CRYPTO_H +#define _UAPI_LINUX_BLK_CRYPTO_H + +#include +#include + +struct blk_crypto_import_key_arg { + /* Raw key (input) */ + __u64 raw_key_ptr; + __u64 raw_key_size; + /* Long-term wrapped key blob (output) */ + __u64 lt_key_ptr; + __u64 lt_key_size; + __u64 reserved[4]; +}; + +struct blk_crypto_generate_key_arg { + /* Long-term wrapped key blob (output) */ + __u64 lt_key_ptr; + __u64 lt_key_size; + __u64 reserved[4]; +}; + +struct blk_crypto_prepare_key_arg { + /* Long-term wrapped key blob (input) */ + __u64 lt_key_ptr; + __u64 lt_key_size; + /* Ephemerally-wrapped key blob (output) */ + __u64 eph_key_ptr; + __u64 eph_key_size; + __u64 reserved[4]; +}; + +/* + * These ioctls share the block device ioctl space; see uapi/linux/fs.h. + * 140-141 are reserved for future blk-crypto ioctls; any more than that would + * require an additional allocation from the block device ioctl space. + */ +#define BLKCRYPTOIMPORTKEY _IOWR(0x12, 137, struct blk_crypto_import_key_arg) +#define BLKCRYPTOGENERATEKEY _IOWR(0x12, 138, struct blk_crypto_generate_key_arg) +#define BLKCRYPTOPREPAREKEY _IOWR(0x12, 139, struct blk_crypto_prepare_key_arg) + +#endif /* _UAPI_LINUX_BLK_CRYPTO_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 2bbe00cf1248..e762e1af650c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -212,10 +212,8 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) #define BLKGETDISKSEQ _IOR(0x12,128,__u64) -/* - * A jump here: 130-136 are reserved for zoned block devices - * (see uapi/linux/blkzoned.h) - */ +/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ +/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- 2.51.0 From 3bee991f2b68175c828dc3f9c26367fe1827319a Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Fri, 7 Feb 2025 17:19:42 +0800 Subject: [PATCH 15/16] loop: release the lo_work_lock before queue_work queue_work could spin on wq->cpu_pwq->pool->lock which could lead to concurrent loop_process_work failed on lo_work_lock contention and increase the request latency. Remove this combination by moving the lock release ahead of queue_work. Signed-off-by: Zhaoyang Huang Link: https://lore.kernel.org/r/20250207091942.3966756-1-zhaoyang.huang@unisoc.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c05fe27a96b6..68c943a77e41 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -894,8 +894,8 @@ queue_work: cmd_list = &lo->rootcg_cmd_list; } list_add_tail(&cmd->list_entry, cmd_list); - queue_work(lo->workqueue, work); spin_unlock_irq(&lo->lo_work_lock); + queue_work(lo->workqueue, work); } static void loop_set_timer(struct loop_device *lo) -- 2.51.0 From 36d03cb3277e29beedb87b8efb1e4da02b26e0c0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 8 Feb 2025 17:04:15 +0800 Subject: [PATCH 16/16] block: introduce init_wait_func() There is already a macro DEFINE_WAIT_FUNC() to declare a wait_queue_entry with a specified waking function. But there is not a counterpart for initializing one wait_queue_entry with a specified waking function. So introducing init_wait_func() for this, which also could be used in iocost and rq-qos. Using default_wake_function() in rq_qos_wait() to wake up waiters, which could remove ->task field from rq_qos_wait_data. Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Muchun Song Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250208090416.38642-1-songmuchun@bytedance.com Signed-off-by: Jens Axboe --- block/blk-iocost.c | 3 +-- block/blk-rq-qos.c | 14 +++++++------- include/linux/wait.h | 6 ++++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 65a1d4427ccf..6be46e28459b 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2718,8 +2718,7 @@ retry_lock: * All waiters are on iocg->waitq and the wait states are * synchronized using waitq.lock. */ - init_waitqueue_func_entry(&wait.wait, iocg_wake_fn); - wait.wait.private = current; + init_wait_func(&wait.wait, iocg_wake_fn); wait.bio = bio; wait.abs_cost = abs_cost; wait.committed = false; /* will be set true by waker */ diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index d4d4f4dc0e23..0ed3c81723bb 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -196,7 +196,6 @@ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) struct rq_qos_wait_data { struct wait_queue_entry wq; - struct task_struct *task; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; @@ -218,7 +217,12 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, return -1; data->got_token = true; - wake_up_process(data->task); + /* + * autoremove_wake_function() removes the wait entry only when it + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). + */ + default_wake_function(curr, mode, wake_flags, key); list_del_init_careful(&curr->entry); return 1; } @@ -244,11 +248,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { - .wq = { - .func = rq_qos_wake_function, - .entry = LIST_HEAD_INIT(data.wq.entry), - }, - .task = current, .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, @@ -259,6 +258,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) return; + init_wait_func(&data.wq, rq_qos_wake_function); has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { diff --git a/include/linux/wait.h b/include/linux/wait.h index 6d90ad974408..2bdc8f47963b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1207,14 +1207,16 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define init_wait(wait) \ +#define init_wait_func(wait, function) \ do { \ (wait)->private = current; \ - (wait)->func = autoremove_wake_function; \ + (wait)->func = function; \ INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) +#define init_wait(wait) init_wait_func(wait, autoremove_wake_function) + typedef int (*task_call_f)(struct task_struct *p, void *arg); extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); -- 2.51.0