From 711f9b8fbe4f4936302804e246e206f0829f628f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2025 23:32:05 +0100 Subject: [PATCH 01/16] fsnotify: disable pre-content and permission events by default After introducing pre-content events, we had a regression related to disabling huge faults on files that should never have pre-content events enabled. This happened because the default f_mode of allocated files (0) does not disable pre-content events. Pre-content events are disabled in file_set_fsnotify_mode_by_watchers() but internal files may not get to call this helper. Initialize f_mode to disable permission and pre-content events for all files and if needed they will be enabled for the callers of file_set_fsnotify_mode_by_watchers(). Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/ Tested-by: Alex Williamson Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250203223205.861346-4-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/file_table.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/file_table.c b/fs/file_table.c index 35b93da6c5cb..5c00dc38558d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); + /* + * Disable permission and pre-content events for all files by default. + * They may be enabled later by file_set_fsnotify_mode_from_watchers(). + */ + file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } -- 2.51.0 From 091ee63e36e8289f9067f659a48d497911e49d6f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Feb 2025 14:51:20 +0100 Subject: [PATCH 02/16] pidfs: improve ioctl handling Pidfs supports extensible and non-extensible ioctls. The extensible ioctls need to check for the ioctl number itself not just the ioctl command otherwise both backward- and forward compatibility are broken. The pidfs ioctl handler also needs to look at the type of the ioctl command to guard against cases where "[...] a daemon receives some random file descriptor from a (potentially less privileged) client and expects the FD to be of some specific type, it might call ioctl() on this FD with some type-specific command and expect the call to fail if the FD is of the wrong type; but due to the missing type check, the kernel instead performs some action that userspace didn't expect." (cf. [1]] Link: https://lore.kernel.org/r/20250204-work-pidfs-ioctl-v1-1-04987d239575@kernel.org Link: https://lore.kernel.org/r/CAG48ez2K9A5GwtgqO31u9ZL292we8ZwAA=TJwwEv7wRuJ3j4Lw@mail.gmail.com [1] Fixes: 8ce352818820 ("pidfs: check for valid ioctl commands") Acked-by: Luca Boccassi Reported-by: Jann Horn Cc: stable@vger.kernel.org # v6.13; please backport with 8ce352818820 ("pidfs: check for valid ioctl commands") Signed-off-by: Christian Brauner --- fs/pidfs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 049352f973de..63f9699ebac3 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd) switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: - case PIDFD_GET_INFO: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: @@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd) return true; } + /* Extensible ioctls require some more careful checks. */ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PIDFD_GET_INFO): + /* + * Try to prevent performing a pidfd ioctl when someone + * erronously mistook the file descriptor for a pidfd. + * This is not perfect but will catch most cases. + */ + return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); + } + return false; } -- 2.51.0 From 37d11cfc63604b3886308e2111d845d148ced8bc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 4 Feb 2025 22:32:07 +0100 Subject: [PATCH 03/16] vfs: sanity check the length passed to inode_set_cached_link() This costs a strlen() call when instatianating a symlink. Preferably it would be hidden behind VFS_WARN_ON (or compatible), but there is no such facility at the moment. With the facility in place the call can be patched out in production kernels. In the meantime, since the cost is being paid unconditionally, use the result to a fixup the bad caller. This is not expected to persist in the long run (tm). Sample splat: bad length passed for symlink [/tmp/syz-imagegen43743633/file0/file0] (got 131109, expected 37) [rest of WARN blurp goes here] Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20250204213207.337980-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7620547432a8..2c3b2f8a621f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -790,6 +790,19 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + int testlen; + + /* + * TODO: patch it into a debug-only check if relevant macros show up. + * In the meantime, since we are suffering strlen even on production kernels + * to find the right length, do a fixup if the wrong value got passed. + */ + testlen = strlen(link); + if (testlen != linklen) { + WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", + link, linklen, testlen); + linklen = testlen; + } inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; -- 2.51.0 From 511121a48bbd12df4ae50a099a8936e833df8c46 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 5 Feb 2025 19:42:01 +0100 Subject: [PATCH 04/16] MAINTAINERS: Move Pavel to kernel.org address I need to filter my emails better, switch to pavel@kernel.org address to help with that. Signed-off-by: Pavel Machek Signed-off-by: Linus Torvalds --- CREDITS | 6 ++---- MAINTAINERS | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/CREDITS b/CREDITS index 1f9f0f078b4a..53d11a46fd69 100644 --- a/CREDITS +++ b/CREDITS @@ -2515,11 +2515,9 @@ D: SLS distribution D: Initial implementation of VC's, pty's and select() N: Pavel Machek -E: pavel@ucw.cz +E: pavel@kernel.org P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96 -D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd, -D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, -D: work on suspend-to-ram/disk, killing duplicates from ioctl32, +D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk, D: Altera SoCFPGA and Nokia N900 support. S: Czech Republic diff --git a/MAINTAINERS b/MAINTAINERS index 873aa2cce4d7..157818de0b55 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9418,7 +9418,7 @@ F: fs/freevxfs/ FREEZER M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.rst @@ -10253,7 +10253,7 @@ F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -13124,8 +13124,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Pavel Machek M: Lee Jones +M: Pavel Machek L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git @@ -16823,7 +16823,7 @@ F: include/linux/tick.h F: kernel/time/tick*.* NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS) -M: Pavel Machek +M: Pavel Machek M: Sakari Ailus L: linux-media@vger.kernel.org S: Maintained @@ -22849,7 +22849,7 @@ F: drivers/sh/ SUSPEND TO RAM M: "Rafael J. Wysocki" M: Len Brown -M: Pavel Machek +M: Pavel Machek L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org -- 2.51.0 From 1b3291f00013c86a9bb349d6158a9a7a4f0334fe Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 7 Feb 2025 03:21:46 +0900 Subject: [PATCH 05/16] MAINTAINERS: Remove myself I no longer have any faith left in the kernel development process or community management approach. Apple/ARM platform development will continue downstream. If I feel like sending some patches upstream in the future myself for whatever subtree I may, or I may not. Anyone who feels like fighting the upstreaming fight themselves is welcome to do so. Signed-off-by: Hector Martin Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 157818de0b55..20c9e0871215 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.* F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT -M: Hector Martin M: Sven Peter R: Alyssa Rosenzweig L: asahi@lists.linux.dev -- 2.51.0 From f354fc88a72ae83dacd68370f6fa040e5733bcfe Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 7 Feb 2025 15:08:55 +0800 Subject: [PATCH 06/16] kbuild: install-extmod-build: add missing quotation marks for CC variable While attempting to build a Debian packages with CC="ccache gcc", I saw the following error as builddeb builds linux-headers-$KERNELVERSION: make HOSTCC=ccache gcc VPATH= srcroot=. -f ./scripts/Makefile.build obj=debian/linux-headers-6.14.0-rc1/usr/src/linux-headers-6.14.0-rc1/scripts make[6]: *** No rule to make target 'gcc'. Stop. Upon investigation, it seems that one instance of $(CC) variable reference in ./scripts/package/install-extmod-build was missing quotation marks, causing the above error. Add the missing quotation marks around $(CC) to fix build. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Co-developed-by: Mingcong Bai Signed-off-by: Mingcong Bai Tested-by: WangYuli Signed-off-by: WangYuli Signed-off-by: Masahiro Yamada --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index bb6e23c1174e..b724626ea0ca 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- 2.51.0 From 8f6629c004b193d23612641c3607e785819e97ab Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 17 Oct 2024 10:09:22 -0700 Subject: [PATCH 07/16] kbuild: Move -Wenum-enum-conversion to W=2 -Wenum-enum-conversion was strengthened in clang-19 to warn for C, which caused the kernel to move it to W=1 in commit 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") because there were numerous instances that would break builds with -Werror. Unfortunately, this is not a full solution, as more and more developers, subsystems, and distributors are building with W=1 as well, so they continue to see the numerous instances of this warning. Since the move to W=1, there have not been many new instances that have appeared through various build reports and the ones that have appeared seem to be following similar existing patterns, suggesting that most instances of this warning will not be real issues. The only alternatives for silencing this warning are adding casts (which is generally seen as an ugly practice) or refactoring the enums to macro defines or a unified enum (which may be undesirable because of type safety in other parts of the code). Move the warning to W=2, where warnings that occur frequently but may be relevant should reside. Cc: stable@vger.kernel.org Fixes: 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1") Link: https://lore.kernel.org/ZwRA9SOcOjjLJcpi@google.com/ Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d53..a7003c1e66c7 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += -Wno-enum-compare-conditional -KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif @@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif -- 2.51.0 From c8c9b1d2d5b4377c72a979f5a26e842a869aefc9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 8 Feb 2025 00:15:11 -0500 Subject: [PATCH 08/16] fgraph: Fix set_graph_notrace with setting TRACE_GRAPH_NOTRACE_BIT The code was restructured where the function graph notrace code, that would not trace a function and all its children is done by setting a NOTRACE flag when the function that is not to be traced is hit. There's a TRACE_GRAPH_NOTRACE_BIT which defines the bit in the flags and a TRACE_GRAPH_NOTRACE which is the mask with that bit set. But the restructuring used TRACE_GRAPH_NOTRACE_BIT when it should have used TRACE_GRAPH_NOTRACE. For example: # cd /sys/kernel/tracing # echo set_track_prepare stack_trace_save > set_graph_notrace # echo function_graph > current_tracer # cat trace [..] 0) | __slab_free() { 0) | free_to_partial_list() { 0) | arch_stack_walk() { 0) | __unwind_start() { 0) 0.501 us | get_stack_info(); Where a non filter trace looks like: # echo > set_graph_notrace # cat trace 0) | free_to_partial_list() { 0) | set_track_prepare() { 0) | stack_trace_save() { 0) | arch_stack_walk() { 0) | __unwind_start() { Where the filter should look like: # cat trace 0) | free_to_partial_list() { 0) | _raw_spin_lock_irqsave() { 0) 0.350 us | preempt_count_add(); 0) 0.351 us | do_raw_spin_lock(); 0) 2.440 us | } Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250208001511.535be150@batman.local.home Fixes: b84214890a9bc ("function_graph: Move graph notrace bit to shadow stack global var") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 54d850997c0a..136c750b0b4d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - *task_var |= TRACE_GRAPH_NOTRACE_BIT; + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. -- 2.51.0 From 7585946243d614bd2cd4e13377be2c711c9539e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Feb 2025 18:54:28 +0100 Subject: [PATCH 09/16] PM: sleep: core: Restrict power.set_active propagation Commit 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") exposed an issue related to simple_pm_bus_pm_ops that uses pm_runtime_force_suspend() and pm_runtime_force_resume() as bus type PM callbacks for the noirq phases of system-wide suspend and resume. The problem is that pm_runtime_force_suspend() does not distinguish runtime-suspended devices from devices for which runtime PM has never been enabled, so if it sees a device with runtime PM status set to RPM_ACTIVE, it will assume that runtime PM is enabled for that device and so it will attempt to suspend it with the help of its runtime PM callbacks which may not be ready for that. As it turns out, this causes simple_pm_bus_runtime_suspend() to crash due to a NULL pointer dereference. Another problem related to the above commit and simple_pm_bus_pm_ops is that setting runtime PM status of a device handled by the latter to RPM_ACTIVE will actually prevent it from being resumed because pm_runtime_force_resume() only resumes devices with runtime PM status set to RPM_SUSPENDED. To mitigate these issues, do not allow power.set_active to propagate beyond the parent of the device with DPM_FLAG_SMART_SUSPEND set that will need to be resumed, which should be a sufficient stop-gap for the time being, but they will need to be properly addressed in the future because in general during system-wide resume it is necessary to resume all devices in a dependency chain in which at least one device is going to be resumed. Fixes: 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") Closes: https://lore.kernel.org/linux-pm/1c2433d4-7e0f-4395-b841-b8eac7c25651@nvidia.com/ Reported-by: Jon Hunter Tested-by: Johan Hovold Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/6137505.lOV4Wx5bFT@rjwysocki.net --- drivers/base/power/main.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d497d448e4b2..40e1d8d8a589 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev, bool set_active) +static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; - if (dev->parent) { + if (dev->parent) dev->parent->power.must_resume = true; - if (set_active) - dev->parent->power.set_active = true; - } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; - if (set_active) - link->supplier->power.set_active = true; - } device_links_read_unlock(idx); } @@ -1287,9 +1281,12 @@ Skip: dev->power.must_resume = true; if (dev->power.must_resume) { - dev->power.set_active = dev->power.set_active || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - dpm_superior_set_must_resume(dev, dev->power.set_active); + if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { + dev->power.set_active = true; + if (dev->parent && !dev->parent->power.ignore_children) + dev->parent->power.set_active = true; + } + dpm_superior_set_must_resume(dev); } Complete: -- 2.51.0 From a64dcfb451e254085a7daee5fe51bf22959d52d3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2025 12:45:03 -0800 Subject: [PATCH 10/16] Linux 6.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e0d63d9d94b..89628e354ca7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 89cb121e94612cd3bb3c74b0e772ead5b40b7a5d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 11 Feb 2025 14:25:28 +0100 Subject: [PATCH 11/16] selftests/landlock: Enable the new CONFIG_AF_UNIX_OOB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Since commit 5155cbcdbf03 ("af_unix: Add a prompt to CONFIG_AF_UNIX_OOB"), the Landlock selftests's configuration is not enough to build a minimal kernel. Because scoped_signal_test checks with the MSG_OOB flag, we need to enable CONFIG_AF_UNIX_OOB for tests: # RUN fown.no_sandbox.sigurg_socket ... # scoped_signal_test.c:420:sigurg_socket:Expected 1 (1) == send(client_socket, ".", 1, MSG_OOB) (-1) # sigurg_socket: Test terminated by assertion # FAIL fown.no_sandbox.sigurg_socket ... Cc: Günther Noack Acked-by: Florent Revest Link: https://lore.kernel.org/r/20250211132531.1625566-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 29af19c4e9f9..361f94f8cb0d 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,3 +1,4 @@ +CONFIG_AF_UNIX_OOB=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y -- 2.51.0 From 143c9aae043a1dc174a75be52521192a0caa224b Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Fri, 24 Jan 2025 01:12:10 +0530 Subject: [PATCH 12/16] landlock: Fix grammar error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix grammar error in comments that were identified using the codespell tool. Signed-off-by: Tanya Agarwal Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20250123194208.2660-1-tanyaagarwal25699@gmail.com [mic: Simplify commit message] Signed-off-by: Mickaël Salaün --- security/landlock/ruleset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 241ce44375b6..bff4e40a3093 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -124,7 +124,7 @@ create_rule(const struct landlock_id id, return ERR_PTR(-ENOMEM); RB_CLEAR_NODE(&new_rule->node); if (is_object_pointer(id.type)) { - /* This should be catched by insert_rule(). */ + /* This should have been caught by insert_rule(). */ WARN_ON_ONCE(!id.key.object); landlock_get_object(id.key.object); } -- 2.51.0 From 192b7ff29b1fb0335a9b9107991e6286f462f361 Mon Sep 17 00:00:00 2001 From: =?utf8?q?G=C3=BCnther=20Noack?= Date: Fri, 24 Jan 2025 15:44:44 +0000 Subject: [PATCH 13/16] landlock: Minor typo and grammar fixes in IPC scoping documentation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * Fix some whitespace, punctuation and minor grammar. * Add a missing sentence about the minimum ABI version, to stay in line with the section next to it. Cc: Tahera Fahimi Cc: Tanya Agarwal Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20250124154445.162841-1-gnoack@google.com [mic: Add newlines, update doc date] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 6 +++--- include/uapi/linux/landlock.h | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index d639c61cb472..ad587f53fe41 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: October 2024 +:Date: January 2025 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -329,11 +329,11 @@ non-sandboxed process, we can specify this restriction with A sandboxed process can connect to a non-sandboxed process when its domain is not scoped. If a process's domain is scoped, it can only connect to sockets created by processes in the same scope. -Moreover, If a process is scoped to send signal to a non-scoped process, it can +Moreover, if a process is scoped to send signal to a non-scoped process, it can only send signals to processes in the same scope. A connected datagram socket behaves like a stream socket when its domain is -scoped, meaning if the domain is scoped after the socket is connected , it can +scoped, meaning if the domain is scoped after the socket is connected, it can still :manpage:`send(2)` data just like a stream socket. However, in the same scenario, a non-connected datagram socket cannot send data (with :manpage:`sendto(2)`) outside its scope. diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 33745642f787..e1d2c27533b4 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -268,7 +268,9 @@ struct landlock_net_port_attr { * ~~~~~~~~~~~~~~~~ * * These flags enable to restrict a sandboxed process to a set of network - * actions. This is supported since the Landlock ABI version 4. + * actions. + * + * This is supported since Landlock ABI version 4. * * The following access rights apply to TCP port numbers: * @@ -291,11 +293,13 @@ struct landlock_net_port_attr { * Setting a flag for a ruleset will isolate the Landlock domain to forbid * connections to resources outside the domain. * + * This is supported since Landlock ABI version 6. + * * Scopes: * * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from * connecting to an abstract UNIX socket created by a process outside the - * related Landlock domain (e.g. a parent domain or a non-sandboxed process). + * related Landlock domain (e.g., a parent domain or a non-sandboxed process). * - %LANDLOCK_SCOPE_SIGNAL: Restrict a sandboxed process from sending a signal * to another process outside the domain. */ -- 2.51.0 From 854277e2cc8c75dc3c216c82e72523258fcf65b9 Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:49 +0800 Subject: [PATCH 14/16] landlock: Fix non-TCP sockets restriction MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use sk_is_tcp() to check if socket is TCP in bind(2) and connect(2) hooks. SMC, MPTCP, SCTP protocols are currently restricted by TCP access rights. The purpose of TCP access rights is to provide control over ports that can be used by userland to establish a TCP connection. Therefore, it is incorrect to deny bind(2) and connect(2) requests for a socket of another protocol. However, SMC, MPTCP and RDS implementations use TCP internal sockets to establish communication or even to exchange packets over a TCP connection [1]. Landlock rules that configure bind(2) and connect(2) usage for TCP sockets should not cover requests for sockets of such protocols. These protocols have different set of security issues and security properties, therefore, it is necessary to provide the userland with the ability to distinguish between them (eg. [2]). Control over TCP connection used by other protocols can be achieved with upcoming support of socket creation control [3]. [1] https://lore.kernel.org/all/62336067-18c2-3493-d0ec-6dd6a6d3a1b5@huawei-partners.com/ [2] https://lore.kernel.org/all/20241204.fahVio7eicim@digikod.net/ [3] https://lore.kernel.org/all/20240904104824.1844082-1-ivanov.mikhail1@huawei-partners.com/ Closes: https://github.com/landlock-lsm/linux/issues/40 Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect") Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-2-ivanov.mikhail1@huawei-partners.com [mic: Format commit message to 72 columns] Signed-off-by: Mickaël Salaün --- security/landlock/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/landlock/net.c b/security/landlock/net.c index d5dcc4407a19..104b6c01fe50 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -63,8 +63,7 @@ static int current_check_access_socket(struct socket *const sock, if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; - /* Checks if it's a (potential) TCP socket. */ - if (sock->type != SOCK_STREAM) + if (!sk_is_tcp(sock->sk)) return 0; /* Checks for minimal header length to safely read sa_family. */ -- 2.51.0 From f5534d511bcd273720f168386de74af76e148a9b Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:50 +0800 Subject: [PATCH 15/16] selftests/landlock: Test TCP accesses with protocol=IPPROTO_TCP MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Extend protocol_variant structure with protocol field (Cf. socket(2)). Extend protocol fixture with TCP test suits with protocol=IPPROTO_TCP which can be used as an alias for IPPROTO_IP (=0) in socket(2). Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-3-ivanov.mikhail1@huawei-partners.com Cc: # 6.7.x Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/common.h | 1 + tools/testing/selftests/landlock/net_test.c | 80 +++++++++++++++++---- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index a604ea5d8297..6064c9ac0532 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -207,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) struct protocol_variant { int domain; int type; + int protocol; }; struct service_fixture { diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4e0aeb53b225..333263780fae 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata) clear_ambient_cap(_metadata, CAP_NET_ADMIN); } +static bool prot_is_tcp(const struct protocol_variant *const prot) +{ + return (prot->domain == AF_INET || prot->domain == AF_INET6) && + prot->type == SOCK_STREAM && + (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP); +} + static bool is_restricted(const struct protocol_variant *const prot, const enum sandbox_type sandbox) { - switch (prot->domain) { - case AF_INET: - case AF_INET6: - switch (prot->type) { - case SOCK_STREAM: - return sandbox == TCP_SANDBOX; - } - break; - } + if (sandbox == TCP_SANDBOX) + return prot_is_tcp(prot); return false; } @@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv) int ret; ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, - 0); + srv->protocol.protocol); if (ret < 0) return -errno; return ret; @@ -290,22 +290,48 @@ FIXTURE_TEARDOWN(protocol) } /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; @@ -350,22 +376,48 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; -- 2.51.0 From 3d4033985ff508ef587ca11f1c8361ba57c7e09f Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Feb 2025 17:36:51 +0800 Subject: [PATCH 16/16] selftests/landlock: Test that MPTCP actions are not restricted MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Extend protocol fixture with test suits for MPTCP protocol. Add CONFIG_MPTCP and CONFIG_MPTCP_IPV6 options in config. Signed-off-by: Mikhail Ivanov Link: https://lore.kernel.org/r/20250205093651.1424339-4-ivanov.mikhail1@huawei-partners.com Cc: # 6.7.x Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/config | 2 + tools/testing/selftests/landlock/net_test.c | 44 +++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 361f94f8cb0d..425de4c20271 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -4,6 +4,8 @@ CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y CONFIG_KEYS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 333263780fae..d9de0ee49ebc 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -312,6 +312,17 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { /* clang-format on */ @@ -335,6 +346,17 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) { /* clang-format on */ @@ -398,6 +420,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { /* clang-format on */ @@ -421,6 +454,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { }, }; +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + /* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) { /* clang-format on */ -- 2.51.0