From cb380909ae3b1ebf14d6a455a4f92d7916d790cb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:30 -0800 Subject: [PATCH 01/16] vhost: return task creation error instead of NULL Lets callers distinguish why the vhost task creation failed. No one currently cares why it failed, so no real runtime change from this patch, but that will not be the case for long. Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-2-kbusch@meta.com> Reviewed-by: Mike Christie Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- drivers/vhost/vhost.c | 2 +- kernel/vhost_task.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d4ac4a1f8b81..18ca1ea6dc24 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7471,7 +7471,7 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); - if (!nx_thread) + if (IS_ERR(nx_thread)) return; vhost_task_start(nx_thread); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9ac25d08f473..63612faeab72 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); - if (!vtsk) + if (IS_ERR(vtsk)) goto free_worker; mutex_init(&worker->mutex); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 8800f5acc007..2ef2e1b80091 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL); if (!vtsk) - return NULL; + return ERR_PTR(-ENOMEM); init_completion(&vtsk->exited); mutex_init(&vtsk->exit_mutex); vtsk->data = arg; @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return NULL; + return ERR_PTR(PTR_ERR(tsk)); } vtsk->task = tsk; -- 2.51.0 From 916b7f42b3b3b539a71c204a9b49fdc4ca92cd82 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:31 -0800 Subject: [PATCH 02/16] kvm: retry nx_huge_page_recovery_thread creation A VMM may send a non-fatal signal to its threads, including vCPU tasks, at any time, and thus may signal vCPU tasks during KVM_RUN. If a vCPU task receives the signal while its trying to spawn the huge page recovery vhost task, then KVM_RUN will fail due to copy_process() returning -ERESTARTNOINTR. Rework call_once() to mark the call complete if and only if the called function succeeds, and plumb the function's true error code back to the call_once() invoker. This provides userspace with the correct, non-fatal error code so that the VMM doesn't terminate the VM on -ENOMEM, and allows subsequent KVM_RUN a succeed by virtue of retrying creation of the NX huge page task. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson [implemented the kvm user side] Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-3-kbusch@meta.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 ++++----- include/linux/call_once.h | 47 ++++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 18ca1ea6dc24..8160870398b9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); @@ -7472,12 +7472,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm, "kvm-nx-lpage-recovery"); if (IS_ERR(nx_thread)) - return; + return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. */ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ -- 2.51.0 From a2f925a2f62254119cdaa360cfc9c0424bccd531 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 28 Feb 2025 13:26:04 +0100 Subject: [PATCH 03/16] Revert "ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives" This reverts commit cc77e2ce187d26cc66af3577bf896d7410eb25ab. It was reported that adding ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives breaks entering lower package states for certain systems. It turns out that Samsung SSD 870 QVO actually has working LPM when using a recent SSD firmware version. The author of commit cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") reported himself that only older SSD firmware versions have broken LPM: https://lore.kernel.org/stable/93c10d38-718c-459d-84a5-4d87680b4da7@debian.org/ Unfortunately, he did not specify which older firmware version he was using which had broken LPM. Let's revert this quirk, which has FW version field specified as NULL (which means that it applies for all Samsung SSD 870 QVO firmware versions) for now. Once the author reports which older firmware version(s) that are broken, we can create a more fine grained quirk, which populates the FW version field accordingly. Fixes: cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219747 Link: https://lore.kernel.org/r/20250228122603.91814-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f218431..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, -- 2.51.0 From 7eb172143d5508b4da468ed59ee857c6e5e01da6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Mar 2025 11:48:20 -0800 Subject: [PATCH 04/16] Linux 6.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 30dab4c8b012..70bdbf2218fc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From d1a088a87273620670b3a54b65ca504fab1febd4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?G=C3=BCnther=20Noack?= Date: Mon, 3 Mar 2025 20:45:12 +0100 Subject: [PATCH 05/16] landlock: Clarify IPC scoping documentation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * Clarify terminology * Stop mixing the unix(7) and signal(7) aspects in the explanation. Terminology: * The *IPC Scope* of a Landlock domain is that Landlock domain and its nested domains. * An *operation* (e.g., signaling, connecting to abstract UDS) is said to be *scoped within a domain* when the flag for that operation was set at ruleset creation time. This means that for the purpose of this operation, only processes within the domain's IPC scope are reachable. Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20250303194510.135506-4-gnoack@google.com [mic: Update doc date] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 55 ++++++++++++------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index ad587f53fe41..900171e3c494 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: January 2025 +:Date: March 2025 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -317,33 +317,32 @@ IPC scoping ----------- Similar to the implicit `Ptrace restrictions`_, we may want to further restrict -interactions between sandboxes. Each Landlock domain can be explicitly scoped -for a set of actions by specifying it on a ruleset. For example, if a -sandboxed process should not be able to :manpage:`connect(2)` to a -non-sandboxed process through abstract :manpage:`unix(7)` sockets, we can -specify such a restriction with ``LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET``. -Moreover, if a sandboxed process should not be able to send a signal to a -non-sandboxed process, we can specify this restriction with -``LANDLOCK_SCOPE_SIGNAL``. - -A sandboxed process can connect to a non-sandboxed process when its domain is -not scoped. If a process's domain is scoped, it can only connect to sockets -created by processes in the same scope. -Moreover, if a process is scoped to send signal to a non-scoped process, it can -only send signals to processes in the same scope. - -A connected datagram socket behaves like a stream socket when its domain is -scoped, meaning if the domain is scoped after the socket is connected, it can -still :manpage:`send(2)` data just like a stream socket. However, in the same -scenario, a non-connected datagram socket cannot send data (with -:manpage:`sendto(2)`) outside its scope. - -A process with a scoped domain can inherit a socket created by a non-scoped -process. The process cannot connect to this socket since it has a scoped -domain. - -IPC scoping does not support exceptions, so if a domain is scoped, no rules can -be added to allow access to resources or processes outside of the scope. +interactions between sandboxes. Therefore, at ruleset creation time, each +Landlock domain can restrict the scope for certain operations, so that these +operations can only reach out to processes within the same Landlock domain or in +a nested Landlock domain (the "scope"). + +The operations which can be scoped are: + +``LANDLOCK_SCOPE_SIGNAL`` + This limits the sending of signals to target processes which run within the + same or a nested Landlock domain. + +``LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET`` + This limits the set of abstract :manpage:`unix(7)` sockets to which we can + :manpage:`connect(2)` to socket addresses which were created by a process in + the same or a nested Landlock domain. + + A :manpage:`sendto(2)` on a non-connected datagram socket is treated as if + it were doing an implicit :manpage:`connect(2)` and will be blocked if the + remote end does not stem from the same or a nested Landlock domain. + + A :manpage:`sendto(2)` on a socket which was previously connected will not + be restricted. This works for both datagram and stream sockets. + +IPC scoping does not support exceptions via :manpage:`landlock_add_rule(2)`. +If an operation is scoped within a domain, no rules can be added to allow access +to resources or processes outside of the scope. Truncating files ---------------- -- 2.51.0 From 624f177d8f62032b4f3343c289120269645cec37 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:36 +0100 Subject: [PATCH 06/16] landlock: Move code to ease future backports MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit To ease backports in setup.c, let's group changes from __lsm_ro_after_init to __ro_after_init with commit f22f9aaf6c3d ("selinux: remove the runtime disable functionality"), and the landlock_lsmid addition with commit f3b8788cde61 ("LSM: Identify modules by more than name"). That will help to backport the following errata. Cc: Günther Noack Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-2-mic@digikod.net Fixes: f3b8788cde61 ("LSM: Identify modules by more than name") Signed-off-by: Mickaël Salaün --- security/landlock/setup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/security/landlock/setup.c b/security/landlock/setup.c index 28519a45b11f..c71832a8e369 100644 --- a/security/landlock/setup.c +++ b/security/landlock/setup.c @@ -19,6 +19,11 @@ bool landlock_initialized __ro_after_init = false; +const struct lsm_id landlock_lsmid = { + .name = LANDLOCK_NAME, + .id = LSM_ID_LANDLOCK, +}; + struct lsm_blob_sizes landlock_blob_sizes __ro_after_init = { .lbs_cred = sizeof(struct landlock_cred_security), .lbs_file = sizeof(struct landlock_file_security), @@ -26,11 +31,6 @@ struct lsm_blob_sizes landlock_blob_sizes __ro_after_init = { .lbs_superblock = sizeof(struct landlock_superblock_security), }; -const struct lsm_id landlock_lsmid = { - .name = LANDLOCK_NAME, - .id = LSM_ID_LANDLOCK, -}; - static int __init landlock_init(void) { landlock_add_cred_hooks(); -- 2.51.0 From 15383a0d63dbcd63dc7e8d9ec1bf3a0f7ebf64ac Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:37 +0100 Subject: [PATCH 07/16] landlock: Add the errata interface MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Some fixes may require user space to check if they are applied on the running kernel before using a specific feature. For instance, this applies when a restriction was previously too restrictive and is now getting relaxed (e.g. for compatibility reasons). However, non-visible changes for legitimate use (e.g. security fixes) do not require an erratum. Because fixes are backported down to a specific Landlock ABI, we need a way to avoid cherry-pick conflicts. The solution is to only update a file related to the lower ABI impacted by this issue. All the ABI files are then used to create a bitmask of fixes. The new errata interface is similar to the one used to get the supported Landlock ABI version, but it returns a bitmask instead because the order of fixes may not match the order of versions, and not all fixes may apply to all versions. The actual errata will come with dedicated commits. The description is not actually used in the code but serves as documentation. Create the landlock_abi_version symbol and use its value to check errata consistency. Update test_base's create_ruleset_checks_ordering tests and add errata tests. This commit is backportable down to the first version of Landlock. Fixes: 3532b0b4352c ("landlock: Enable user space to infer supported features") Cc: Günther Noack Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-3-mic@digikod.net Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 2 + security/landlock/errata.h | 87 ++++++++++++++++++++ security/landlock/setup.c | 30 +++++++ security/landlock/setup.h | 3 + security/landlock/syscalls.c | 22 ++++- tools/testing/selftests/landlock/base_test.c | 46 ++++++++++- 6 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 security/landlock/errata.h diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index e1d2c27533b4..8806a132d7b8 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -57,9 +57,11 @@ struct landlock_ruleset_attr { * * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI * version. + * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) +#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ /** diff --git a/security/landlock/errata.h b/security/landlock/errata.h new file mode 100644 index 000000000000..f26b28b9873d --- /dev/null +++ b/security/landlock/errata.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Landlock - Errata information + * + * Copyright © 2025 Microsoft Corporation + */ + +#ifndef _SECURITY_LANDLOCK_ERRATA_H +#define _SECURITY_LANDLOCK_ERRATA_H + +#include + +struct landlock_erratum { + const int abi; + const u8 number; +}; + +/* clang-format off */ +#define LANDLOCK_ERRATUM(NUMBER) \ + { \ + .abi = LANDLOCK_ERRATA_ABI, \ + .number = NUMBER, \ + }, +/* clang-format on */ + +/* + * Some fixes may require user space to check if they are applied on the running + * kernel before using a specific feature. For instance, this applies when a + * restriction was previously too restrictive and is now getting relaxed (for + * compatibility or semantic reasons). However, non-visible changes for + * legitimate use (e.g. security fixes) do not require an erratum. + */ +static const struct landlock_erratum landlock_errata_init[] __initconst = { + +/* + * Only Sparse may not implement __has_include. If a compiler does not + * implement __has_include, a warning will be printed at boot time (see + * setup.c). + */ +#ifdef __has_include + +#define LANDLOCK_ERRATA_ABI 1 +#if __has_include("errata/abi-1.h") +#include "errata/abi-1.h" +#endif +#undef LANDLOCK_ERRATA_ABI + +#define LANDLOCK_ERRATA_ABI 2 +#if __has_include("errata/abi-2.h") +#include "errata/abi-2.h" +#endif +#undef LANDLOCK_ERRATA_ABI + +#define LANDLOCK_ERRATA_ABI 3 +#if __has_include("errata/abi-3.h") +#include "errata/abi-3.h" +#endif +#undef LANDLOCK_ERRATA_ABI + +#define LANDLOCK_ERRATA_ABI 4 +#if __has_include("errata/abi-4.h") +#include "errata/abi-4.h" +#endif +#undef LANDLOCK_ERRATA_ABI + +/* + * For each new erratum, we need to include all the ABI files up to the impacted + * ABI to make all potential future intermediate errata easy to backport. + * + * If such change involves more than one ABI addition, then it must be in a + * dedicated commit with the same Fixes tag as used for the actual fix. + * + * Each commit creating a new security/landlock/errata/abi-*.h file must have a + * Depends-on tag to reference the commit that previously added the line to + * include this new file, except if the original Fixes tag is enough. + * + * Each erratum must be documented in its related ABI file, and a dedicated + * commit must update Documentation/userspace-api/landlock.rst to include this + * erratum. This commit will not be backported. + */ + +#endif + + {} +}; + +#endif /* _SECURITY_LANDLOCK_ERRATA_H */ diff --git a/security/landlock/setup.c b/security/landlock/setup.c index c71832a8e369..0c85ea27e409 100644 --- a/security/landlock/setup.c +++ b/security/landlock/setup.c @@ -6,12 +6,14 @@ * Copyright © 2018-2020 ANSSI */ +#include #include #include #include #include "common.h" #include "cred.h" +#include "errata.h" #include "fs.h" #include "net.h" #include "setup.h" @@ -31,8 +33,36 @@ struct lsm_blob_sizes landlock_blob_sizes __ro_after_init = { .lbs_superblock = sizeof(struct landlock_superblock_security), }; +int landlock_errata __ro_after_init; + +static void __init compute_errata(void) +{ + size_t i; + +#ifndef __has_include + /* + * This is a safeguard to make sure the compiler implements + * __has_include (see errata.h). + */ + WARN_ON_ONCE(1); + return; +#endif + + for (i = 0; landlock_errata_init[i].number; i++) { + const int prev_errata = landlock_errata; + + if (WARN_ON_ONCE(landlock_errata_init[i].abi > + landlock_abi_version)) + continue; + + landlock_errata |= BIT(landlock_errata_init[i].number - 1); + WARN_ON_ONCE(prev_errata == landlock_errata); + } +} + static int __init landlock_init(void) { + compute_errata(); landlock_add_cred_hooks(); landlock_add_task_hooks(); landlock_add_fs_hooks(); diff --git a/security/landlock/setup.h b/security/landlock/setup.h index c4252d46d49d..fca307c35fee 100644 --- a/security/landlock/setup.h +++ b/security/landlock/setup.h @@ -11,7 +11,10 @@ #include +extern const int landlock_abi_version; + extern bool landlock_initialized; +extern int landlock_errata; extern struct lsm_blob_sizes landlock_blob_sizes; extern const struct lsm_id landlock_lsmid; diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index a9760d252fc2..cf9e0483e542 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -160,7 +160,9 @@ static const struct file_operations ruleset_fops = { * the new ruleset. * @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). - * @flags: Supported value: %LANDLOCK_CREATE_RULESET_VERSION. + * @flags: Supported value: + * - %LANDLOCK_CREATE_RULESET_VERSION + * - %LANDLOCK_CREATE_RULESET_ERRATA * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. @@ -169,6 +171,10 @@ static const struct file_operations ruleset_fops = { * 0, then the returned value is the highest supported Landlock ABI version * (starting at 1). * + * If @flags is %LANDLOCK_CREATE_RULESET_ERRATA and @attr is NULL and @size is + * 0, then the returned value is a bitmask of fixed issues for the current + * Landlock ABI version. + * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; @@ -192,9 +198,15 @@ SYSCALL_DEFINE3(landlock_create_ruleset, return -EOPNOTSUPP; if (flags) { - if ((flags == LANDLOCK_CREATE_RULESET_VERSION) && !attr && - !size) - return LANDLOCK_ABI_VERSION; + if (attr || size) + return -EINVAL; + + if (flags == LANDLOCK_CREATE_RULESET_VERSION) + return landlock_abi_version; + + if (flags == LANDLOCK_CREATE_RULESET_ERRATA) + return landlock_errata; + return -EINVAL; } @@ -235,6 +247,8 @@ SYSCALL_DEFINE3(landlock_create_ruleset, return ruleset_fd; } +const int landlock_abi_version = LANDLOCK_ABI_VERSION; + /* * Returns an owned ruleset from a FD. It is thus needed to call * landlock_put_ruleset() on the return value. diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 1bc16fde2e8a..4766f8fec9f6 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -98,10 +98,54 @@ TEST(abi_version) ASSERT_EQ(EINVAL, errno); } +/* + * Old source trees might not have the set of Kselftest fixes related to kernel + * UAPI headers. + */ +#ifndef LANDLOCK_CREATE_RULESET_ERRATA +#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) +#endif + +TEST(errata) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + int errata; + + errata = landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA); + /* The errata bitmask will not be backported to tests. */ + ASSERT_LE(0, errata); + TH_LOG("errata: 0x%x", errata); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset( + NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION | + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA | + 1 << 31)); + ASSERT_EQ(EINVAL, errno); +} + /* Tests ordering of syscall argument checks. */ TEST(create_ruleset_checks_ordering) { - const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; + const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA; const int invalid_flag = last_flag << 1; int ruleset_fd; const struct landlock_ruleset_attr ruleset_attr = { -- 2.51.0 From 48fce74fe209ba9e9b416d7100ccee546edc9fc6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:38 +0100 Subject: [PATCH 08/16] landlock: Add erratum for TCP fix MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add erratum for the TCP socket identification fixed with commit 854277e2cc8c ("landlock: Fix non-TCP sockets restriction"). Fixes: 854277e2cc8c ("landlock: Fix non-TCP sockets restriction") Cc: Günther Noack Cc: Mikhail Ivanov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-4-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/errata/abi-4.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 security/landlock/errata/abi-4.h diff --git a/security/landlock/errata/abi-4.h b/security/landlock/errata/abi-4.h new file mode 100644 index 000000000000..c052ee54f89f --- /dev/null +++ b/security/landlock/errata/abi-4.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/** + * DOC: erratum_1 + * + * Erratum 1: TCP socket identification + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This fix addresses an issue where IPv4 and IPv6 stream sockets (e.g., SMC, + * MPTCP, or SCTP) were incorrectly restricted by TCP access rights during + * :manpage:`bind(2)` and :manpage:`connect(2)` operations. This change ensures + * that only TCP sockets are subject to TCP access rights, allowing other + * protocols to operate without unnecessary restrictions. + */ +LANDLOCK_ERRATUM(1) -- 2.51.0 From 6d9ac5e4d70eba3e336f9809ba91ab2c49de6d87 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:39 +0100 Subject: [PATCH 09/16] landlock: Prepare to add second errata MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Potentially include errata for Landlock ABI v5 (Linux 6.10) and v6 (Linux 6.12). That will be useful for the following signal scoping erratum. As explained in errata.h, this commit should be backportable without conflict down to ABI v5. It must then not include the errata/abi-6.h file. Fixes: 54a6e6bbf3be ("landlock: Add signal scoping") Cc: Günther Noack Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-5-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/errata.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/security/landlock/errata.h b/security/landlock/errata.h index f26b28b9873d..8e626accac10 100644 --- a/security/landlock/errata.h +++ b/security/landlock/errata.h @@ -63,6 +63,18 @@ static const struct landlock_erratum landlock_errata_init[] __initconst = { #endif #undef LANDLOCK_ERRATA_ABI +#define LANDLOCK_ERRATA_ABI 5 +#if __has_include("errata/abi-5.h") +#include "errata/abi-5.h" +#endif +#undef LANDLOCK_ERRATA_ABI + +#define LANDLOCK_ERRATA_ABI 6 +#if __has_include("errata/abi-6.h") +#include "errata/abi-6.h" +#endif +#undef LANDLOCK_ERRATA_ABI + /* * For each new erratum, we need to include all the ABI files up to the impacted * ABI to make all potential future intermediate errata easy to backport. -- 2.51.0 From 18eb75f3af40be1f0fc2025d4ff821711222a2fd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:40 +0100 Subject: [PATCH 10/16] landlock: Always allow signals between threads of the same process MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Because Linux credentials are managed per thread, user space relies on some hack to synchronize credential update across threads from the same process. This is required by the Native POSIX Threads Library and implemented by set*id(2) wrappers and libcap(3) to use tgkill(2) to synchronize threads. See nptl(7) and libpsx(3). Furthermore, some runtimes like Go do not enable developers to have control over threads [1]. To avoid potential issues, and because threads are not security boundaries, let's relax the Landlock (optional) signal scoping to always allow signals sent between threads of the same process. This exception is similar to the __ptrace_may_access() one. hook_file_set_fowner() now checks if the target task is part of the same process as the caller. If this is the case, then the related signal triggered by the socket will always be allowed. Scoping of abstract UNIX sockets is not changed because kernel objects (e.g. sockets) should be tied to their creator's domain at creation time. Note that creating one Landlock domain per thread puts each of these threads (and their future children) in their own scope, which is probably not what users expect, especially in Go where we do not control threads. However, being able to drop permissions on all threads should not be restricted by signal scoping. We are working on a way to make it possible to atomically restrict all threads of a process with the same domain [2]. Add erratum for signal scoping. Closes: https://github.com/landlock-lsm/go-landlock/issues/36 Fixes: 54a6e6bbf3be ("landlock: Add signal scoping") Fixes: c8994965013e ("selftests/landlock: Test signal scoping for threads") Depends-on: 26f204380a3c ("fs: Fix file_set_fowner LSM hook inconsistencies") Link: https://pkg.go.dev/kernel.org/pub/linux/libs/security/libcap/psx [1] Link: https://github.com/landlock-lsm/linux/issues/2 [2] Cc: Günther Noack Cc: Paul Moore Cc: Serge Hallyn Cc: Tahera Fahimi Cc: stable@vger.kernel.org Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20250318161443.279194-6-mic@digikod.net [mic: Add extra pointer check and RCU guard, and ease backport] Signed-off-by: Mickaël Salaün --- security/landlock/errata/abi-6.h | 19 +++++++++ security/landlock/fs.c | 39 ++++++++++++++++--- security/landlock/task.c | 12 ++++++ .../selftests/landlock/scoped_signal_test.c | 2 +- 4 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 security/landlock/errata/abi-6.h diff --git a/security/landlock/errata/abi-6.h b/security/landlock/errata/abi-6.h new file mode 100644 index 000000000000..df7bc0e1fdf4 --- /dev/null +++ b/security/landlock/errata/abi-6.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/** + * DOC: erratum_2 + * + * Erratum 2: Scoped signal handling + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This fix addresses an issue where signal scoping was overly restrictive, + * preventing sandboxed threads from signaling other threads within the same + * process if they belonged to different domains. Because threads are not + * security boundaries, user space might assume that any thread within the same + * process can send signals between themselves (see :manpage:`nptl(7)` and + * :manpage:`libpsx(3)`). Consistent with :manpage:`ptrace(2)` behavior, direct + * interaction between threads of the same process should always be allowed. + * This change ensures that any thread is allowed to send signals to any other + * thread within the same process, regardless of their domain. + */ +LANDLOCK_ERRATUM(2) diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 71b9dc331aae..c19aab87c4d2 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -27,7 +27,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -1628,21 +1630,46 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd, return -EACCES; } -static void hook_file_set_fowner(struct file *file) +/* + * Always allow sending signals between threads of the same process. This + * ensures consistency with hook_task_kill(). + */ +static bool control_current_fowner(struct fown_struct *const fown) { - struct landlock_ruleset *new_dom, *prev_dom; + struct task_struct *p; /* * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix * file_set_fowner LSM hook inconsistencies"). */ - lockdep_assert_held(&file_f_owner(file)->lock); - new_dom = landlock_get_current_domain(); - landlock_get_ruleset(new_dom); + lockdep_assert_held(&fown->lock); + + /* + * Some callers (e.g. fcntl_dirnotify) may not be in an RCU read-side + * critical section. + */ + guard(rcu)(); + p = pid_task(fown->pid, fown->pid_type); + if (!p) + return true; + + return !same_thread_group(p, current); +} + +static void hook_file_set_fowner(struct file *file) +{ + struct landlock_ruleset *prev_dom; + struct landlock_ruleset *new_dom = NULL; + + if (control_current_fowner(file_f_owner(file))) { + new_dom = landlock_get_current_domain(); + landlock_get_ruleset(new_dom); + } + prev_dom = landlock_file(file)->fown_domain; landlock_file(file)->fown_domain = new_dom; - /* Called in an RCU read-side critical section. */ + /* May be called in an RCU read-side critical section. */ landlock_put_ruleset_deferred(prev_dom); } diff --git a/security/landlock/task.c b/security/landlock/task.c index dc7dab78392e..4578ce6e319d 100644 --- a/security/landlock/task.c +++ b/security/landlock/task.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -264,6 +265,17 @@ static int hook_task_kill(struct task_struct *const p, /* Dealing with USB IO. */ dom = landlock_cred(cred)->domain; } else { + /* + * Always allow sending signals between threads of the same process. + * This is required for process credential changes by the Native POSIX + * Threads Library and implemented by the set*id(2) wrappers and + * libcap(3) with tgkill(2). See nptl(7) and libpsx(3). + * + * This exception is similar to the __ptrace_may_access() one. + */ + if (same_thread_group(p, current)) + return 0; + dom = landlock_get_current_domain(); } dom = landlock_get_applicable_domain(dom, signal_scope); diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c index 475ee62a832d..767f117703b7 100644 --- a/tools/testing/selftests/landlock/scoped_signal_test.c +++ b/tools/testing/selftests/landlock/scoped_signal_test.c @@ -281,7 +281,7 @@ TEST(signal_scoping_threads) /* Restricts the domain after creating the first thread. */ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); - ASSERT_EQ(EPERM, pthread_kill(no_sandbox_thread, 0)); + ASSERT_EQ(0, pthread_kill(no_sandbox_thread, 0)); ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL)); -- 2.51.0 From bbe72274035a83159c8fff7d553b4a0b3c473690 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:41 +0100 Subject: [PATCH 11/16] selftests/landlock: Split signal_scoping_threads tests MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Split signal_scoping_threads tests into signal_scoping_thread_before and signal_scoping_thread_after. Use local variables for thread synchronization. Fix exported function. Replace some asserts with expects. Fixes: c8994965013e ("selftests/landlock: Test signal scoping for threads") Cc: Günther Noack Cc: Tahera Fahimi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-7-mic@digikod.net Signed-off-by: Mickaël Salaün --- .../selftests/landlock/scoped_signal_test.c | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c index 767f117703b7..d313cb626225 100644 --- a/tools/testing/selftests/landlock/scoped_signal_test.c +++ b/tools/testing/selftests/landlock/scoped_signal_test.c @@ -249,47 +249,66 @@ TEST_F(scoped_domains, check_access_signal) _metadata->exit_code = KSFT_FAIL; } -static int thread_pipe[2]; - enum thread_return { THREAD_INVALID = 0, THREAD_SUCCESS = 1, THREAD_ERROR = 2, }; -void *thread_func(void *arg) +static void *thread_sync(void *arg) { + const int pipe_read = *(int *)arg; char buf; - if (read(thread_pipe[0], &buf, 1) != 1) + if (read(pipe_read, &buf, 1) != 1) return (void *)THREAD_ERROR; return (void *)THREAD_SUCCESS; } -TEST(signal_scoping_threads) +TEST(signal_scoping_thread_before) { - pthread_t no_sandbox_thread, scoped_thread; + pthread_t no_sandbox_thread; enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; drop_caps(_metadata); ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); - ASSERT_EQ(0, - pthread_create(&no_sandbox_thread, NULL, thread_func, NULL)); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync, + &thread_pipe[0])); - /* Restricts the domain after creating the first thread. */ + /* Enforces restriction after creating the thread. */ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); - ASSERT_EQ(0, pthread_kill(no_sandbox_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); - - ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL)); - ASSERT_EQ(0, pthread_kill(scoped_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); + + EXPECT_EQ(0, close(thread_pipe[0])); + EXPECT_EQ(0, close(thread_pipe[1])); +} + +TEST(signal_scoping_thread_after) +{ + pthread_t scoped_thread; + enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; + + drop_caps(_metadata); + ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); + + /* Enforces restriction before creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync, + &thread_pipe[0])); + + EXPECT_EQ(0, pthread_kill(scoped_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); -- 2.51.0 From c5efa393d82cf68812e0ae4d93e339873eabe9fe Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 18 Mar 2025 17:14:42 +0100 Subject: [PATCH 12/16] selftests/landlock: Add a new test for setuid() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The new signal_scoping_thread_setuid tests check that the libc's setuid() function works as expected even when a thread is sandboxed with scoped signal restrictions. Before the signal scoping fix, this test would have failed with the setuid() call: [pid 65] getpid() = 65 [pid 65] tgkill(65, 66, SIGRT_1) = -1 EPERM (Operation not permitted) [pid 65] futex(0x40a66cdc, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 65] setuid(1001) = 0 After the fix, tgkill(2) is successfully leveraged to synchronize credentials update across threads: [pid 65] getpid() = 65 [pid 65] tgkill(65, 66, SIGRT_1) = 0 [pid 66] <... read resumed>0x40a65eb7, 1) = ? ERESTARTSYS (To be restarted if SA_RESTART is set) [pid 66] --- SIGRT_1 {si_signo=SIGRT_1, si_code=SI_TKILL, si_pid=65, si_uid=1000} --- [pid 66] getpid() = 65 [pid 66] setuid(1001) = 0 [pid 66] futex(0x40a66cdc, FUTEX_WAKE_PRIVATE, 1) = 0 [pid 66] rt_sigreturn({mask=[]}) = 0 [pid 66] read(3, [pid 65] setuid(1001) = 0 Test coverage for security/landlock is 92.9% of 1137 lines according to gcc/gcov-14. Fixes: c8994965013e ("selftests/landlock: Test signal scoping for threads") Cc: Günther Noack Cc: Tahera Fahimi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318161443.279194-8-mic@digikod.net [mic: Update test coverage] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/common.h | 1 + .../selftests/landlock/scoped_signal_test.c | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 6064c9ac0532..076a9a625c98 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -41,6 +41,7 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) CAP_MKNOD, CAP_NET_ADMIN, CAP_NET_BIND_SERVICE, + CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_CHROOT, /* clang-format on */ diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c index d313cb626225..d8bf33417619 100644 --- a/tools/testing/selftests/landlock/scoped_signal_test.c +++ b/tools/testing/selftests/landlock/scoped_signal_test.c @@ -253,6 +253,7 @@ enum thread_return { THREAD_INVALID = 0, THREAD_SUCCESS = 1, THREAD_ERROR = 2, + THREAD_TEST_FAILED = 3, }; static void *thread_sync(void *arg) @@ -316,6 +317,64 @@ TEST(signal_scoping_thread_after) EXPECT_EQ(0, close(thread_pipe[1])); } +struct thread_setuid_args { + int pipe_read, new_uid; +}; + +void *thread_setuid(void *ptr) +{ + const struct thread_setuid_args *arg = ptr; + char buf; + + if (read(arg->pipe_read, &buf, 1) != 1) + return (void *)THREAD_ERROR; + + /* libc's setuid() should update all thread's credentials. */ + if (getuid() != arg->new_uid) + return (void *)THREAD_TEST_FAILED; + + return (void *)THREAD_SUCCESS; +} + +TEST(signal_scoping_thread_setuid) +{ + struct thread_setuid_args arg; + pthread_t no_sandbox_thread; + enum thread_return ret = THREAD_INVALID; + int pipe_parent[2]; + int prev_uid; + + disable_caps(_metadata); + + /* This test does not need to be run as root. */ + prev_uid = getuid(); + arg.new_uid = prev_uid + 1; + EXPECT_LT(0, arg.new_uid); + + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + arg.pipe_read = pipe_parent[0]; + + /* Capabilities must be set before creating a new thread. */ + set_cap(_metadata, CAP_SETUID); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid, + &arg)); + + /* Enforces restriction after creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + EXPECT_NE(arg.new_uid, getuid()); + EXPECT_EQ(0, setuid(arg.new_uid)); + EXPECT_EQ(arg.new_uid, getuid()); + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + + EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); + EXPECT_EQ(THREAD_SUCCESS, ret); + + clear_cap(_metadata, CAP_SETUID); + EXPECT_EQ(0, close(pipe_parent[0])); + EXPECT_EQ(0, close(pipe_parent[1])); +} + const short backlog = 10; static volatile sig_atomic_t signal_received; -- 2.51.0 From 9b08a16637eeef4d6d3a8a3b69714e8930676248 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 20 Mar 2025 20:06:50 +0100 Subject: [PATCH 13/16] lsm: Add audit_log_lsm_data() helper MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Extract code from dump_common_audit_data() into the audit_log_lsm_data() helper. This helps reuse common LSM audit data while not abusing AUDIT_AVC records because of the common_lsm_audit() helper. Depends-on: 7ccbe076d987 ("lsm: Only build lsm_audit.c if CONFIG_SECURITY and CONFIG_AUDIT are set") Cc: Casey Schaufler Cc: James Morris Cc: Serge E. Hallyn Acked-by: Paul Moore Link: https://lore.kernel.org/r/20250320190717.2287696-2-mic@digikod.net Reviewed-by: Günther Noack Signed-off-by: Mickaël Salaün --- include/linux/lsm_audit.h | 8 ++++++++ security/lsm_audit.c | 27 ++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index e13d2f947b51..bddd694f7c4c 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -132,6 +132,9 @@ void common_lsm_audit(struct common_audit_data *a, void (*pre_audit)(struct audit_buffer *, void *), void (*post_audit)(struct audit_buffer *, void *)); +void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a); + #else /* CONFIG_AUDIT */ static inline void common_lsm_audit(struct common_audit_data *a, @@ -140,6 +143,11 @@ static inline void common_lsm_audit(struct common_audit_data *a, { } +static inline void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a) +{ +} + #endif /* CONFIG_AUDIT */ #endif diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 52db886dbba8..a61c7ebdb6a7 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -189,16 +189,13 @@ static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, } /** - * dump_common_audit_data - helper to dump common audit data + * audit_log_lsm_data - helper to log common LSM audit data * @ab : the audit buffer * @a : common audit data - * */ -static void dump_common_audit_data(struct audit_buffer *ab, - struct common_audit_data *a) +void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a) { - char comm[sizeof(current->comm)]; - /* * To keep stack sizes in check force programmers to notice if they * start making this union too large! See struct lsm_network_audit @@ -206,9 +203,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, */ BUILD_BUG_ON(sizeof(a->u) > sizeof(void *)*2); - audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current)); - audit_log_untrustedstring(ab, get_task_comm(comm, current)); - switch (a->type) { case LSM_AUDIT_DATA_NONE: return; @@ -431,6 +425,21 @@ static void dump_common_audit_data(struct audit_buffer *ab, } /* switch (a->type) */ } +/** + * dump_common_audit_data - helper to dump common audit data + * @ab : the audit buffer + * @a : common audit data + */ +static void dump_common_audit_data(struct audit_buffer *ab, + const struct common_audit_data *a) +{ + char comm[sizeof(current->comm)]; + + audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current)); + audit_log_untrustedstring(ab, get_task_comm(comm, current)); + audit_log_lsm_data(ab, a); +} + /** * common_lsm_audit - generic LSM auditing function * @a: auxiliary audit data -- 2.51.0 From d9d2a68ed44bbae598a81cb95e0746fa6b13b57f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 20 Mar 2025 20:06:51 +0100 Subject: [PATCH 14/16] landlock: Add unique ID generator MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Landlock IDs can be generated to uniquely identify Landlock objects. For now, only Landlock domains get an ID at creation time. These IDs map to immutable domain hierarchies. Landlock IDs have important properties: - They are unique during the lifetime of the running system thanks to the 64-bit values: at worse, 2^60 - 2*2^32 useful IDs. - They are always greater than 2^32 and must then be stored in 64-bit integer types. - The initial ID (at boot time) is randomly picked between 2^32 and 2^33, which limits collisions in logs across different boots. - IDs are sequential, which enables users to order them. - IDs may not be consecutive but increase with a random 2^4 step, which limits side channels. Such IDs can be exposed to unprivileged processes, even if it is not the case with this audit patch series. The domain IDs will be useful for user space to identify sandboxes and get their properties. These Landlock IDs are more secure that other absolute kernel IDs such as pipe's inodes which rely on a shared global counter. For checkpoint/restore features (i.e. CRIU), we could easily implement a privileged interface (e.g. sysfs) to set the next ID counter. IDR/IDA are not used because we only need a bijection from Landlock objects to Landlock IDs, and we must not recycle IDs. This enables us to identify all Landlock objects during the lifetime of the system (e.g. in logs), but not to access an object from an ID nor know if an ID is assigned. Using a counter is simpler, it scales (i.e. avoids growing memory footprint), and it does not require locking. We'll use proper file descriptors (with IDs used as inode numbers) to access Landlock objects. Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250320190717.2287696-3-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/.kunitconfig | 2 + security/landlock/Makefile | 2 + security/landlock/id.c | 251 +++++++++++++++++++ security/landlock/id.h | 25 ++ security/landlock/setup.c | 2 + tools/testing/kunit/configs/all_tests.config | 2 + 6 files changed, 284 insertions(+) create mode 100644 security/landlock/id.c create mode 100644 security/landlock/id.h diff --git a/security/landlock/.kunitconfig b/security/landlock/.kunitconfig index 03e119466604..f9423f01ac5b 100644 --- a/security/landlock/.kunitconfig +++ b/security/landlock/.kunitconfig @@ -1,4 +1,6 @@ +CONFIG_AUDIT=y CONFIG_KUNIT=y +CONFIG_NET=y CONFIG_SECURITY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_SECURITY_LANDLOCK_KUNIT_TEST=y diff --git a/security/landlock/Makefile b/security/landlock/Makefile index b4538b7cf7d2..e1777abbc413 100644 --- a/security/landlock/Makefile +++ b/security/landlock/Makefile @@ -4,3 +4,5 @@ landlock-y := setup.o syscalls.o object.o ruleset.o \ cred.o task.o fs.o landlock-$(CONFIG_INET) += net.o + +landlock-$(CONFIG_AUDIT) += id.o diff --git a/security/landlock/id.c b/security/landlock/id.c new file mode 100644 index 000000000000..11fab9259c15 --- /dev/null +++ b/security/landlock/id.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Landlock - Unique identification number generator + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#include +#include +#include +#include + +#include "common.h" +#include "id.h" + +#define COUNTER_PRE_INIT 0 + +static atomic64_t next_id = ATOMIC64_INIT(COUNTER_PRE_INIT); + +static void __init init_id(atomic64_t *const counter, const u32 random_32bits) +{ + u64 init; + + /* + * Ensures sure 64-bit values are always used by user space (or may + * fail with -EOVERFLOW), and makes this testable. + */ + init = 1ULL << 32; + + /* + * Makes a large (2^32) boot-time value to limit ID collision in logs + * from different boots, and to limit info leak about the number of + * initially (relative to the reader) created elements (e.g. domains). + */ + init += random_32bits; + + /* Sets first or ignores. This will be the first ID. */ + atomic64_cmpxchg(counter, COUNTER_PRE_INIT, init); +} + +#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST + +static void __init test_init_min(struct kunit *const test) +{ + atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); + + init_id(&counter, 0); + KUNIT_EXPECT_EQ(test, atomic64_read(&counter), 1ULL + U32_MAX); +} + +static void __init test_init_max(struct kunit *const test) +{ + atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); + + init_id(&counter, ~0); + KUNIT_EXPECT_EQ(test, atomic64_read(&counter), 1 + (2ULL * U32_MAX)); +} + +static void __init test_init_once(struct kunit *const test) +{ + const u64 first_init = 1ULL + U32_MAX; + atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); + + init_id(&counter, 0); + KUNIT_EXPECT_EQ(test, atomic64_read(&counter), first_init); + + init_id(&counter, ~0); + KUNIT_EXPECT_EQ_MSG( + test, atomic64_read(&counter), first_init, + "Should still have the same value after the subsequent init_id()"); +} + +#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ + +void __init landlock_init_id(void) +{ + return init_id(&next_id, get_random_u32()); +} + +/* + * It's not worth it to try to hide the monotonic counter because it can still + * be inferred (with N counter ranges), and if we are allowed to read the inode + * number we should also be allowed to read the time creation anyway, and it + * can be handy to store and sort domain IDs for user space. + * + * Returns the value of next_id and increment it to let some space for the next + * one. + */ +static u64 get_id_range(size_t number_of_ids, atomic64_t *const counter, + u8 random_4bits) +{ + u64 id, step; + + /* + * We should return at least 1 ID, and we may need a set of consecutive + * ones (e.g. to generate a set of inodes). + */ + if (WARN_ON_ONCE(number_of_ids <= 0)) + number_of_ids = 1; + + /* + * Blurs the next ID guess with 1/16 ratio. We get 2^(64 - 4) - + * (2 * 2^32), so a bit less than 2^60 available IDs, which should be + * much more than enough considering the number of CPU cycles required + * to get a new ID (e.g. a full landlock_restrict_self() call), and the + * cost of draining all available IDs during the system's uptime. + */ + random_4bits = random_4bits % (1 << 4); + step = number_of_ids + random_4bits; + + /* It is safe to cast a signed atomic to an unsigned value. */ + id = atomic64_fetch_add(step, counter); + + /* Warns if landlock_init_id() was not called. */ + WARN_ON_ONCE(id == COUNTER_PRE_INIT); + return id; +} + +#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST + +static void test_range1_rand0(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 0), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 1); +} + +static void test_range1_rand1(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 1), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 2); +} + +static void test_range1_rand16(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 16), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 1); +} + +static void test_range2_rand0(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 0), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 2); +} + +static void test_range2_rand1(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 1), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 3); +} + +static void test_range2_rand2(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 2), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 4); +} + +static void test_range2_rand16(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 16), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 2); +} + +#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ + +/** + * landlock_get_id_range - Get a range of unique IDs + * + * @number_of_ids: Number of IDs to hold. Must be greater than one. + * + * Returns: The first ID in the range. + */ +u64 landlock_get_id_range(size_t number_of_ids) +{ + return get_id_range(number_of_ids, &next_id, get_random_u8()); +} + +#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST + +static struct kunit_case __refdata test_cases[] = { + /* clang-format off */ + KUNIT_CASE(test_init_min), + KUNIT_CASE(test_init_max), + KUNIT_CASE(test_init_once), + KUNIT_CASE(test_range1_rand0), + KUNIT_CASE(test_range1_rand1), + KUNIT_CASE(test_range1_rand16), + KUNIT_CASE(test_range2_rand0), + KUNIT_CASE(test_range2_rand1), + KUNIT_CASE(test_range2_rand2), + KUNIT_CASE(test_range2_rand16), + {} + /* clang-format on */ +}; + +static struct kunit_suite test_suite = { + .name = "landlock_id", + .test_cases = test_cases, +}; + +kunit_test_init_section_suite(test_suite); + +#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ diff --git a/security/landlock/id.h b/security/landlock/id.h new file mode 100644 index 000000000000..45dcfb9e9a8b --- /dev/null +++ b/security/landlock/id.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Landlock - Unique identification number generator + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#ifndef _SECURITY_LANDLOCK_ID_H +#define _SECURITY_LANDLOCK_ID_H + +#ifdef CONFIG_AUDIT + +void __init landlock_init_id(void); + +u64 landlock_get_id_range(size_t number_of_ids); + +#else /* CONFIG_AUDIT */ + +static inline void __init landlock_init_id(void) +{ +} + +#endif /* CONFIG_AUDIT */ + +#endif /* _SECURITY_LANDLOCK_ID_H */ diff --git a/security/landlock/setup.c b/security/landlock/setup.c index 0c85ea27e409..bd53c7a56ab9 100644 --- a/security/landlock/setup.c +++ b/security/landlock/setup.c @@ -15,6 +15,7 @@ #include "cred.h" #include "errata.h" #include "fs.h" +#include "id.h" #include "net.h" #include "setup.h" #include "task.h" @@ -67,6 +68,7 @@ static int __init landlock_init(void) landlock_add_task_hooks(); landlock_add_fs_hooks(); landlock_add_net_hooks(); + landlock_init_id(); landlock_initialized = true; pr_info("Up and running.\n"); return 0; diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index b0049be00c70..cdd9782f9646 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -41,6 +41,8 @@ CONFIG_DAMON_PADDR=y CONFIG_REGMAP_BUILD=y +CONFIG_AUDIT=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y -- 2.51.0 From 5b95b329befaf18020a0d3cd0223a90bd230eeb9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 20 Mar 2025 20:06:52 +0100 Subject: [PATCH 15/16] landlock: Move domain hierarchy management MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Create a new domain.h file containing the struct landlock_hierarchy definition and helpers. This type will grow with audit support. This also prepares for a new domain type. Cc: Günther Noack Link: https://lore.kernel.org/r/20250320190717.2287696-4-mic@digikod.net Reviewed-by: Günther Noack Signed-off-by: Mickaël Salaün --- security/landlock/domain.h | 48 +++++++++++++++++++++++++++++++++++++ security/landlock/ruleset.c | 21 +++------------- security/landlock/ruleset.h | 17 +------------ security/landlock/task.c | 1 + 4 files changed, 53 insertions(+), 34 deletions(-) create mode 100644 security/landlock/domain.h diff --git a/security/landlock/domain.h b/security/landlock/domain.h new file mode 100644 index 000000000000..d22712e5fb0f --- /dev/null +++ b/security/landlock/domain.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Landlock - Domain management + * + * Copyright © 2016-2020 Mickaël Salaün + * Copyright © 2018-2020 ANSSI + */ + +#ifndef _SECURITY_LANDLOCK_DOMAIN_H +#define _SECURITY_LANDLOCK_DOMAIN_H + +#include +#include + +/** + * struct landlock_hierarchy - Node in a domain hierarchy + */ +struct landlock_hierarchy { + /** + * @parent: Pointer to the parent node, or NULL if it is a root + * Landlock domain. + */ + struct landlock_hierarchy *parent; + /** + * @usage: Number of potential children domains plus their parent + * domain. + */ + refcount_t usage; +}; + +static inline void +landlock_get_hierarchy(struct landlock_hierarchy *const hierarchy) +{ + if (hierarchy) + refcount_inc(&hierarchy->usage); +} + +static inline void landlock_put_hierarchy(struct landlock_hierarchy *hierarchy) +{ + while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) { + const struct landlock_hierarchy *const freeme = hierarchy; + + hierarchy = hierarchy->parent; + kfree(freeme); + } +} + +#endif /* _SECURITY_LANDLOCK_DOMAIN_H */ diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index bff4e40a3093..adb7f87828df 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -23,6 +23,7 @@ #include #include "access.h" +#include "domain.h" #include "limits.h" #include "object.h" #include "ruleset.h" @@ -307,22 +308,6 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset, return insert_rule(ruleset, id, &layers, ARRAY_SIZE(layers)); } -static void get_hierarchy(struct landlock_hierarchy *const hierarchy) -{ - if (hierarchy) - refcount_inc(&hierarchy->usage); -} - -static void put_hierarchy(struct landlock_hierarchy *hierarchy) -{ - while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) { - const struct landlock_hierarchy *const freeme = hierarchy; - - hierarchy = hierarchy->parent; - kfree(freeme); - } -} - static int merge_tree(struct landlock_ruleset *const dst, struct landlock_ruleset *const src, const enum landlock_key_type key_type) @@ -477,7 +462,7 @@ static int inherit_ruleset(struct landlock_ruleset *const parent, err = -EINVAL; goto out_unlock; } - get_hierarchy(parent->hierarchy); + landlock_get_hierarchy(parent->hierarchy); child->hierarchy->parent = parent->hierarchy; out_unlock: @@ -501,7 +486,7 @@ static void free_ruleset(struct landlock_ruleset *const ruleset) free_rule(freeme, LANDLOCK_KEY_NET_PORT); #endif /* IS_ENABLED(CONFIG_INET) */ - put_hierarchy(ruleset->hierarchy); + landlock_put_hierarchy(ruleset->hierarchy); kfree(ruleset); } diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h index 52f4f0af6ab0..bbb5996545d2 100644 --- a/security/landlock/ruleset.h +++ b/security/landlock/ruleset.h @@ -17,6 +17,7 @@ #include #include "access.h" +#include "domain.h" #include "limits.h" #include "object.h" @@ -108,22 +109,6 @@ struct landlock_rule { struct landlock_layer layers[] __counted_by(num_layers); }; -/** - * struct landlock_hierarchy - Node in a ruleset hierarchy - */ -struct landlock_hierarchy { - /** - * @parent: Pointer to the parent node, or NULL if it is a root - * Landlock domain. - */ - struct landlock_hierarchy *parent; - /** - * @usage: Number of potential children domains plus their parent - * domain. - */ - refcount_t usage; -}; - /** * struct landlock_ruleset - Landlock ruleset * diff --git a/security/landlock/task.c b/security/landlock/task.c index 4578ce6e319d..e04646d80e78 100644 --- a/security/landlock/task.c +++ b/security/landlock/task.c @@ -19,6 +19,7 @@ #include "common.h" #include "cred.h" +#include "domain.h" #include "fs.h" #include "ruleset.h" #include "setup.h" -- 2.51.0 From ae2483a26017d24f505caa87935fb8f17117bbfa Mon Sep 17 00:00:00 2001 From: =?utf8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 20 Mar 2025 20:06:53 +0100 Subject: [PATCH 16/16] landlock: Prepare to use credential instead of domain for filesystem MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This cosmetic change is needed for audit support, specifically to be able to filter according to cross-execution boundaries. Add landlock_get_applicable_subject(), mainly a copy of landlock_get_applicable_domain(), which will fully replace it in a following commit. Optimize current_check_access_path() to only handle the access request. Partially replace get_current_fs_domain() with explicit calls to landlock_get_applicable_subject(). The remaining ones will follow with more changes. Remove explicit domain->num_layers check which is now part of the landlock_get_applicable_subject() call. Cc: Günther Noack Link: https://lore.kernel.org/r/20250320190717.2287696-5-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/cred.h | 53 +++++++++++++++++++++++++++++- security/landlock/fs.c | 69 +++++++++++++++++++++++----------------- 2 files changed, 92 insertions(+), 30 deletions(-) diff --git a/security/landlock/cred.h b/security/landlock/cred.h index bf755459838a..eb691130dd67 100644 --- a/security/landlock/cred.h +++ b/security/landlock/cred.h @@ -1,9 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Landlock LSM - Credential hooks + * Landlock - Credential hooks * * Copyright © 2019-2020 Mickaël Salaün * Copyright © 2019-2020 ANSSI + * Copyright © 2021-2025 Microsoft Corporation */ #ifndef _SECURITY_LANDLOCK_CRED_H @@ -13,6 +14,7 @@ #include #include +#include "access.h" #include "ruleset.h" #include "setup.h" @@ -53,6 +55,55 @@ static inline bool landlocked(const struct task_struct *const task) return has_dom; } +/** + * landlock_get_applicable_subject - Return the subject's Landlock credential + * if its enforced domain applies to (i.e. + * handles) at least one of the access rights + * specified in @masks + * + * @cred: credential + * @masks: access masks + * @handle_layer: returned youngest layer handling a subset of @masks. Not set + * if the function returns NULL. + * + * Returns: landlock_cred(@cred) if any access rights specified in @masks is + * handled, or NULL otherwise. + */ +static inline const struct landlock_cred_security * +landlock_get_applicable_subject(const struct cred *const cred, + const struct access_masks masks, + size_t *const handle_layer) +{ + const union access_masks_all masks_all = { + .masks = masks, + }; + const struct landlock_ruleset *domain; + ssize_t layer_level; + + if (!cred) + return NULL; + + domain = landlock_cred(cred)->domain; + if (!domain) + return NULL; + + for (layer_level = domain->num_layers - 1; layer_level >= 0; + layer_level--) { + union access_masks_all layer = { + .masks = domain->access_masks[layer_level], + }; + + if (layer.all & masks_all.all) { + if (handle_layer) + *handle_layer = layer_level; + + return landlock_cred(cred); + } + } + + return NULL; +} + __init void landlock_add_cred_hooks(void); #endif /* _SECURITY_LANDLOCK_CRED_H */ diff --git a/security/landlock/fs.c b/security/landlock/fs.c index c19aab87c4d2..f59db97333f3 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Landlock LSM - Filesystem management and hooks + * Landlock - Filesystem management and hooks * * Copyright © 2016-2020 Mickaël Salaün * Copyright © 2018-2020 ANSSI - * Copyright © 2021-2022 Microsoft Corporation + * Copyright © 2021-2025 Microsoft Corporation * Copyright © 2022 Günther Noack * Copyright © 2023-2024 Google LLC */ @@ -773,11 +773,14 @@ static bool is_access_to_paths_allowed( if (!access_request_parent1 && !access_request_parent2) return true; - if (WARN_ON_ONCE(!domain || !path)) + + if (WARN_ON_ONCE(!path)) return true; + if (is_nouser_or_private(path->dentry)) return true; - if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) + + if (WARN_ON_ONCE(!layer_masks_parent1)) return false; allowed_parent1 = is_layer_masks_allowed(layer_masks_parent1); @@ -928,16 +931,21 @@ jump_up: static int current_check_access_path(const struct path *const path, access_mask_t access_request) { - const struct landlock_ruleset *const dom = get_current_fs_domain(); + const struct access_masks masks = { + .fs = access_request, + }; + const struct landlock_cred_security *const subject = + landlock_get_applicable_subject(current_cred(), masks, NULL); layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; - if (!dom) + if (!subject) return 0; - access_request = landlock_init_layer_masks( - dom, access_request, &layer_masks, LANDLOCK_KEY_INODE); - if (is_access_to_paths_allowed(dom, path, access_request, &layer_masks, - NULL, 0, NULL, NULL)) + access_request = landlock_init_layer_masks(subject->domain, + access_request, &layer_masks, + LANDLOCK_KEY_INODE); + if (is_access_to_paths_allowed(subject->domain, path, access_request, + &layer_masks, NULL, 0, NULL, NULL)) return 0; return -EACCES; @@ -1100,7 +1108,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, struct dentry *const new_dentry, const bool removable, const bool exchange) { - const struct landlock_ruleset *const dom = get_current_fs_domain(); + const struct landlock_cred_security *const subject = + landlock_get_applicable_subject(current_cred(), any_fs, NULL); bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; @@ -1108,10 +1117,9 @@ static int current_check_refer_path(struct dentry *const old_dentry, layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; - if (!dom) + if (!subject) return 0; - if (WARN_ON_ONCE(dom->num_layers < 1)) - return -EACCES; + if (unlikely(d_is_negative(old_dentry))) return -ENOENT; if (exchange) { @@ -1136,10 +1144,11 @@ static int current_check_refer_path(struct dentry *const old_dentry, * for same-directory referer (i.e. no reparenting). */ access_request_parent1 = landlock_init_layer_masks( - dom, access_request_parent1 | access_request_parent2, + subject->domain, + access_request_parent1 | access_request_parent2, &layer_masks_parent1, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed( - dom, new_dir, access_request_parent1, + subject->domain, new_dir, access_request_parent1, &layer_masks_parent1, NULL, 0, NULL, NULL)) return 0; return -EACCES; @@ -1162,10 +1171,12 @@ static int current_check_refer_path(struct dentry *const old_dentry, old_dentry->d_parent; /* new_dir->dentry is equal to new_dentry->d_parent */ - allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_parent, + allow_parent1 = collect_domain_accesses(subject->domain, mnt_dir.dentry, + old_parent, &layer_masks_parent1); - allow_parent2 = collect_domain_accesses( - dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); + allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry, + new_dir->dentry, + &layer_masks_parent2); if (allow_parent1 && allow_parent2) return 0; @@ -1177,9 +1188,9 @@ static int current_check_refer_path(struct dentry *const old_dentry, * destination parent access rights. */ if (is_access_to_paths_allowed( - dom, &mnt_dir, access_request_parent1, &layer_masks_parent1, - old_dentry, access_request_parent2, &layer_masks_parent2, - exchange ? new_dentry : NULL)) + subject->domain, &mnt_dir, access_request_parent1, + &layer_masks_parent1, old_dentry, access_request_parent2, + &layer_masks_parent2, exchange ? new_dentry : NULL)) return 0; /* @@ -1506,11 +1517,10 @@ static int hook_file_open(struct file *const file) layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_mask_t open_access_request, full_access_request, allowed_access, optional_access; - const struct landlock_ruleset *const dom = - landlock_get_applicable_domain( - landlock_cred(file->f_cred)->domain, any_fs); + const struct landlock_cred_security *const subject = + landlock_get_applicable_subject(file->f_cred, any_fs, NULL); - if (!dom) + if (!subject) return 0; /* @@ -1531,9 +1541,10 @@ static int hook_file_open(struct file *const file) full_access_request = open_access_request | optional_access; if (is_access_to_paths_allowed( - dom, &file->f_path, - landlock_init_layer_masks(dom, full_access_request, - &layer_masks, LANDLOCK_KEY_INODE), + subject->domain, &file->f_path, + landlock_init_layer_masks(subject->domain, + full_access_request, &layer_masks, + LANDLOCK_KEY_INODE), &layer_masks, NULL, 0, NULL, NULL)) { allowed_access = full_access_request; } else { -- 2.51.0