]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
pidfs: allow to retrieve exit information
author: Christian Brauner <brauner@kernel.org>
Wed, 5 Mar 2025 10:08:16 +0000 (11:08 +0100)
committer: Christian Brauner <brauner@kernel.org>
Wed, 5 Mar 2025 12:26:17 +0000 (13:26 +0100)
Some tools like systemd's journal need to retrieve the exit and cgroup
information after a process has already been reaped. This can happen,
e.g., when retrieving a pidfd via SCM_PIDFD or SCM_PEERPIDFD.

Link: https://lore.kernel.org/r/20250305-work-pidfs-kill_on_last_close-v3-6-c8c3d8361705@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/pidfs.c
include/uapi/linux/pidfd.h

index c4e6527013e77d07ad9595b9f391348fe3bb73fc..3c630e9d4a629a1e62eb904034c06bf750c61e77 100644 (file)
@@ -36,7 +36,8 @@ struct pidfs_exit_info {
 };
 
 struct pidfs_inode {
-       struct pidfs_exit_info exit_info;
+       struct pidfs_exit_info __pei;
+       struct pidfs_exit_info *exit_info;
        struct inode vfs_inode;
 };
 
@@ -228,17 +229,28 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
        return poll_flags;
 }
 
-static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
+static inline bool pid_in_current_pidns(const struct pid *pid)
+{
+       const struct pid_namespace *ns = task_active_pid_ns(current);
+
+       if (ns->level <= pid->level)
+               return pid->numbers[ns->level].ns == ns;
+
+       return false;
+}
+
+static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
+       struct inode *inode = file_inode(file);
+       struct pid *pid = pidfd_pid(file);
        size_t usize = _IOC_SIZE(cmd);
        struct pidfd_info kinfo = {};
+       struct pidfs_exit_info *exit_info;
        struct user_namespace *user_ns;
+       struct task_struct *task;
        const struct cred *c;
        __u64 mask;
-#ifdef CONFIG_CGROUPS
-       struct cgroup *cgrp;
-#endif
 
        if (!uinfo)
                return -EINVAL;
@@ -248,6 +260,37 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
        if (copy_from_user(&mask, &uinfo->mask, sizeof(mask)))
                return -EFAULT;
 
+       /*
+        * Restrict information retrieval to tasks within the caller's pid
+        * namespace hierarchy.
+        */
+       if (!pid_in_current_pidns(pid))
+               return -ESRCH;
+
+       if (mask & PIDFD_INFO_EXIT) {
+               exit_info = READ_ONCE(pidfs_i(inode)->exit_info);
+               if (exit_info) {
+                       kinfo.mask |= PIDFD_INFO_EXIT;
+#ifdef CONFIG_CGROUPS
+                       kinfo.cgroupid = exit_info->cgroupid;
+                       kinfo.mask |= PIDFD_INFO_CGROUPID;
+#endif
+                       kinfo.exit_code = exit_info->exit_code;
+               }
+       }
+
+       task = get_pid_task(pid, PIDTYPE_PID);
+       if (!task) {
+               /*
+                * If the task has already been reaped, only exit
+                * information is available
+                */
+               if (!(mask & PIDFD_INFO_EXIT))
+                       return -ESRCH;
+
+               goto copy_out;
+       }
+
        c = get_task_cred(task);
        if (!c)
                return -ESRCH;
@@ -267,11 +310,15 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
        put_cred(c);
 
 #ifdef CONFIG_CGROUPS
-       rcu_read_lock();
-       cgrp = task_dfl_cgroup(task);
-       kinfo.cgroupid = cgroup_id(cgrp);
-       kinfo.mask |= PIDFD_INFO_CGROUPID;
-       rcu_read_unlock();
+       if (!kinfo.cgroupid) {
+               struct cgroup *cgrp;
+
+               rcu_read_lock();
+               cgrp = task_dfl_cgroup(task);
+               kinfo.cgroupid = cgroup_id(cgrp);
+               kinfo.mask |= PIDFD_INFO_CGROUPID;
+               rcu_read_unlock();
+       }
 #endif
 
        /*
@@ -291,6 +338,7 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
        if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
                return -ESRCH;
 
+copy_out:
        /*
         * If userspace and the kernel have the same struct size it can just
         * be copied. If userspace provides an older struct, only the bits that
@@ -325,7 +373,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct task_struct *task __free(put_task) = NULL;
        struct nsproxy *nsp __free(put_nsproxy) = NULL;
-       struct pid *pid = pidfd_pid(file);
        struct ns_common *ns_common = NULL;
        struct pid_namespace *pid_ns;
 
@@ -340,13 +387,13 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return put_user(file_inode(file)->i_generation, argp);
        }
 
-       task = get_pid_task(pid, PIDTYPE_PID);
-       if (!task)
-               return -ESRCH;
-
        /* Extensible IOCTL that does not open namespace FDs, take a shortcut */
        if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
-               return pidfd_info(task, cmd, arg);
+               return pidfd_info(file, cmd, arg);
+
+       task = get_pid_task(pidfd_pid(file), PIDTYPE_PID);
+       if (!task)
+               return -ESRCH;
 
        if (arg)
                return -EINVAL;
@@ -484,7 +531,7 @@ void pidfs_exit(struct task_struct *tsk)
        dentry = stashed_dentry_get(&task_pid(tsk)->stashed);
        if (dentry) {
                struct inode *inode = d_inode(dentry);
-               struct pidfs_exit_info *exit_info = &pidfs_i(inode)->exit_info;
+               struct pidfs_exit_info *exit_info = &pidfs_i(inode)->__pei;
 #ifdef CONFIG_CGROUPS
                struct cgroup *cgrp;
 
@@ -495,6 +542,8 @@ void pidfs_exit(struct task_struct *tsk)
 #endif
                exit_info->exit_code = tsk->exit_code;
 
+               /* Ensure that PIDFD_GET_INFO sees either all or nothing. */
+               smp_store_release(&pidfs_i(inode)->exit_info, &pidfs_i(inode)->__pei);
                dput(dentry);
        }
 }
@@ -562,7 +611,8 @@ static struct inode *pidfs_alloc_inode(struct super_block *sb)
        if (!pi)
                return NULL;
 
-       memset(&pi->exit_info, 0, sizeof(pi->exit_info));
+       memset(&pi->__pei, 0, sizeof(pi->__pei));
+       pi->exit_info = NULL;
 
        return &pi->vfs_inode;
 }
index e0abd0b18841aea7ed0e295d58e410868fa92f27..5cd5dcbfe884857cc39129c521761407a29f13a4 100644 (file)
@@ -20,6 +20,7 @@
 #define PIDFD_INFO_PID                 (1UL << 0) /* Always returned, even if not requested */
 #define PIDFD_INFO_CREDS               (1UL << 1) /* Always returned, even if not requested */
 #define PIDFD_INFO_CGROUPID            (1UL << 2) /* Always returned if available, even if not requested */
+#define PIDFD_INFO_EXIT                        (1UL << 3) /* Only returned if requested. */
 
 #define PIDFD_INFO_SIZE_VER0           64 /* sizeof first published struct */
 
@@ -86,7 +87,7 @@ struct pidfd_info {
        __u32 sgid;
        __u32 fsuid;
        __u32 fsgid;
-       __u32 spare0[1];
+       __s32 exit_code;
 };
 
 #define PIDFS_IOCTL_MAGIC 0xFF