www.infradead.org Git - users/hch/xfs.git/commitdiff
pidfs: allow retrieval of namespace file descriptors
author Christian Brauner <brauner@kernel.org>
Thu, 27 Jun 2024 14:11:42 +0000 (16:11 +0200)
committer Christian Brauner <brauner@kernel.org>
Fri, 28 Jun 2024 08:37:29 +0000 (10:37 +0200)
For users that hold a reference to a pidfd, procfs might not even be
available, nor is it desirable to parse through procfs just for the sake
of getting namespace file descriptors for a process.

Make it possible to directly retrieve namespace file descriptors from a
pidfd. Pidfds already can be used with setns() to change a set of
namespaces atomically.

Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-4-7e9ab6cc3bb1@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/pidfs.c
include/uapi/linux/pidfd.h

index dbb9d854d1c57c94ec17f1fd2d332c076b2d8228..c9cb14181deff8d136e6e0aff6f3a8caa1ab1c3a 100644 (file)
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
 #include <linux/pseudo_fs.h>
+#include <linux/ptrace.h>
 #include <linux/seq_file.h>
 #include <uapi/linux/pidfd.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
 
 #include "internal.h"
+#include "mount.h"
 
 #ifdef CONFIG_PROC_FS
 /**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
        return poll_flags;
 }
 
+/**
+ * pidfd_ioctl - open one of the namespaces of the task a pidfd refers to
+ * @file: pidfd the ioctl was issued on
+ * @cmd:  one of the PIDFD_GET_*_NAMESPACE commands
+ * @arg:  must be zero; none of the commands take an argument
+ *
+ * Looks up the task behind @file, pins its nsproxy under task_lock() and,
+ * after a PTRACE_MODE_READ_FSCREDS access check, takes a reference on the
+ * namespace selected by @cmd and hands it to open_namespace().
+ *
+ * Return: the result of open_namespace() on success, -EINVAL if @arg is
+ * non-zero, -ESRCH if the task, its nsproxy or its active pid namespace
+ * is already gone, -EACCES if the ptrace check fails and -ENOIOCTLCMD for
+ * an unknown @cmd.
+ */
+static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       /* Both references are dropped automatically on every return path. */
+       struct task_struct *task __free(put_task) = NULL;
+       struct nsproxy *nsp __free(put_nsproxy) = NULL;
+       struct pid *pid = pidfd_pid(file);
+       struct ns_common *ns_common = NULL;
+       struct pid_namespace *pid_ns;
+
+       if (arg)
+               return -EINVAL;
+
+       task = get_pid_task(pid, PIDTYPE_PID);
+       if (!task)
+               return -ESRCH;
+
+       /* task->nsproxy is only sampled and pinned while holding task_lock(). */
+       scoped_guard(task_lock, task) {
+               nsp = task->nsproxy;
+               if (nsp)
+                       get_nsproxy(nsp);
+       }
+       if (!nsp)
+               return -ESRCH; /* just pretend it didn't exist */
+
+       /*
+        * We're trying to open a file descriptor to the namespace so perform a
+        * filesystem cred ptrace check. Also, we mirror nsfs behavior.
+        */
+       if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+               return -EACCES;
+
+       switch (cmd) {
+       /* Namespaces that hang off nsproxy. */
+       case PIDFD_GET_CGROUP_NAMESPACE:
+               get_cgroup_ns(nsp->cgroup_ns);
+               ns_common = to_ns_common(nsp->cgroup_ns);
+               break;
+       case PIDFD_GET_IPC_NAMESPACE:
+               get_ipc_ns(nsp->ipc_ns);
+               ns_common = to_ns_common(nsp->ipc_ns);
+               break;
+       case PIDFD_GET_MNT_NAMESPACE:
+               get_mnt_ns(nsp->mnt_ns);
+               ns_common = to_ns_common(nsp->mnt_ns);
+               break;
+       case PIDFD_GET_NET_NAMESPACE:
+               ns_common = to_ns_common(nsp->net_ns);
+               get_net_ns(ns_common);
+               break;
+       case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+               get_pid_ns(nsp->pid_ns_for_children);
+               ns_common = to_ns_common(nsp->pid_ns_for_children);
+               break;
+       case PIDFD_GET_TIME_NAMESPACE:
+               get_time_ns(nsp->time_ns);
+               ns_common = to_ns_common(nsp->time_ns);
+               break;
+       case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+               get_time_ns(nsp->time_ns_for_children);
+               ns_common = to_ns_common(nsp->time_ns_for_children);
+               break;
+       case PIDFD_GET_UTS_NAMESPACE:
+               get_uts_ns(nsp->uts_ns);
+               ns_common = to_ns_common(nsp->uts_ns);
+               break;
+       /* Namespaces that don't hang off nsproxy. */
+       case PIDFD_GET_USER_NAMESPACE:
+               rcu_read_lock();
+               ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
+               rcu_read_unlock();
+               break;
+       case PIDFD_GET_PID_NAMESPACE:
+               rcu_read_lock();
+               pid_ns = task_active_pid_ns(task);
+               /*
+                * The task may have been reaped since the nsproxy check
+                * above, in which case there is no active pid namespace
+                * left to take a reference on.
+                */
+               if (pid_ns)
+                       ns_common = to_ns_common(get_pid_ns(pid_ns));
+               rcu_read_unlock();
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       /* No namespace could be pinned: treat it like a vanished task. */
+       if (!ns_common)
+               return -ESRCH;
+
+       /* open_namespace() unconditionally consumes the reference */
+       return open_namespace(ns_common);
+}
+
 static const struct file_operations pidfs_file_operations = { /* operations table: poll, optional fdinfo, ioctl */
        .poll           = pidfd_poll,
 #ifdef CONFIG_PROC_FS
        .show_fdinfo    = pidfd_show_fdinfo, /* only wired up when procfs is built in */
 #endif
+       .unlocked_ioctl = pidfd_ioctl, /* handles the PIDFD_GET_*_NAMESPACE commands */
+       .compat_ioctl   = compat_ptr_ioctl, /* NOTE(review): presumably forwards compat callers to ->unlocked_ioctl - confirm */
 };
 
 struct pid *pidfd_pid(const struct file *file)
index 72ec000a97cda30dfeea1d04493f0ac7af7ed300..565fc0629fff5e575189078b9ce6a452e82c7ae5 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/types.h>
 #include <linux/fcntl.h>
+#include <linux/ioctl.h>
 
 /* Flags for pidfd_open().  */
 #define PIDFD_NONBLOCK O_NONBLOCK
 #define PIDFD_SIGNAL_THREAD_GROUP      (1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP     (1UL << 2)
 
+#define PIDFS_IOCTL_MAGIC 0xFF /* ioctl type value shared by every pidfd command below */
+
+#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1) /* task's nsproxy cgroup_ns */
+#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2) /* task's nsproxy ipc_ns */
+#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3) /* task's nsproxy mnt_ns */
+#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4) /* task's nsproxy net_ns */
+#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5) /* task's active pid namespace */
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6) /* task's nsproxy pid_ns_for_children */
+#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7) /* task's nsproxy time_ns */
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) /* task's nsproxy time_ns_for_children */
+#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9) /* user_ns taken from the task's credentials */
+#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10) /* task's nsproxy uts_ns */
+
 #endif /* _UAPI_LINUX_PIDFD_H */