* Key/keyring notifications
+ * Mount notifications.
The notifications buffers can be enabled by:
See Documentation/security/keys/core.rst for more information.
+ * WATCH_TYPE_MOUNT_NOTIFY
+
+ Notifications of this type indicate changes to mount attributes and the
+ mount topology within the subtree at the indicated point.
+
Event Filtering
===============
pipe2(fds, O_TMPFILE);
ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);
-It can then be set to receive keyring change notifications::
+It can then be set to receive notifications::
keyctl(KEYCTL_WATCH_KEY, KEY_SPEC_SESSION_KEYRING, fds[1], 0x01);
+ watch_mount(AT_FDCWD, "/", 0, fds[1], 0x02);
The notifications can then be consumed by something like the following::
case WATCH_TYPE_KEY_NOTIFY:
saw_key_change(&n.n);
break;
+ case WATCH_TYPE_MOUNT_NOTIFY:
+ saw_mount_change(&n.n);
+ break;
}
p += len;
547 common openat2 sys_openat2
548 common pidfd_getfd sys_pidfd_getfd
549 common faccessat2 sys_faccessat2
+550 common watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 440
+#define __NR_compat_syscalls 441
#endif
#define __ARCH_WANT_SYS_CLONE
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_watch_mount 440
+__SYSCALL(__NR_watch_mount, sys_watch_mount)
/*
* Please add new compat syscalls above this comment and update
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 n32 openat2 sys_openat2
438 n32 pidfd_getfd sys_pidfd_getfd
439 n32 faccessat2 sys_faccessat2
+440 n32 watch_mount sys_watch_mount
437 n64 openat2 sys_openat2
438 n64 pidfd_getfd sys_pidfd_getfd
439 n64 faccessat2 sys_faccessat2
+440 n64 watch_mount sys_watch_mount
437 o32 openat2 sys_openat2
438 o32 pidfd_getfd sys_pidfd_getfd
439 o32 faccessat2 sys_faccessat2
+440 o32 watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
+440 i386 watch_mount sys_watch_mount
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
#
# x32-specific system call numbers start at 512 to avoid cache impact
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common watch_mount sys_watch_mount
source "fs/notify/Kconfig"
+config MOUNT_NOTIFICATIONS
+ bool "Mount topology change notifications"
+ select WATCH_QUEUE
+ help
+ This option provides support for getting change notifications on the
+ mount tree topology. This makes use of the /dev/watch_queue misc
+ device to handle the notification buffer and provides the
+ mount_notify() system call to enable/disable watchpoints.
+
source "fs/quota/Kconfig"
source "fs/autofs/Kconfig"
endif
obj-$(CONFIG_PROC_FS) += proc_namespace.o
+obj-$(CONFIG_MOUNT_NOTIFICATIONS) += mount_notify.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
#include <linux/poll.h>
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
+#include <linux/watch_queue.h>
struct mnt_namespace {
atomic_t count;
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
struct hlist_head mnt_stuck_children;
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+ struct watch_list *mnt_watchers; /* Watches on dentries within this mount */
+#endif
} __randomize_layout;
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+extern void notify_mount(struct mount *triggered,
+ struct mount *aux,
+ enum mount_notification_subtype subtype,
+ u32 info_flags);
+#else
+static inline void notify_mount(struct mount *triggered,
+ struct mount *aux,
+ enum mount_notification_subtype subtype,
+ u32 info_flags)
+{
+}
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Provide mount topology/attribute change notifications.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include "mount.h"
+
+/*
+ * Post mount notifications to all watches going rootwards along the tree.
+ *
+ * Must be called with the mount_lock held.
+ */
+static void post_mount_notification(struct mount *changed,
+ struct mount_notification *notify)
+{
+ const struct cred *cred = current_cred();
+ struct path cursor;
+ struct mount *mnt;
+ unsigned seq;
+
+ seq = 0;
+ rcu_read_lock();
+restart:
+ cursor.mnt = &changed->mnt;
+ cursor.dentry = changed->mnt.mnt_root;
+ mnt = real_mount(cursor.mnt);
+ notify->watch.info &= ~NOTIFY_MOUNT_IN_SUBTREE;
+
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ for (;;) {
+ if (mnt->mnt_watchers &&
+ !hlist_empty(&mnt->mnt_watchers->watchers)) {
+ if (cursor.dentry->d_flags & DCACHE_MOUNT_WATCH)
+ post_watch_notification(mnt->mnt_watchers,
+ ¬ify->watch, cred,
+ (unsigned long)cursor.dentry);
+ } else {
+ cursor.dentry = mnt->mnt.mnt_root;
+ }
+ notify->watch.info |= NOTIFY_MOUNT_IN_SUBTREE;
+
+ if (cursor.dentry == cursor.mnt->mnt_root ||
+ IS_ROOT(cursor.dentry)) {
+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
+
+ /* Escaped? */
+ if (cursor.dentry != cursor.mnt->mnt_root)
+ break;
+
+ /* Global root? */
+ if (mnt == parent)
+ break;
+
+ cursor.dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = parent;
+ cursor.mnt = &mnt->mnt;
+ } else {
+ cursor.dentry = cursor.dentry->d_parent;
+ }
+ }
+
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+
+ done_seqretry(&rename_lock, seq);
+ rcu_read_unlock();
+}
+
+/*
+ * Generate a mount notification.
+ */
+void notify_mount(struct mount *trigger,
+ struct mount *aux,
+ enum mount_notification_subtype subtype,
+ u32 info_flags)
+{
+
+ struct mount_notification n;
+
+ memset(&n, 0, sizeof(n));
+ n.watch.type = WATCH_TYPE_MOUNT_NOTIFY;
+ n.watch.subtype = subtype;
+ n.watch.info = info_flags | watch_sizeof(n);
+ n.triggered_on = trigger->mnt_id;
+
+ switch (subtype) {
+ case NOTIFY_MOUNT_EXPIRY:
+ case NOTIFY_MOUNT_READONLY:
+ case NOTIFY_MOUNT_SETATTR:
+ break;
+
+ case NOTIFY_MOUNT_NEW_MOUNT:
+ case NOTIFY_MOUNT_UNMOUNT:
+ case NOTIFY_MOUNT_MOVE_FROM:
+ case NOTIFY_MOUNT_MOVE_TO:
+ n.auxiliary_mount = aux->mnt_id;
+ break;
+
+ default:
+ BUG();
+ }
+
+ post_mount_notification(trigger, &n);
+}
+
+static void release_mount_watch(struct watch *watch)
+{
+ struct dentry *dentry = (struct dentry *)(unsigned long)watch->id;
+
+ dput(dentry);
+}
+
+/**
+ * sys_watch_mount - Watch for mount topology/attribute changes
+ * @dfd: Base directory to pathwalk from or fd referring to mount.
+ * @filename: Path to mount to place the watch upon
+ * @at_flags: Pathwalk control flags
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ */
+SYSCALL_DEFINE5(watch_mount,
+ int, dfd,
+ const char __user *, filename,
+ unsigned int, at_flags,
+ int, watch_fd,
+ int, watch_id)
+{
+ struct watch_queue *wqueue;
+ struct watch_list *wlist = NULL;
+ struct watch *watch = NULL;
+ struct mount *m;
+ struct path path;
+ unsigned int lookup_flags =
+ LOOKUP_DIRECTORY | LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
+ int ret;
+
+ if (watch_id < -1 || watch_id > 0xff)
+ return -EINVAL;
+ if ((at_flags & ~(AT_NO_AUTOMOUNT | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+ if (at_flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (at_flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(dfd, filename, lookup_flags, &path);
+ if (ret)
+ return ret;
+
+ ret = inode_permission(path.dentry->d_inode, MAY_EXEC);
+ if (ret)
+ goto err_path;
+
+ wqueue = get_watch_queue(watch_fd);
+ if (IS_ERR(wqueue))
+ goto err_path;
+
+ m = real_mount(path.mnt);
+
+ if (watch_id >= 0) {
+ ret = -ENOMEM;
+ if (!READ_ONCE(m->mnt_watchers)) {
+ wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+ if (!wlist)
+ goto err_wqueue;
+ init_watch_list(wlist, release_mount_watch);
+ }
+
+ watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+ if (!watch)
+ goto err_wlist;
+
+ init_watch(watch, wqueue);
+ watch->id = (unsigned long)path.dentry;
+ watch->info_id = (u32)watch_id << WATCH_INFO_ID__SHIFT;
+
+ ret = security_watch_mount(watch, &path);
+ if (ret < 0)
+ goto err_watch;
+
+ down_write(&m->mnt.mnt_sb->s_umount);
+ if (!m->mnt_watchers) {
+ m->mnt_watchers = wlist;
+ wlist = NULL;
+ }
+
+ ret = add_watch_to_object(watch, m->mnt_watchers);
+ if (ret == 0) {
+ spin_lock(&path.dentry->d_lock);
+ path.dentry->d_flags |= DCACHE_MOUNT_WATCH;
+ spin_unlock(&path.dentry->d_lock);
+ dget(path.dentry);
+ watch = NULL;
+ }
+ up_write(&m->mnt.mnt_sb->s_umount);
+ } else {
+ down_write(&m->mnt.mnt_sb->s_umount);
+ ret = remove_watch_from_object(m->mnt_watchers, wqueue,
+ (unsigned long)path.dentry,
+ false);
+ up_write(&m->mnt.mnt_sb->s_umount);
+ }
+
+err_watch:
+ kfree(watch);
+err_wlist:
+ kfree(wlist);
+err_wqueue:
+ put_watch_queue(wqueue);
+err_path:
+ path_put(&path);
+ return ret;
+}
smp_wmb();
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
unlock_mount_hash();
+ if (ret == 0)
+ notify_mount(mnt, NULL, NOTIFY_MOUNT_READONLY,
+ NOTIFY_MOUNT_IS_NOW_RO);
return ret;
}
lock_mount_hash();
mnt->mnt.mnt_flags &= ~MNT_READONLY;
unlock_mount_hash();
+ notify_mount(mnt, NULL, NOTIFY_MOUNT_READONLY, 0);
return 0;
}
*/
static void umount_mnt(struct mount *mnt)
{
+ notify_mount(mnt->mnt_parent, mnt, NOTIFY_MOUNT_UNMOUNT, 0);
put_mountpoint(unhash_mnt(mnt));
}
mnt->mnt.mnt_flags |= MNT_DOOMED;
rcu_read_unlock();
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+ if (mnt->mnt_watchers)
+ remove_watch_list(mnt->mnt_watchers, mnt->mnt_id);
+#endif
+
list_del(&mnt->mnt_instance);
if (unlikely(!list_empty(&mnt->mnt_mounts))) {
p = list_first_entry(&tmp_list, struct mount, mnt_list);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
+
ns = p->mnt_ns;
if (ns) {
ns->mounts--;
}
if (moving) {
unhash_mnt(source_mnt);
+ notify_mount(source_mnt->mnt_parent, source_mnt,
+ NOTIFY_MOUNT_MOVE_FROM, 0);
attach_mnt(source_mnt, dest_mnt, dest_mp);
+ notify_mount(dest_mnt, source_mnt, NOTIFY_MOUNT_MOVE_TO, 0);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
}
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
+ notify_mount(dest_mnt, source_mnt, NOTIFY_MOUNT_NEW_MOUNT,
+ (source_mnt->mnt.mnt_sb->s_flags & SB_RDONLY ?
+ NOTIFY_MOUNT_IS_NOW_RO : 0) |
+ (source_mnt->mnt.mnt_sb->s_flags & SB_SUBMOUNT ?
+ NOTIFY_MOUNT_IS_SUBMOUNT : 0));
}
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
mnt->mnt.mnt_flags = mnt_flags;
touch_mnt_namespace(mnt->mnt_ns);
unlock_mount_hash();
+ notify_mount(mnt, NULL, NOTIFY_MOUNT_SETATTR,
+ (mnt_flags & SB_RDONLY ? NOTIFY_MOUNT_IS_NOW_RO : 0));
}
static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
propagate_mount_busy(mnt, 1))
continue;
list_move(&mnt->mnt_expire, &graveyard);
+ notify_mount(mnt, NULL, NOTIFY_MOUNT_EXPIRY, 0);
}
while (!list_empty(&graveyard)) {
mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */
#define DCACHE_DENTRY_CURSOR 0x20000000
#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */
+#define DCACHE_MOUNT_WATCH 0x80000000 /* There's a mount watch here */
extern seqlock_t rename_lock;
siginfo_t __user *info,
unsigned int flags);
asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags);
+asmlinkage long sys_watch_mount(int dfd, const char __user *path,
+ unsigned int at_flags, int watch_fd, int watch_id);
/*
* Architecture-specific system calls
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_watch_mount 440
+__SYSCALL(__NR_watch_mount, sys_watch_mount)
#undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
/*
* 32 bit systems traditionally used different
enum watch_notification_type {
WATCH_TYPE_META = 0, /* Special record */
WATCH_TYPE_KEY_NOTIFY = 1, /* Key change event notification */
- WATCH_TYPE__NR = 2
+ WATCH_TYPE_MOUNT_NOTIFY = 2, /* Mount topology change notification */
+ WATCH_TYPE___NR = 3
};
enum watch_meta_notification_subtype {
__u32 aux; /* Per-type auxiliary data */
};
+/*
+ * Type of mount topology change notification.
+ */
+enum mount_notification_subtype {
+ NOTIFY_MOUNT_NEW_MOUNT = 0, /* New mount added */
+ NOTIFY_MOUNT_UNMOUNT = 1, /* Mount removed manually */
+ NOTIFY_MOUNT_EXPIRY = 2, /* Automount expired */
+ NOTIFY_MOUNT_READONLY = 3, /* Mount R/O state changed */
+ NOTIFY_MOUNT_SETATTR = 4, /* Mount attributes changed */
+ NOTIFY_MOUNT_MOVE_FROM = 5, /* Mount moved from here */
+ NOTIFY_MOUNT_MOVE_TO = 6, /* Mount moved to here (compare op_id) */
+};
+
+#define NOTIFY_MOUNT_IN_SUBTREE WATCH_INFO_FLAG_0 /* Event not actually at watched dentry */
+#define NOTIFY_MOUNT_IS_NOW_RO WATCH_INFO_FLAG_1 /* Mount changed to R/O */
+#define NOTIFY_MOUNT_IS_SUBMOUNT WATCH_INFO_FLAG_2 /* New mount is submount */
+
+/*
+ * Mount topology/configuration change notification record.
+ * - watch.type = WATCH_TYPE_MOUNT_NOTIFY
+ * - watch.subtype = enum mount_notification_subtype
+ */
+struct mount_notification {
+ struct watch_notification watch; /* WATCH_TYPE_MOUNT_NOTIFY */
+ __u64 triggered_on; /* The mount that triggered the notification */
+ __u64 auxiliary_mount; /* Added/moved/removed mount or 0 */
+};
+
#endif /* _UAPI_LINUX_WATCH_QUEUE_H */
/* fs/locks.c */
COND_SYSCALL(flock);
+/* fs/mount_notify.c */
+COND_SYSCALL(watch_mount);
+
/* fs/namei.c */
/* fs/namespace.c */