]> www.infradead.org Git - users/hch/block.git/commitdiff
watch_queue: Implement mount topology and attribute change notifications
authorDavid Howells <dhowells@redhat.com>
Fri, 21 Feb 2020 15:30:56 +0000 (15:30 +0000)
committerDavid Howells <dhowells@redhat.com>
Mon, 3 Aug 2020 11:13:25 +0000 (12:13 +0100)
Add a mount notification facility whereby notifications about changes in
mount topology and configuration can be received.  Note that this only
covers vfsmount topology changes and not superblock events.  A separate
facility will be added for that.

Firstly, a watch queue needs to be created:

pipe2(fds, O_NOTIFICATION_PIPE);
ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);

then a notification can be set up to report notifications via that queue:

struct watch_notification_filter filter = {
.nr_filters = 1,
.filters = {
[0] = {
.type = WATCH_TYPE_MOUNT_NOTIFY,
.subtype_filter[0] = UINT_MAX,
},
},
};
ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter);
watch_mount(AT_FDCWD, "/", 0, fds[1], 0x02);

In this case, it would let me monitor the mount topology subtree rooted at
"/" for events.  Mount notifications propagate up the tree towards the
root, so a watch will catch all of the events happening in the subtree
rooted at the watch.

After setting the watch, records will be placed into the queue when, for
example, a superblock switches between read-write and read-only.  Records
are of the following format:

struct mount_notification {
struct watch_notification watch;
__u64 triggered_on;
__u64 auxiliary_mount;
} *n;

Where:

n->watch.type will be WATCH_TYPE_MOUNT_NOTIFY.

n->watch.subtype will indicate the type of event, such as
NOTIFY_MOUNT_NEW_MOUNT.

n->watch.info & WATCH_INFO_LENGTH will indicate the length of the
record.

n->watch.info & WATCH_INFO_ID will be the fifth argument to
watch_mount(), shifted.

n->watch.info & NOTIFY_MOUNT_IN_SUBTREE if true indicates that the
notification was generated in the mount subtree rooted at the
watch, and not actually in the watch itself.

n->watch.info & NOTIFY_MOUNT_IS_RECURSIVE if true indicates that
the notification was generated by an event (eg. SETATTR) that was
applied recursively.  The notification is only generated for the
object that initially triggered it.

n->watch.info & NOTIFY_MOUNT_IS_NOW_RO will be used for
NOTIFY_MOUNT_READONLY, being set if the mount becomes R/O, and
being cleared otherwise, and for NOTIFY_MOUNT_NEW_MOUNT, being set
if the new mount is readonly.

n->watch.info & NOTIFY_MOUNT_IS_SUBMOUNT if true indicates that the
NOTIFY_MOUNT_NEW_MOUNT notification is in response to a mount
performed by the kernel (e.g. an automount).

n->triggered_on indicates the ID of the mount to which the change
was accounted (e.g. the new parent of a new mount).

n->auxiliary_mount indicates the ID of an additional mount that was
affected (e.g. a new mount itself) or 0.

Note that it is permissible for event records to be of variable length -
or, at least, the length may be dependent on the subtype.  Note also that
the queue can be shared between multiple notifications of various types.

Signed-off-by: David Howells <dhowells@redhat.com>
29 files changed:
Documentation/watch_queue.rst
arch/alpha/kernel/syscalls/syscall.tbl
arch/arm/tools/syscall.tbl
arch/arm64/include/asm/unistd.h
arch/arm64/include/asm/unistd32.h
arch/ia64/kernel/syscalls/syscall.tbl
arch/m68k/kernel/syscalls/syscall.tbl
arch/microblaze/kernel/syscalls/syscall.tbl
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/parisc/kernel/syscalls/syscall.tbl
arch/powerpc/kernel/syscalls/syscall.tbl
arch/s390/kernel/syscalls/syscall.tbl
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/kernel/syscalls/syscall.tbl
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/xtensa/kernel/syscalls/syscall.tbl
fs/Kconfig
fs/Makefile
fs/mount.h
fs/mount_notify.c [new file with mode: 0644]
fs/namespace.c
include/linux/dcache.h
include/linux/syscalls.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/watch_queue.h
kernel/sys_ni.c

index 849fad6893efa678607c149d006348190e326fa2..3e647992be310347126e0b2572df558a4512ddf7 100644 (file)
@@ -8,6 +8,7 @@ opened by userspace.  This can be used in conjunction with::
 
   * Key/keyring notifications
 
+  * Mount notifications.
 
 The notifications buffers can be enabled by:
 
@@ -233,6 +234,11 @@ Any particular buffer can be fed from multiple sources.  Sources include:
 
     See Documentation/security/keys/core.rst for more information.
 
+  * WATCH_TYPE_MOUNT_NOTIFY
+
+    Notifications of this type indicate changes to mount attributes and the
+    mount topology within the subtree at the indicated point.
+
 
 Event Filtering
 ===============
@@ -292,9 +298,10 @@ A buffer is created with something like the following::
        pipe2(fds, O_TMPFILE);
        ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);
 
-It can then be set to receive keyring change notifications::
+It can then be set to receive notifications::
 
        keyctl(KEYCTL_WATCH_KEY, KEY_SPEC_SESSION_KEYRING, fds[1], 0x01);
+       watch_mount(AT_FDCWD, "/", 0, fds[1], 0x02);
 
 The notifications can then be consumed by something like the following::
 
@@ -331,6 +338,9 @@ The notifications can then be consumed by something like the following::
                                case WATCH_TYPE_KEY_NOTIFY:
                                        saw_key_change(&n.n);
                                        break;
+                               case WATCH_TYPE_MOUNT_NOTIFY:
+                                       saw_mount_change(&n.n);
+                                       break;
                                }
 
                                p += len;
index 5ddd128d4b7ac0c6cdbf65dbc4bacb993c2a5413..b6cf8403da351a1f4e1faab0a5ccf3b748c74064 100644 (file)
 547    common  openat2                         sys_openat2
 548    common  pidfd_getfd                     sys_pidfd_getfd
 549    common  faccessat2                      sys_faccessat2
+550    common  watch_mount                     sys_watch_mount
index d5cae5ffede0cd6e38846407b9fdce94fd352752..27cc1f53f4a06be2d314945a6b487e115002f90a 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index 3b859596840de5238645308ecd1bf389401822c5..b3b2019f8d16bbeef51948c249dcfb6dbd97a0fb 100644 (file)
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           440
+#define __NR_compat_syscalls           441
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
index 6d95d0c8bf2f47f29d028a2d3c4754d6a3c7a4e3..4f9cf98cdf0fdc014d438fe8213b42fef3332cdc 100644 (file)
@@ -885,6 +885,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_watch_mount 440
+__SYSCALL(__NR_watch_mount, sys_watch_mount)
 
 /*
  * Please add new compat syscalls above this comment and update
index 49e325b604b31917c1e3c967a2c1bae71c81ea7a..fc6d87903781f28fc8c8564aa8cb3f5313b726bf 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index f71b1bbcc1988c3e082b17b2d39520282930c1aa..c671aa0e4d255a1c39f99b6fd6541c402310f617 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index edacc4561f2b3bd30f28c86328da667c4ae63049..65cc53f129ef1abe2f44358dd8d506554dd4eb47 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index f777141f52568fcac820d2ab9bfa5a7075d32314..7f034a239930c12fb9ff863084296bcc3482b737 100644 (file)
 437    n32     openat2                         sys_openat2
 438    n32     pidfd_getfd                     sys_pidfd_getfd
 439    n32     faccessat2                      sys_faccessat2
+440    n32     watch_mount                     sys_watch_mount
index da8c76394e178fc4ca26522684eee4e5ac0a4a2a..d39b90de3642f86869c6c2c8c76ba94df5f6b073 100644 (file)
 437    n64     openat2                         sys_openat2
 438    n64     pidfd_getfd                     sys_pidfd_getfd
 439    n64     faccessat2                      sys_faccessat2
+440    n64     watch_mount                     sys_watch_mount
index 13280625d312e98dcebdaafaca210a353c4a43d9..09f426cb45b119ec50b3f42e62699217dfe02e35 100644 (file)
 437    o32     openat2                         sys_openat2
 438    o32     pidfd_getfd                     sys_pidfd_getfd
 439    o32     faccessat2                      sys_faccessat2
+440    o32     watch_mount                     sys_watch_mount
index 5a758fa6ec52427615cb8a0dfbfc938b4dc0d96c..52ff3454baa19b1516006f9e74691432d27f3121 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index f833a319082247ecc302f9bd25806c7677a64221..10b7ed3c7a1bdce71215a11d33bd3b8da3538ecc 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index bfdcb7633957355c4fa5b4d397f52e13534a5141..86f317bf52dff83d5937c7d6dcba1aeff196f721 100644 (file)
 437  common    openat2                 sys_openat2                     sys_openat2
 438  common    pidfd_getfd             sys_pidfd_getfd                 sys_pidfd_getfd
 439  common    faccessat2              sys_faccessat2                  sys_faccessat2
+440    common  watch_mount             sys_watch_mount                 sys_watch_mount
index acc35daa1b7924eb697cd822892d757174e51d90..0bb0f0b372c7a64b166297d8efa0a6982081d15a 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index 8004a276cb74be3c2e2ff0f9053d884f3936cc61..369ab65c1e9a67e646e92bb113ce22948f241971 100644 (file)
 437    common  openat2                 sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index d8f8a1a69ed11f7e11bbba2bb17301dc7501cae1..e760ba92c58d22c5b82effffb943b4085a80815d 100644 (file)
 437    i386    openat2                 sys_openat2
 438    i386    pidfd_getfd             sys_pidfd_getfd
 439    i386    faccessat2              sys_faccessat2
+440    i386    watch_mount             sys_watch_mount
index 78847b32e1370f56f273020e64a36e0a054bded4..5b58621d4f75d3b49db66818c7e0c71b0b6234d6 100644 (file)
 437    common  openat2                 sys_openat2
 438    common  pidfd_getfd             sys_pidfd_getfd
 439    common  faccessat2              sys_faccessat2
+440    common  watch_mount             sys_watch_mount
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
index 69d0d73876b31070dd3d3a98a83b411e7dded16e..5b28ee39f70ffd5fc730e608827658726430b49f 100644 (file)
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
+440    common  watch_mount                     sys_watch_mount
index a88aa3af73c11a4cebb4862c230a5e50de1d39bc..1a55e56d5c54cb76a1ac39d41469f907e2b96d00 100644 (file)
@@ -117,6 +117,15 @@ source "fs/verity/Kconfig"
 
 source "fs/notify/Kconfig"
 
+config MOUNT_NOTIFICATIONS
+       bool "Mount topology change notifications"
+       select WATCH_QUEUE
+       help
+         This option provides support for getting change notifications on the
+         mount tree topology.  This makes use of the /dev/watch_queue misc
+         device to handle the notification buffer and provides the
+         watch_mount() system call to enable/disable watchpoints.
+
 source "fs/quota/Kconfig"
 
 source "fs/autofs/Kconfig"
index 2ce5112b02c86719d546f711baf8980a855daf59..dd0d87e2ef19c8f2eb9d9094369ba86170e658ac 100644 (file)
@@ -22,6 +22,7 @@ obj-y +=      no-block.o
 endif
 
 obj-$(CONFIG_PROC_FS) += proc_namespace.o
+obj-$(CONFIG_MOUNT_NOTIFICATIONS) += mount_notify.o
 
 obj-y                          += notify/
 obj-$(CONFIG_EPOLL)            += eventpoll.o
index c7abb7b394d8eed9ca2833e656d345fc44e3a402..85456a5f5a3a5d70e7c2667e08d7214d68647033 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/poll.h>
 #include <linux/ns_common.h>
 #include <linux/fs_pin.h>
+#include <linux/watch_queue.h>
 
 struct mnt_namespace {
        atomic_t                count;
@@ -78,6 +79,9 @@ struct mount {
        int mnt_expiry_mark;            /* true if marked for expiry */
        struct hlist_head mnt_pins;
        struct hlist_head mnt_stuck_children;
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+       struct watch_list *mnt_watchers; /* Watches on dentries within this mount */
+#endif
 } __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -159,3 +163,17 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
 }
 
 extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+extern void notify_mount(struct mount *triggered,
+                        struct mount *aux,
+                        enum mount_notification_subtype subtype,
+                        u32 info_flags);
+#else /* !CONFIG_MOUNT_NOTIFICATIONS: empty stub so callers need no #ifdef */
+static inline void notify_mount(struct mount *triggered,
+                               struct mount *aux,
+                               enum mount_notification_subtype subtype,
+                               u32 info_flags)
+{
+}
+#endif /* CONFIG_MOUNT_NOTIFICATIONS */
diff --git a/fs/mount_notify.c b/fs/mount_notify.c
new file mode 100644 (file)
index 0000000..44f570e
--- /dev/null
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Provide mount topology/attribute change notifications.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include "mount.h"
+
+/*
+ * Post mount notifications to all watches going rootwards along the tree.
+ *
+ * Starting at the root dentry of @changed, walk towards the root of the mount
+ * tree - parent dentry by parent dentry, hopping to the mountpoint in the
+ * parent mount whenever a mount root is reached - and post @notify to the
+ * watch list of each mount that has watchers, at every dentry on the way that
+ * is flagged DCACHE_MOUNT_WATCH.  The first position visited is reported
+ * without NOTIFY_MOUNT_IN_SUBTREE; every later one has that flag set in
+ * @notify->watch.info.
+ *
+ * The walk runs under rcu_read_lock() and is checked against rename_lock; if
+ * need_seqretry() says the first pass cannot be trusted, the whole walk is
+ * redone with seq set to 1.
+ *
+ * Must be called with the mount_lock held.
+ * NOTE(review): several notify_mount() call sites added by this patch run
+ * after unlock_mount_hash() - confirm whether this requirement still holds.
+ * NOTE(review): a redone walk posts again to watches that the abandoned pass
+ * already notified, so a watcher may see duplicates - confirm this is intended.
+ */
+static void post_mount_notification(struct mount *changed,
+                                   struct mount_notification *notify)
+{
+       const struct cred *cred = current_cred();
+       struct path cursor;
+       struct mount *mnt;
+       unsigned seq;
+
+       seq = 0;        /* even value: read_seqbegin_or_lock() starts out lockless */
+       rcu_read_lock();
+restart:
+       cursor.mnt = &changed->mnt;
+       cursor.dentry = changed->mnt.mnt_root;
+       mnt = real_mount(cursor.mnt);
+       notify->watch.info &= ~NOTIFY_MOUNT_IN_SUBTREE; /* reset for a fresh pass */
+
+       read_seqbegin_or_lock(&rename_lock, &seq);
+       for (;;) {
+               if (mnt->mnt_watchers &&
+                   !hlist_empty(&mnt->mnt_watchers->watchers)) {
+                       if (cursor.dentry->d_flags & DCACHE_MOUNT_WATCH)
+                               post_watch_notification(mnt->mnt_watchers,
+                                                       &notify->watch, cred,
+                                                       (unsigned long)cursor.dentry);
+               } else {
+                       cursor.dentry = mnt->mnt.mnt_root; /* no watchers here: skip to this mount's root */
+               }
+               notify->watch.info |= NOTIFY_MOUNT_IN_SUBTREE; /* everything from here on is an ancestor */
+
+               if (cursor.dentry == cursor.mnt->mnt_root ||
+                   IS_ROOT(cursor.dentry)) {
+                       struct mount *parent = READ_ONCE(mnt->mnt_parent);
+
+                       /* Escaped? */
+                       if (cursor.dentry != cursor.mnt->mnt_root)
+                               break;
+
+                       /* Global root? */
+                       if (mnt == parent)
+                               break;
+
+                       cursor.dentry = READ_ONCE(mnt->mnt_mountpoint); /* continue from the mountpoint in the parent */
+                       mnt = parent;
+                       cursor.mnt = &mnt->mnt;
+               } else {
+                       cursor.dentry = cursor.dentry->d_parent;
+               }
+       }
+
+       if (need_seqretry(&rename_lock, seq)) {
+               seq = 1;        /* odd value: the next pass takes rename_lock */
+               goto restart;
+       }
+
+       done_seqretry(&rename_lock, seq);
+       rcu_read_unlock();
+}
+
+/*
+ * Generate a mount notification.
+ *
+ * @trigger: the mount the change is accounted to; its ID goes in triggered_on
+ * @aux: a second affected mount whose ID goes in auxiliary_mount.  Only
+ *      dereferenced for NEW_MOUNT, UNMOUNT, MOVE_FROM and MOVE_TO, so it may
+ *      be NULL for EXPIRY, READONLY and SETATTR.
+ * @subtype: the kind of event being reported
+ * @info_flags: NOTIFY_MOUNT_* flags to OR into the record's info word
+ *
+ * Builds the record on the stack and hands it to post_mount_notification().
+ * An unrecognised subtype is a caller bug: warn once and drop the event
+ * rather than taking the whole machine down with BUG().
+ */
+void notify_mount(struct mount *trigger,
+                 struct mount *aux,
+                 enum mount_notification_subtype subtype,
+                 u32 info_flags)
+{
+       struct mount_notification n;
+
+       /* Zero everything, including auxiliary_mount for subtypes that leave it unset */
+       memset(&n, 0, sizeof(n));
+       n.watch.type    = WATCH_TYPE_MOUNT_NOTIFY;
+       n.watch.subtype = subtype;
+       n.watch.info    = info_flags | watch_sizeof(n);
+       n.triggered_on  = trigger->mnt_id;
+
+       switch (subtype) {
+       case NOTIFY_MOUNT_EXPIRY:
+       case NOTIFY_MOUNT_READONLY:
+       case NOTIFY_MOUNT_SETATTR:
+               /* Single-mount events: auxiliary_mount stays 0 */
+               break;
+
+       case NOTIFY_MOUNT_NEW_MOUNT:
+       case NOTIFY_MOUNT_UNMOUNT:
+       case NOTIFY_MOUNT_MOVE_FROM:
+       case NOTIFY_MOUNT_MOVE_TO:
+               n.auxiliary_mount       = aux->mnt_id;
+               break;
+
+       default:
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       post_mount_notification(trigger, &n);
+}
+
+/*
+ * Release callback handed to init_watch_list(): the watch ID holds the
+ * address of the watched dentry, so drop a reference on that dentry.
+ */
+static void release_mount_watch(struct watch *watch)
+{
+       unsigned long id = watch->id;
+
+       dput((struct dentry *)id);
+}
+
+/**
+ * sys_watch_mount - Watch for mount topology/attribute changes
+ * @dfd: Base directory to pathwalk from or fd referring to mount.
+ * @filename: Path to mount to place the watch upon
+ * @at_flags: Pathwalk control flags
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ *
+ * A @watch_id of 0-255 adds a watch on the dentry that @filename resolves to;
+ * -1 removes the watch that @watch_fd's queue holds on that dentry.  Only
+ * AT_NO_AUTOMOUNT and AT_EMPTY_PATH are accepted in @at_flags, and the caller
+ * needs MAY_EXEC permission on the target inode.
+ *
+ * Return: 0 on success or a negative error code: -EINVAL for a bad @watch_id
+ * or @at_flags, -ENOMEM on allocation failure, -EBADSLT when asked to remove
+ * a watch from a mount that has no watch list, or whatever the path lookup,
+ * permission check, watch queue lookup, LSM hook or watch-list helpers return.
+ */
+SYSCALL_DEFINE5(watch_mount,
+               int, dfd,
+               const char __user *, filename,
+               unsigned int, at_flags,
+               int, watch_fd,
+               int, watch_id)
+{
+       struct watch_queue *wqueue;
+       struct watch_list *wlist = NULL;
+       struct watch *watch = NULL;
+       struct mount *m;
+       struct path path;
+       unsigned int lookup_flags =
+               LOOKUP_DIRECTORY | LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
+       int ret;
+
+       if (watch_id < -1 || watch_id > 0xff)
+               return -EINVAL;
+       if ((at_flags & ~(AT_NO_AUTOMOUNT | AT_EMPTY_PATH)) != 0)
+               return -EINVAL;
+       if (at_flags & AT_NO_AUTOMOUNT)
+               lookup_flags &= ~LOOKUP_AUTOMOUNT;
+       if (at_flags & AT_EMPTY_PATH)
+               lookup_flags |= LOOKUP_EMPTY;
+
+       ret = user_path_at(dfd, filename, lookup_flags, &path);
+       if (ret)
+               return ret;
+
+       ret = inode_permission(path.dentry->d_inode, MAY_EXEC);
+       if (ret)
+               goto err_path;
+
+       wqueue = get_watch_queue(watch_fd);
+       if (IS_ERR(wqueue)) {
+               /* ret is still 0 from the permission check; report the failure */
+               ret = PTR_ERR(wqueue);
+               goto err_path;
+       }
+
+       m = real_mount(path.mnt);
+
+       if (watch_id >= 0) {
+               ret = -ENOMEM;
+               /* Preallocate a list in case this is the mount's first watch */
+               if (!READ_ONCE(m->mnt_watchers)) {
+                       wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+                       if (!wlist)
+                               goto err_wqueue;
+                       init_watch_list(wlist, release_mount_watch);
+               }
+
+               watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+               if (!watch)
+                       goto err_wlist;
+
+               init_watch(watch, wqueue);
+               /* The dentry address identifies the watch within the mount */
+               watch->id       = (unsigned long)path.dentry;
+               watch->info_id  = (u32)watch_id << WATCH_INFO_ID__SHIFT;
+
+               ret = security_watch_mount(watch, &path);
+               if (ret < 0)
+                       goto err_watch;
+
+               down_write(&m->mnt.mnt_sb->s_umount);
+               /* Install the preallocated list unless someone beat us to it */
+               if (!m->mnt_watchers) {
+                       m->mnt_watchers = wlist;
+                       wlist = NULL;
+               }
+
+               ret = add_watch_to_object(watch, m->mnt_watchers);
+               if (ret == 0) {
+                       spin_lock(&path.dentry->d_lock);
+                       path.dentry->d_flags |= DCACHE_MOUNT_WATCH;
+                       spin_unlock(&path.dentry->d_lock);
+                       /* Reference dropped again by release_mount_watch() */
+                       dget(path.dentry);
+                       /* The list owns the watch now; don't free it below */
+                       watch = NULL;
+               }
+               up_write(&m->mnt.mnt_sb->s_umount);
+       } else {
+               /*
+                * m->mnt_watchers stays NULL until the first watch is added,
+                * so don't hand a NULL list to remove_watch_from_object().
+                */
+               ret = -EBADSLT;
+               down_write(&m->mnt.mnt_sb->s_umount);
+               if (m->mnt_watchers)
+                       ret = remove_watch_from_object(m->mnt_watchers, wqueue,
+                                                      (unsigned long)path.dentry,
+                                                      false);
+               up_write(&m->mnt.mnt_sb->s_umount);
+       }
+
+       /* Success falls through: watch/wlist are NULL if ownership moved */
+err_watch:
+       kfree(watch);
+err_wlist:
+       kfree(wlist);
+err_wqueue:
+       put_watch_queue(wqueue);
+err_path:
+       path_put(&path);
+       return ret;
+}
index 4a0f600a3328595d6630491e3077ee4209521adc..73ff5bf0c9afd7be364d141c02c21d9e98a5459e 100644 (file)
@@ -498,6 +498,9 @@ static int mnt_make_readonly(struct mount *mnt)
        smp_wmb();
        mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
        unlock_mount_hash();
+       if (ret == 0)
+               notify_mount(mnt, NULL, NOTIFY_MOUNT_READONLY,
+                            NOTIFY_MOUNT_IS_NOW_RO);
        return ret;
 }
 
@@ -506,6 +509,7 @@ static int __mnt_unmake_readonly(struct mount *mnt)
        lock_mount_hash();
        mnt->mnt.mnt_flags &= ~MNT_READONLY;
        unlock_mount_hash();
+       notify_mount(mnt, NULL, NOTIFY_MOUNT_READONLY, 0);
        return 0;
 }
 
@@ -835,6 +839,7 @@ static struct mountpoint *unhash_mnt(struct mount *mnt)
  */
 static void umount_mnt(struct mount *mnt)
 {
+       notify_mount(mnt->mnt_parent, mnt, NOTIFY_MOUNT_UNMOUNT, 0);
        put_mountpoint(unhash_mnt(mnt));
 }
 
@@ -1175,6 +1180,11 @@ static void mntput_no_expire(struct mount *mnt)
        mnt->mnt.mnt_flags |= MNT_DOOMED;
        rcu_read_unlock();
 
+#ifdef CONFIG_MOUNT_NOTIFICATIONS
+       if (mnt->mnt_watchers)
+               remove_watch_list(mnt->mnt_watchers, mnt->mnt_id);
+#endif
+
        list_del(&mnt->mnt_instance);
 
        if (unlikely(!list_empty(&mnt->mnt_mounts))) {
@@ -1503,6 +1513,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
                p = list_first_entry(&tmp_list, struct mount, mnt_list);
                list_del_init(&p->mnt_expire);
                list_del_init(&p->mnt_list);
+
                ns = p->mnt_ns;
                if (ns) {
                        ns->mounts--;
@@ -2137,7 +2148,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
        }
        if (moving) {
                unhash_mnt(source_mnt);
+               notify_mount(source_mnt->mnt_parent, source_mnt,
+                            NOTIFY_MOUNT_MOVE_FROM, 0);
                attach_mnt(source_mnt, dest_mnt, dest_mp);
+               notify_mount(dest_mnt, source_mnt, NOTIFY_MOUNT_MOVE_TO, 0);
                touch_mnt_namespace(source_mnt->mnt_ns);
        } else {
                if (source_mnt->mnt_ns) {
@@ -2146,6 +2160,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                }
                mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
                commit_tree(source_mnt);
+               notify_mount(dest_mnt, source_mnt, NOTIFY_MOUNT_NEW_MOUNT,
+                            (source_mnt->mnt.mnt_sb->s_flags & SB_RDONLY ?
+                             NOTIFY_MOUNT_IS_NOW_RO : 0) |
+                            (source_mnt->mnt.mnt_sb->s_flags & SB_SUBMOUNT ?
+                             NOTIFY_MOUNT_IS_SUBMOUNT : 0));
        }
 
        hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
@@ -2522,6 +2541,8 @@ static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
        mnt->mnt.mnt_flags = mnt_flags;
        touch_mnt_namespace(mnt->mnt_ns);
        unlock_mount_hash();
+       notify_mount(mnt, NULL, NOTIFY_MOUNT_SETATTR,
+                    (mnt_flags & SB_RDONLY ? NOTIFY_MOUNT_IS_NOW_RO : 0));
 }
 
 static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
@@ -2992,6 +3013,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
                        propagate_mount_busy(mnt, 1))
                        continue;
                list_move(&mnt->mnt_expire, &graveyard);
+               notify_mount(mnt, NULL, NOTIFY_MOUNT_EXPIRY, 0);
        }
        while (!list_empty(&graveyard)) {
                mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
index a81f0c3cf352994fcc6c00c3422a8ee12f1979d8..a94c551c62a3cc46b51e5bbfcadccab57eecf389 100644 (file)
@@ -219,6 +219,7 @@ struct dentry_operations {
 #define DCACHE_PAR_LOOKUP              0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR           0x20000000
 #define DCACHE_NORCU                   0x40000000 /* No RCU delay for freeing */
+#define DCACHE_MOUNT_WATCH             0x80000000 /* There's a mount watch here */
 
 extern seqlock_t rename_lock;
 
index b951a87da9877c4c664c7244760a39bd87b83589..88d03fd627ab7591e3469cdec4503855dc5a31b0 100644 (file)
@@ -1005,6 +1005,8 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
                                       siginfo_t __user *info,
                                       unsigned int flags);
 asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags);
+asmlinkage long sys_watch_mount(int dfd, const char __user *path,
+                               unsigned int at_flags, int watch_fd, int watch_id);
 
 /*
  * Architecture-specific system calls
index f4a01305d9a65c14fe46652970ec3195a8bce61c..fcdca8c7d30ac84f8d23e6e53027a51f7062d666 100644 (file)
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_watch_mount 440
+__SYSCALL(__NR_watch_mount, sys_watch_mount)
 
 #undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
 
 /*
  * 32 bit systems traditionally used different
index c3d8320b5d3a6a2f7bc25b178391dcaeabaef6ff..83b11242c10e8defe0152e3ed5526ea45caabfd3 100644 (file)
@@ -14,7 +14,8 @@
 enum watch_notification_type {
        WATCH_TYPE_META         = 0,    /* Special record */
        WATCH_TYPE_KEY_NOTIFY   = 1,    /* Key change event notification */
-       WATCH_TYPE__NR          = 2
+       WATCH_TYPE_MOUNT_NOTIFY = 2,    /* Mount topology change notification */
+       WATCH_TYPE___NR         = 3
 };
 
 enum watch_meta_notification_subtype {
@@ -101,4 +102,32 @@ struct key_notification {
        __u32   aux;            /* Per-type auxiliary data */
 };
 
+/*
+ * Type of mount topology change notification.
+ *
+ * Carried in watch.subtype of a struct mount_notification.  notify_mount()
+ * fills in auxiliary_mount for NEW_MOUNT, UNMOUNT, MOVE_FROM and MOVE_TO and
+ * leaves it zero for EXPIRY, READONLY and SETATTR.
+ *
+ * NOTE(review): the MOVE_TO comment mentions "op_id", but no such field is
+ * defined in struct mount_notification - presumably auxiliary_mount is meant;
+ * confirm.
+ */
+enum mount_notification_subtype {
+       NOTIFY_MOUNT_NEW_MOUNT  = 0, /* New mount added */
+       NOTIFY_MOUNT_UNMOUNT    = 1, /* Mount removed manually */
+       NOTIFY_MOUNT_EXPIRY     = 2, /* Automount expired */
+       NOTIFY_MOUNT_READONLY   = 3, /* Mount R/O state changed */
+       NOTIFY_MOUNT_SETATTR    = 4, /* Mount attributes changed */
+       NOTIFY_MOUNT_MOVE_FROM  = 5, /* Mount moved from here */
+       NOTIFY_MOUNT_MOVE_TO    = 6, /* Mount moved to here (compare op_id) */
+};
+
+#define NOTIFY_MOUNT_IN_SUBTREE                WATCH_INFO_FLAG_0 /* Event not actually at watched dentry */
+#define NOTIFY_MOUNT_IS_NOW_RO         WATCH_INFO_FLAG_1 /* Mount changed to R/O */
+#define NOTIFY_MOUNT_IS_SUBMOUNT       WATCH_INFO_FLAG_2 /* New mount is submount */
+
+/*
+ * Mount topology/configuration change notification record.
+ * - watch.type = WATCH_TYPE_MOUNT_NOTIFY
+ * - watch.subtype = enum mount_notification_subtype
+ * - watch.info carries the NOTIFY_MOUNT_* flags
+ *
+ * NOTE(review): the commit description also documents a
+ * NOTIFY_MOUNT_IS_RECURSIVE flag, but no such flag is defined in this header -
+ * confirm whether it was dropped or is missing.
+ */
+struct mount_notification {
+       struct watch_notification watch; /* WATCH_TYPE_MOUNT_NOTIFY */
+       __u64   triggered_on;           /* ID of the mount that triggered the notification */
+       __u64   auxiliary_mount;        /* ID of the added/moved/removed mount or 0 */
+};
+
 #endif /* _UAPI_LINUX_WATCH_QUEUE_H */
index 3b69a560a7ac567020c714c27b8241fe2dffc8ce..3e1c5c9d2efedd38590e7a7b441944ae3c7442e0 100644 (file)
@@ -85,6 +85,9 @@ COND_SYSCALL(ioprio_get);
 /* fs/locks.c */
 COND_SYSCALL(flock);
 
+/* fs/mount_notify.c */
+COND_SYSCALL(watch_mount);
+
 /* fs/namei.c */
 
 /* fs/namespace.c */