]> www.infradead.org Git - nvme.git/commitdiff
bcachefs: BCH_IOCTL_FSCK_OFFLINE
authorKent Overstreet <kent.overstreet@linux.dev>
Wed, 12 Jul 2023 03:23:40 +0000 (23:23 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Jan 2024 16:47:40 +0000 (11:47 -0500)
This adds a new ioctl for running fsck on a list of devices.

Normally, if we wish to use the kernel's implementation of fsck we'd run
it at mount time with -o fsck. This ioctl lets us run fsck without
mounting, so that userspace bcachefs-tools can transparently switch to
the kernel's implementation of fsck when appropriate - primarily if the
kernel version of bcachefs better matches the filesystem on disk.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_ioctl.h
fs/bcachefs/chardev.c

index 43822c17297c697c070717aa34458a88e830a676..07c490851742bf611497fe115d3202f3bff44dda 100644 (file)
@@ -83,6 +83,8 @@ struct bch_ioctl_incremental {
 
 #define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc,     18, struct bch_ioctl_dev_usage_v2)
 
+#define BCH_IOCTL_FSCK_OFFLINE         _IOW(0xbc,      19,  struct bch_ioctl_fsck_offline)
+
 /* ioctl below act on a particular file, not the filesystem as a whole: */
 
 #define BCHFS_IOC_REINHERIT_ATTRS      _IOR(0xbc, 64, const char __user *)
@@ -386,4 +388,15 @@ struct bch_ioctl_subvolume {
 #define BCH_SUBVOL_SNAPSHOT_CREATE     (1U << 0)
 #define BCH_SUBVOL_SNAPSHOT_RO         (1U << 1)
 
+/*
+ * BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command,
+ * but with the kernel's implementation of fsck:
+ */
+struct bch_ioctl_fsck_offline {
+       __u64                   flags;
+       __u64                   opts;           /* string */
+       __u64                   nr_devs;
+       __u64                   devs[0];
+};
+
 #endif /* _BCACHEFS_IOCTL_H */
index 30a70cef11743ed2596808601037088847151bc6..78bcfa4cb48f088a11a5fda0578b8e7481962b3a 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/ioctl.h>
 #include <linux/kthread.h>
 #include <linux/major.h>
+#include <linux/poll.h>
 #include <linux/sched/task.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -32,12 +33,15 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long
 struct thread_with_file {
        struct task_struct      *task;
        int                     ret;
+       bool                    done;
 };
 
 static void thread_with_file_exit(struct thread_with_file *thr)
 {
-       kthread_stop(thr->task);
-       put_task_struct(thr->task);
+       if (thr->task) {
+               kthread_stop(thr->task);
+               put_task_struct(thr->task);
+       }
 }
 
 __printf(4, 0)
@@ -194,8 +198,208 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
 }
 #endif
 
+struct fsck_thread {
+       struct thread_with_file thr;
+       struct printbuf         buf;
+       char                    **devs;
+       size_t                  nr_devs;
+       struct bch_opts         opts;
+
+       struct log_output       output;
+       DARRAY(char)            output2;
+};
+
+static void bch2_fsck_thread_free(struct fsck_thread *thr)
+{
+       thread_with_file_exit(&thr->thr);
+       if (thr->devs)
+               for (size_t i = 0; i < thr->nr_devs; i++)
+                       kfree(thr->devs[i]);
+       darray_exit(&thr->output2);
+       printbuf_exit(&thr->output.buf);
+       kfree(thr->devs);
+       kfree(thr);
+}
+
+static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
+{
+       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
+
+       bch2_fsck_thread_free(thr);
+       return 0;
+}
+
+static bool fsck_thread_ready(struct fsck_thread *thr)
+{
+       return thr->output.buf.pos ||
+               thr->output2.nr ||
+               thr->thr.done;
+}
+
+static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
+                                    size_t len, loff_t *ppos)
+{
+       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
+       size_t copied = 0, b;
+       int ret = 0;
+
+       if ((file->f_flags & O_NONBLOCK) &&
+           !fsck_thread_ready(thr))
+               return -EAGAIN;
+
+       ret = wait_event_interruptible(thr->output.wait,
+                       fsck_thread_ready(thr));
+       if (ret)
+               return ret;
+
+       if (thr->thr.done)
+               return 0;
+
+       while (len) {
+               ret = darray_make_room(&thr->output2, thr->output.buf.pos);
+               if (ret)
+                       break;
+
+               spin_lock_irq(&thr->output.lock);
+               b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
+
+               memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
+               memmove(thr->output.buf.buf,
+                       thr->output.buf.buf + b,
+                       thr->output.buf.pos - b);
+
+               thr->output2.nr += b;
+               thr->output.buf.pos -= b;
+               spin_unlock_irq(&thr->output.lock);
+
+               b = min(len, thr->output2.nr);
+               if (!b)
+                       break;
+
+               b -= copy_to_user(buf, thr->output2.data, b);
+               if (!b) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               copied  += b;
+               buf     += b;
+               len     -= b;
+
+               memmove(thr->output2.data,
+                       thr->output2.data + b,
+                       thr->output2.nr - b);
+               thr->output2.nr -= b;
+       }
+
+       return copied ?: ret;
+}
+
+static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
+{
+       struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
+
+       poll_wait(file, &thr->output.wait, wait);
+
+       return fsck_thread_ready(thr)
+               ? EPOLLIN|EPOLLHUP
+               : 0;
+}
+
+static const struct file_operations fsck_thread_ops = {
+       .release        = bch2_fsck_thread_release,
+       .read           = bch2_fsck_thread_read,
+       .poll           = bch2_fsck_thread_poll,
+       .llseek         = no_llseek,
+};
+
+static int bch2_fsck_offline_thread_fn(void *arg)
+{
+       struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+       struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
+
+       thr->thr.ret = PTR_ERR_OR_ZERO(c);
+       if (!thr->thr.ret)
+               bch2_fs_stop(c);
+
+       thr->thr.done = true;
+       wake_up(&thr->output.wait);
+       return 0;
+}
+
+static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
+{
+       struct bch_ioctl_fsck_offline arg;
+       struct fsck_thread *thr = NULL;
+       u64 *devs = NULL;
+       long ret = 0;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
+               return -EFAULT;
+
+       if (arg.flags)
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
+           !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
+           !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       thr->nr_devs = arg.nr_devs;
+       thr->output.buf = PRINTBUF;
+       thr->output.buf.atomic++;
+       spin_lock_init(&thr->output.lock);
+       init_waitqueue_head(&thr->output.wait);
+       darray_init(&thr->output2);
+
+       if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       for (size_t i = 0; i < arg.nr_devs; i++) {
+               thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
+               ret = PTR_ERR_OR_ZERO(thr->devs[i]);
+               if (ret)
+                       goto err;
+       }
+
+       if (arg.opts) {
+               char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
+
+               ret =   PTR_ERR_OR_ZERO(optstr) ?:
+                       bch2_parse_mount_opts(NULL, &thr->opts, optstr);
+               kfree(optstr);
+
+               if (ret)
+                       goto err;
+       }
+
+       opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
+
+       ret = run_thread_with_file(&thr->thr,
+                                  &fsck_thread_ops,
+                                  bch2_fsck_offline_thread_fn,
+                                  "bch-fsck");
+err:
+       if (ret < 0) {
+               if (thr)
+                       bch2_fsck_thread_free(thr);
+               pr_err("ret %s", bch2_err_str(ret));
+       }
+       kfree(devs);
+       return ret;
+}
+
 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
 {
+       long ret;
+
        switch (cmd) {
 #if 0
        case BCH_IOCTL_ASSEMBLE:
@@ -203,9 +407,18 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
        case BCH_IOCTL_INCREMENTAL:
                return bch2_ioctl_incremental(arg);
 #endif
+       case BCH_IOCTL_FSCK_OFFLINE: {
+               ret = bch2_ioctl_fsck_offline(arg);
+               break;
+       }
        default:
-               return -ENOTTY;
+               ret = -ENOTTY;
+               break;
        }
+
+       if (ret < 0)
+               ret = bch2_err_class(ret);
+       return ret;
 }
 
 static long bch2_ioctl_query_uuid(struct bch_fs *c,