bcachefs: Discard, invalidate workers are now per device
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 23 Jun 2024 04:53:44 +0000 (00:53 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Tue, 25 Jun 2024 22:47:55 +0000 (18:47 -0400)
There's no reason for discards to be single-threaded across all devices;
making them per-device will improve performance on multi-device setups.

Additionally, making them per-device simplifies the refcounting on
bch_dev->io_ref; we now hold it for the duration that the discard path
is running, which fixes a race between the discard path and device
removal.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_foreground.c
fs/bcachefs/bcachefs.h
fs/bcachefs/super.c
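
Before the diff itself, here is a minimal userspace sketch of the ref lifetime described
above. This is not bcachefs code: dev_do_discards(), dev_get_ioref(), discard_ref,
removing and the pthread worker are illustrative stand-ins for bch2_dev_do_discards(),
bch2_dev_get_ioref(), BCH_WRITE_REF_discard, ca->io_ref and queue_work(). The caller
takes the per-device io ref and the filesystem-wide write ref before queueing the
worker, unwinds them in reverse order if queueing fails, and the worker drops both only
when it finishes, so the device cannot be removed while a discard is in flight.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fs {
	atomic_int	discard_ref;	/* stand-in for BCH_WRITE_REF_discard */
};

struct dev {
	struct fs	*fs;
	int		idx;
	atomic_int	io_ref;		/* stand-in for ca->io_ref */
	atomic_bool	removing;	/* set once device removal has begun */
	pthread_t	worker;
	bool		queued;
};

static bool dev_get_ioref(struct dev *ca)
{
	if (atomic_load(&ca->removing))
		return false;		/* refuse new discard work once removal starts */
	atomic_fetch_add(&ca->io_ref, 1);
	return true;
}

static void dev_put_ioref(struct dev *ca)
{
	atomic_fetch_sub(&ca->io_ref, 1);
}

/* The worker owns one io ref and one write ref for its whole run. */
static void *discard_worker(void *arg)
{
	struct dev *ca = arg;

	printf("dev %d: discarding while io_ref=%d holds off removal\n",
	       ca->idx, atomic_load(&ca->io_ref));

	atomic_fetch_sub(&ca->fs->discard_ref, 1);
	dev_put_ioref(ca);
	return NULL;
}

/* Same shape as the new bch2_dev_do_discards(): take refs, queue, unwind on failure. */
static void dev_do_discards(struct dev *ca)
{
	if (!dev_get_ioref(ca))
		return;

	atomic_fetch_add(&ca->fs->discard_ref, 1);

	if (pthread_create(&ca->worker, NULL, discard_worker, ca) == 0) {
		ca->queued = true;	/* both refs now belong to the worker */
		return;
	}

	/* queueing failed: drop refs in reverse order, as the kernel code does */
	atomic_fetch_sub(&ca->fs->discard_ref, 1);
	dev_put_ioref(ca);
}

int main(void)
{
	struct fs c = { .discard_ref = 0 };
	struct dev ca = { .fs = &c, .idx = 0, .io_ref = 0, .removing = false };

	dev_do_discards(&ca);
	if (ca.queued)
		pthread_join(ca.worker, NULL);
	return 0;
}

Because each device owns its own work item and its own refs, workers for different
devices can run in parallel, which is where the multi-device speedup comes from.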

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 8dec2c6cbb7eb05c22a7ee543c007187aa070c2c..1de9fac3bcf4f77f627028bc9cb0a586b039a518 100644
@@ -29,7 +29,7 @@
 #include <linux/sched/task.h>
 #include <linux/sort.h>
 
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
 
 /* Persistent alloc info: */
 
@@ -893,12 +893,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
                if (statechange(a->data_type == BCH_DATA_need_discard) &&
                    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
                    bucket_flushed(new_a))
-                       bch2_discard_one_bucket_fast(c, new.k->p);
+                       bch2_discard_one_bucket_fast(ca, new.k->p.offset);
 
                if (statechange(a->data_type == BCH_DATA_cached) &&
                    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
                    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-                       bch2_do_invalidates(c);
+                       bch2_dev_do_invalidates(ca);
 
                if (statechange(a->data_type == BCH_DATA_need_gc_gens))
                        bch2_gc_gens_async(c);
@@ -1636,34 +1636,38 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
        return ret;
 }
 
-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
 {
        int ret;
 
-       mutex_lock(&c->discard_buckets_in_flight_lock);
-       darray_for_each(c->discard_buckets_in_flight, i)
-               if (bkey_eq(*i, bucket)) {
+       mutex_lock(&ca->discard_buckets_in_flight_lock);
+       darray_for_each(ca->discard_buckets_in_flight, i)
+               if (i->bucket == bucket) {
                        ret = -BCH_ERR_EEXIST_discard_in_flight_add;
                        goto out;
                }
 
-       ret = darray_push(&c->discard_buckets_in_flight, bucket);
+       ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+                          .in_progress = in_progress,
+                          .bucket      = bucket,
+       }));
 out:
-       mutex_unlock(&c->discard_buckets_in_flight_lock);
+       mutex_unlock(&ca->discard_buckets_in_flight_lock);
        return ret;
 }
 
-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
 {
-       mutex_lock(&c->discard_buckets_in_flight_lock);
-       darray_for_each(c->discard_buckets_in_flight, i)
-               if (bkey_eq(*i, bucket)) {
-                       darray_remove_item(&c->discard_buckets_in_flight, i);
+       mutex_lock(&ca->discard_buckets_in_flight_lock);
+       darray_for_each(ca->discard_buckets_in_flight, i)
+               if (i->bucket == bucket) {
+                       BUG_ON(!i->in_progress);
+                       darray_remove_item(&ca->discard_buckets_in_flight, i);
                        goto found;
                }
        BUG();
 found:
-       mutex_unlock(&c->discard_buckets_in_flight_lock);
+       mutex_unlock(&ca->discard_buckets_in_flight_lock);
 }
 
 struct discard_buckets_state {
@@ -1671,26 +1675,11 @@ struct discard_buckets_state {
        u64             open;
        u64             need_journal_commit;
        u64             discarded;
-       struct bch_dev  *ca;
        u64             need_journal_commit_this_dev;
 };
 
-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
-       if (s->ca == ca)
-               return;
-
-       if (s->ca && s->need_journal_commit_this_dev >
-           bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
-               bch2_journal_flush_async(&c->journal, NULL);
-
-       if (s->ca)
-               percpu_ref_put(&s->ca->io_ref);
-       s->ca = ca;
-       s->need_journal_commit_this_dev = 0;
-}
-
 static int bch2_discard_one_bucket(struct btree_trans *trans,
+                                  struct bch_dev *ca,
                                   struct btree_iter *need_discard_iter,
                                   struct bpos *discard_pos_done,
                                   struct discard_buckets_state *s)
@@ -1704,16 +1693,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
        bool discard_locked = false;
        int ret = 0;
 
-       struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
-               ? s->ca
-               : bch2_dev_get_ioref(c, pos.inode, WRITE);
-       if (!ca) {
-               bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
-               return 0;
-       }
-
-       discard_buckets_next_dev(c, s, ca);
-
        if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
                s->open++;
                goto out;
@@ -1773,7 +1752,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                goto out;
        }
 
-       if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+       if (discard_in_flight_add(ca, iter.pos.offset, true))
                goto out;
 
        discard_locked = true;
@@ -1811,7 +1790,7 @@ write:
        s->discarded++;
 out:
        if (discard_locked)
-               discard_in_flight_remove(c, iter.pos);
+               discard_in_flight_remove(ca, iter.pos.offset);
        s->seen++;
        bch2_trans_iter_exit(trans, &iter);
        printbuf_exit(&buf);
@@ -1820,7 +1799,8 @@ out:
 
 static void bch2_do_discards_work(struct work_struct *work)
 {
-       struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+       struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+       struct bch_fs *c = ca->fs;
        struct discard_buckets_state s = {};
        struct bpos discard_pos_done = POS_MAX;
        int ret;
@@ -1831,23 +1811,41 @@ static void bch2_do_discards_work(struct work_struct *work)
         * successful commit:
         */
        ret = bch2_trans_run(c,
-               for_each_btree_key(trans, iter,
-                                  BTREE_ID_need_discard, POS_MIN, 0, k,
-                       bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
-       discard_buckets_next_dev(c, &s, NULL);
+               for_each_btree_key_upto(trans, iter,
+                                  BTREE_ID_need_discard,
+                                  POS(ca->dev_idx, 0),
+                                  POS(ca->dev_idx, U64_MAX), 0, k,
+                       bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
 
        trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
                              bch2_err_str(ret));
 
        bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+       percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+       struct bch_fs *c = ca->fs;
+
+       if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+               return;
+
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+               goto put_ioref;
+
+       if (queue_work(c->write_ref_wq, &ca->discard_work))
+               return;
+
+       bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+       percpu_ref_put(&ca->io_ref);
 }
 
 void bch2_do_discards(struct bch_fs *c)
 {
-       if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
-           !queue_work(c->write_ref_wq, &c->discard_work))
-               bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+       for_each_member_device(c, ca)
+               bch2_dev_do_discards(ca);
 }
 
 static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
@@ -1876,68 +1874,69 @@ err:
 
 static void bch2_do_discards_fast_work(struct work_struct *work)
 {
-       struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+       struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+       struct bch_fs *c = ca->fs;
 
        while (1) {
                bool got_bucket = false;
-               struct bpos bucket;
-               struct bch_dev *ca;
+               u64 bucket;
 
-               mutex_lock(&c->discard_buckets_in_flight_lock);
-               darray_for_each(c->discard_buckets_in_flight, i) {
-                       if (i->snapshot)
+               mutex_lock(&ca->discard_buckets_in_flight_lock);
+               darray_for_each(ca->discard_buckets_in_flight, i) {
+                       if (i->in_progress)
                                continue;
 
-                       ca = bch2_dev_get_ioref(c, i->inode, WRITE);
-                       if (!ca) {
-                               darray_remove_item(&c->discard_buckets_in_flight, i);
-                               continue;
-                       }
-
                        got_bucket = true;
-                       bucket = *i;
-                       i->snapshot = true;
+                       bucket = i->bucket;
+                       i->in_progress = true;
                        break;
                }
-               mutex_unlock(&c->discard_buckets_in_flight_lock);
+               mutex_unlock(&ca->discard_buckets_in_flight_lock);
 
                if (!got_bucket)
                        break;
 
                if (ca->mi.discard && !c->opts.nochanges)
                        blkdev_issue_discard(ca->disk_sb.bdev,
-                                            bucket.offset * ca->mi.bucket_size,
+                                            bucket_to_sector(ca, bucket),
                                             ca->mi.bucket_size,
                                             GFP_KERNEL);
 
                int ret = bch2_trans_do(c, NULL, NULL,
-                                       BCH_WATERMARK_btree|
-                                       BCH_TRANS_COMMIT_no_enospc,
-                                       bch2_clear_bucket_needs_discard(trans, bucket));
+                       BCH_WATERMARK_btree|
+                       BCH_TRANS_COMMIT_no_enospc,
+                       bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
                bch_err_fn(c, ret);
 
-               percpu_ref_put(&ca->io_ref);
-               discard_in_flight_remove(c, bucket);
+               discard_in_flight_remove(ca, bucket);
 
                if (ret)
                        break;
        }
 
        bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+       percpu_ref_put(&ca->io_ref);
 }
 
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
 {
-       rcu_read_lock();
-       struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
-       bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
-       rcu_read_unlock();
+       struct bch_fs *c = ca->fs;
+
+       if (discard_in_flight_add(ca, bucket, false))
+               return;
+
+       if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+               return;
+
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+               goto put_ioref;
+
+       if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+               return;
 
-       if (!dead &&
-           !discard_in_flight_add(c, bucket) &&
-           bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
-           !queue_work(c->write_ref_wq, &c->discard_fast_work))
-               bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+       bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+       percpu_ref_put(&ca->io_ref);
 }
 
 static int invalidate_one_bucket(struct btree_trans *trans,
@@ -2038,7 +2037,8 @@ again:
 
 static void bch2_do_invalidates_work(struct work_struct *work)
 {
-       struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+       struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+       struct bch_fs *c = ca->fs;
        struct btree_trans *trans = bch2_trans_get(c);
        int ret = 0;
 
@@ -2046,52 +2046,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
        if (ret)
                goto err;
 
-       for_each_member_device(c, ca) {
-               s64 nr_to_invalidate =
-                       should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
-               struct btree_iter iter;
-               bool wrapped = false;
-
-               bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
-                                    lru_pos(ca->dev_idx, 0,
-                                            ((bch2_current_io_time(c, READ) + U32_MAX) &
-                                             LRU_TIME_MAX)), 0);
-
-               while (true) {
-                       bch2_trans_begin(trans);
-
-                       struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
-                       ret = bkey_err(k);
-                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                               continue;
-                       if (ret)
-                               break;
-                       if (!k.k)
-                               break;
+       s64 nr_to_invalidate =
+               should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+       struct btree_iter iter;
+       bool wrapped = false;
 
-                       ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
-                       if (ret)
-                               break;
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+                            lru_pos(ca->dev_idx, 0,
+                                    ((bch2_current_io_time(c, READ) + U32_MAX) &
+                                     LRU_TIME_MAX)), 0);
 
-                       bch2_btree_iter_advance(&iter);
-               }
-               bch2_trans_iter_exit(trans, &iter);
+       while (true) {
+               bch2_trans_begin(trans);
 
-               if (ret < 0) {
-                       bch2_dev_put(ca);
+               struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+               ret = bkey_err(k);
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                       continue;
+               if (ret)
                        break;
-               }
+               if (!k.k)
+                       break;
+
+               ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+               if (ret)
+                       break;
+
+               bch2_btree_iter_advance(&iter);
        }
+       bch2_trans_iter_exit(trans, &iter);
 err:
        bch2_trans_put(trans);
        bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+       percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+       struct bch_fs *c = ca->fs;
+
+       if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+               return;
+
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+               goto put_ioref;
+
+       if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+               return;
+
+       bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+       percpu_ref_put(&ca->io_ref);
 }
 
 void bch2_do_invalidates(struct bch_fs *c)
 {
-       if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
-           !queue_work(c->write_ref_wq, &c->invalidate_work))
-               bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+       for_each_member_device(c, ca)
+               bch2_dev_do_invalidates(ca);
 }
 
 int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2407,16 +2418,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
                        set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
+{
+       darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
 {
-       darray_exit(&c->discard_buckets_in_flight);
+       mutex_init(&ca->discard_buckets_in_flight_lock);
+       INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+       INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+       INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
 }
 
 void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
        spin_lock_init(&c->freelist_lock);
-       mutex_init(&c->discard_buckets_in_flight_lock);
-       INIT_WORK(&c->discard_work, bch2_do_discards_work);
-       INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
-       INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
 }
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index c3cc3c5ba5b63fc97d073b233dd7ef72521d0365..ba2c5557a3f0e506ac7c6c84d5e683fca3ce9c68 100644
@@ -275,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
                       enum btree_iter_update_trigger_flags);
 int bch2_check_alloc_info(struct bch_fs *);
 int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_dev_do_discards(struct bch_dev *);
 void bch2_do_discards(struct bch_fs *);
 
 static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -289,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
        return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
 }
 
+void bch2_dev_do_invalidates(struct bch_dev *);
 void bch2_do_invalidates(struct bch_fs *);
 
 static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -312,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
 
-void bch2_fs_allocator_background_exit(struct bch_fs *);
+void bch2_dev_allocator_background_exit(struct bch_dev *);
+void bch2_dev_allocator_background_init(struct bch_dev *);
+
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 927a5f300b30e8ce7b3a9c3ba5d83c5ede8ba86a..9d3d64746a5be625dfbf5168c079b3f5bfe61378 100644
@@ -621,13 +621,13 @@ again:
        avail = dev_buckets_free(ca, *usage, watermark);
 
        if (usage->d[BCH_DATA_need_discard].buckets > avail)
-               bch2_do_discards(c);
+               bch2_dev_do_discards(ca);
 
        if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
                bch2_gc_gens_async(c);
 
        if (should_invalidate_buckets(ca, *usage))
-               bch2_do_invalidates(c);
+               bch2_dev_do_invalidates(ca);
 
        if (!avail) {
                if (cl && !waiting) {
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a6b83ecab7ce5041752602fac3be1d5ff0dbbf32..1106fec6e155e4bd2a941bac7bf03a1bfa97e01d 100644
@@ -493,6 +493,11 @@ struct io_count {
        u64                     sectors[2][BCH_DATA_NR];
 };
 
+struct discard_in_flight {
+       bool                    in_progress:1;
+       u64                     bucket:63;
+};
+
 struct bch_dev {
        struct kobject          kobj;
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,6 +559,12 @@ struct bch_dev {
        size_t                  inc_gen_really_needs_gc;
        size_t                  buckets_waiting_on_journal;
 
+       struct work_struct      invalidate_work;
+       struct work_struct      discard_work;
+       struct mutex            discard_buckets_in_flight_lock;
+       DARRAY(struct discard_in_flight)        discard_buckets_in_flight;
+       struct work_struct      discard_fast_work;
+
        atomic64_t              rebalance_work;
 
        struct journal_device   journal;
@@ -915,11 +926,6 @@ struct bch_fs {
        unsigned                write_points_nr;
 
        struct buckets_waiting_for_journal buckets_waiting_for_journal;
-       struct work_struct      invalidate_work;
-       struct work_struct      discard_work;
-       struct mutex            discard_buckets_in_flight_lock;
-       DARRAY(struct bpos)     discard_buckets_in_flight;
-       struct work_struct      discard_fast_work;
 
        /* GARBAGE COLLECTION */
        struct work_struct      gc_gens_work;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 641f2975177b61cc1dd75bcd4ed362390ba3a3d5..fb906467201e92aad00022be6b643e67dd876391 100644
@@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c)
 
        bch2_find_btree_nodes_exit(&c->found_btree_nodes);
        bch2_free_pending_node_rewrites(c);
-       bch2_fs_allocator_background_exit(c);
        bch2_fs_sb_errors_exit(c);
        bch2_fs_counters_exit(c);
        bch2_fs_snapshots_exit(c);
@@ -1195,6 +1194,7 @@ static void bch2_dev_free(struct bch_dev *ca)
 
        kfree(ca->buckets_nouse);
        bch2_free_super(&ca->disk_sb);
+       bch2_dev_allocator_background_exit(ca);
        bch2_dev_journal_exit(ca);
 
        free_percpu(ca->io_done);
@@ -1317,6 +1317,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
        atomic_long_set(&ca->ref, 1);
 #endif
 
+       bch2_dev_allocator_background_init(ca);
+
        if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
                            PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
            !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1541,6 +1543,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
 
        bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
+       bch2_dev_do_discards(ca);
 }
 
 int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,