btrfs: make the extent map shrinker run asynchronously as a work queue job

author Filipe Manana <fdmanana@suse.com>

Thu, 29 Aug 2024 14:23:32 +0000 (15:23 +0100)

committer David Sterba <dsterba@suse.com>

Mon, 11 Nov 2024 13:34:17 +0000 (14:34 +0100)
author Filipe Manana <fdmanana@suse.com>
Thu, 29 Aug 2024 14:23:32 +0000 (15:23 +0100)
committer David Sterba <dsterba@suse.com>
Mon, 11 Nov 2024 13:34:17 +0000 (14:34 +0100)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 02aaee0bc4e3862e8476378b47740439572d6b8c..8ad625a8d9f943844b257842b970dec5dfa65282 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2785,6 +2785,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
         btrfs_init_scrub(fs_info);
         btrfs_init_balance(fs_info);
         btrfs_init_async_reclaim_work(fs_info);
+       btrfs_init_extent_map_shrinker_work(fs_info);
  
         rwlock_init(&fs_info->block_group_cache_lock);
         fs_info->block_group_cache_tree = RB_ROOT_CACHED;
@@ -4292,6 +4293,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
         cancel_work_sync(&fs_info->async_reclaim_work);
         cancel_work_sync(&fs_info->async_data_reclaim_work);
         cancel_work_sync(&fs_info->preempt_reclaim_work);
+       cancel_work_sync(&fs_info->extent_map_shrinker_work);
  
         /* Cancel or finish ongoing discard work */
         btrfs_discard_cleanup(fs_info);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c

index ed2a5ccbb189b5300fef63a020b57a4cc621981c..4b9ab3f9c731a7e7e3db3e1ee5018e5bc77228b0 100644 (file)
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1124,7 +1124,8 @@ struct btrfs_em_shrink_ctx {
  
  static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
  {
-       const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       const u64 cur_fs_gen = btrfs_get_fs_generation(fs_info);
         struct extent_map_tree *tree = &inode->extent_tree;
         long nr_dropped = 0;
         struct rb_node *node;
@@ -1197,7 +1198,8 @@ next:
                  * lock. This is to avoid slowing other tasks trying to take the
                  * lock.
                  */
-               if (need_resched() || rwlock_needbreak(&tree->lock))
+               if (need_resched() || rwlock_needbreak(&tree->lock) ||
+                   btrfs_fs_closing(fs_info))
                         break;
                 node = next;
         }
@@ -1221,7 +1223,8 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
                 ctx->last_ino = btrfs_ino(inode);
                 btrfs_add_delayed_iput(inode);
  
-               if (ctx->scanned >= ctx->nr_to_scan)
+               if (ctx->scanned >= ctx->nr_to_scan ||
+                   btrfs_fs_closing(inode->root->fs_info))
                         break;
  
                 cond_resched();
@@ -1250,16 +1253,19 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
         return nr_dropped;
  }
  
-long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
+static void btrfs_extent_map_shrinker_worker(struct work_struct *work)
  {
+       struct btrfs_fs_info *fs_info;
         struct btrfs_em_shrink_ctx ctx;
         u64 start_root_id;
         u64 next_root_id;
         bool cycled = false;
         long nr_dropped = 0;
  
+       fs_info = container_of(work, struct btrfs_fs_info, extent_map_shrinker_work);
+
         ctx.scanned = 0;
-       ctx.nr_to_scan = nr_to_scan;
+       ctx.nr_to_scan = atomic64_read(&fs_info->extent_map_shrinker_nr_to_scan);
  
         /*
          * In case we have multiple tasks running this shrinker, make the next
@@ -1277,12 +1283,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
         if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
                 s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
  
-               trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
+               trace_btrfs_extent_map_shrinker_scan_enter(fs_info, ctx.nr_to_scan,
                                                            nr, ctx.last_root,
                                                            ctx.last_ino);
         }
  
-       while (ctx.scanned < ctx.nr_to_scan) {
+       while (ctx.scanned < ctx.nr_to_scan && !btrfs_fs_closing(fs_info)) {
                 struct btrfs_root *root;
                 unsigned long count;
  
@@ -1340,5 +1346,34 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
                                                           ctx.last_ino);
         }
  
-       return nr_dropped;
+       atomic64_set(&fs_info->extent_map_shrinker_nr_to_scan, 0);
+}
+
+void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
+{
+       /*
+        * Do nothing if the shrinker is already running. In case of high memory
+        * pressure we can have a lot of tasks calling us and all passing the
+        * same nr_to_scan value, but in reality we may need only to free
+        * nr_to_scan extent maps (or less). In case we need to free more than
+        * that, we will be called again by the fs shrinker, so no worries about
+        * not doing enough work to reclaim memory from extent maps.
+        * We can also be repeatedly called with the same nr_to_scan value
+        * simply because the shrinker runs asynchronously and multiple calls
+        * to this function are made before the shrinker does enough progress.
+        *
+        * That's why we set the atomic counter to nr_to_scan only if its
+        * current value is zero, instead of incrementing the counter by
+        * nr_to_scan.
+        */
+       if (atomic64_cmpxchg(&fs_info->extent_map_shrinker_nr_to_scan, 0, nr_to_scan) != 0)
+               return;
+
+       queue_work(system_unbound_wq, &fs_info->extent_map_shrinker_work);
+}
+
+void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info)
+{
+       atomic64_set(&fs_info->extent_map_shrinker_nr_to_scan, 0);
+       INIT_WORK(&fs_info->extent_map_shrinker_work, btrfs_extent_map_shrinker_worker);
  }
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h

index 5154a8f1d26c94aa5287eb06ba311cfca88865f0..cd123b266b641611188530ca27f53861a38c417c 100644 (file)
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -189,6 +189,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
  int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
                                    struct extent_map *new_em,
                                    bool modified);
-long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan);
+void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan);
+void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info);
  
  #endif
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h

index 85e6a644aff251d5e9a2e537d6dd7bfa63ef12bb..678ce5259332bb81e046237f0e86cf45edaec812 100644 (file)
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -638,6 +638,8 @@ struct btrfs_fs_info {
         spinlock_t extent_map_shrinker_lock;
         u64 extent_map_shrinker_last_root;
         u64 extent_map_shrinker_last_ino;
+       atomic64_t extent_map_shrinker_nr_to_scan;
+       struct work_struct extent_map_shrinker_work;
  
         /* Protected by 'trans_lock'. */
         struct list_head dirty_cowonly_roots;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index e8aeeca0795f338d2b63793b52e891c7ff5c7b9a..7dc7c4fee996ceba4bfb957abdb599abe68db072 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -28,7 +28,6 @@
  #include <linux/btrfs.h>
  #include <linux/security.h>
  #include <linux/fs_parser.h>
-#include <linux/swap.h>
  #include "messages.h"
  #include "delayed-inode.h"
  #include "ctree.h"
@@ -2408,16 +2407,10 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
         const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan);
         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
  
-       /*
-        * We may be called from any task trying to allocate memory and we don't
-        * want to slow it down with scanning and dropping extent maps. It would
-        * also cause heavy lock contention if many tasks concurrently enter
-        * here. Therefore only allow kswapd tasks to scan and drop extent maps.
-        */
-       if (!current_is_kswapd())
-               return 0;
+       btrfs_free_extent_maps(fs_info, nr_to_scan);
  
-       return btrfs_free_extent_maps(fs_info, nr_to_scan);
+       /* The extent map shrinker runs asynchronously, so always return 0. */
+       return 0;
  }
  
  static const struct super_operations btrfs_super_ops = {
author	Filipe Manana <fdmanana@suse.com>
	Thu, 29 Aug 2024 14:23:32 +0000 (15:23 +0100)
committer	David Sterba <dsterba@suse.com>
	Mon, 11 Nov 2024 13:34:17 +0000 (14:34 +0100)
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/extent_map.c		patch \| blob \| history
fs/btrfs/extent_map.h		patch \| blob \| history
fs/btrfs/fs.h		patch \| blob \| history
fs/btrfs/super.c		patch \| blob \| history