]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
btrfs: reduce lock contention when eb cache miss for btree search
authorRobbie Ko <robbieko@synology.com>
Tue, 15 Oct 2024 07:41:37 +0000 (15:41 +0800)
committerDavid Sterba <dsterba@suse.com>
Mon, 11 Nov 2024 13:34:17 +0000 (14:34 +0100)
When crawling btree, if an eb cache miss occurs, we change to use the eb
read lock and release all previous locks (including the parent lock) to
reduce lock contention.

If an eb cache miss occurs in a leaf and needs to execute IO, before this
change we released locks only from level 2 and up and we read a leaf's
content from disk while holding a lock on its parent (level 1), causing
the unnecessary lock contention on the parent, after this change we
release locks from level 1 and up, but we lock level 0, and read leaf's
content from disk.

Because we have prepared the check parameters and the read lock of eb we
hold, we can ensure that no race will occur during the check and cause
unexpected errors.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Robbie Ko <robbieko@synology.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.c

index b11ec86102e37b9ff69ce2c2666114dc88334e62..4f34208126f785238f2504bbc3f449965be5250c 100644 (file)
@@ -1515,12 +1515,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        struct btrfs_tree_parent_check check = { 0 };
        u64 blocknr;
        u64 gen;
-       struct extent_buffer *tmp;
-       int ret;
+       struct extent_buffer *tmp = NULL;
+       int ret = 0;
        int parent_level;
-       bool unlock_up;
+       int err;
+       bool read_tmp = false;
+       bool tmp_locked = false;
+       bool path_released = false;
 
-       unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
        blocknr = btrfs_node_blockptr(*eb_ret, slot);
        gen = btrfs_node_ptr_generation(*eb_ret, slot);
        parent_level = btrfs_header_level(*eb_ret);
@@ -1551,68 +1553,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
                         */
                        if (btrfs_verify_level_key(tmp,
                                        parent_level - 1, &check.first_key, gen)) {
-                               free_extent_buffer(tmp);
-                               return -EUCLEAN;
+                               ret = -EUCLEAN;
+                               goto out;
                        }
                        *eb_ret = tmp;
-                       return 0;
+                       tmp = NULL;
+                       ret = 0;
+                       goto out;
                }
 
                if (p->nowait) {
-                       free_extent_buffer(tmp);
-                       return -EAGAIN;
+                       ret = -EAGAIN;
+                       goto out;
                }
 
-               if (unlock_up)
+               if (!p->skip_locking) {
                        btrfs_unlock_up_safe(p, level + 1);
-
-               /* now we're allowed to do a blocking uptodate check */
-               ret = btrfs_read_extent_buffer(tmp, &check);
-               if (ret) {
-                       free_extent_buffer(tmp);
+                       tmp_locked = true;
+                       btrfs_tree_read_lock(tmp);
                        btrfs_release_path(p);
-                       return ret;
+                       ret = -EAGAIN;
+                       path_released = true;
                }
 
-               if (unlock_up)
-                       ret = -EAGAIN;
+               /* Now we're allowed to do a blocking uptodate check. */
+               err = btrfs_read_extent_buffer(tmp, &check);
+               if (err) {
+                       ret = err;
+                       goto out;
+               }
 
+               if (ret == 0) {
+                       ASSERT(!tmp_locked);
+                       *eb_ret = tmp;
+                       tmp = NULL;
+               }
                goto out;
        } else if (p->nowait) {
-               return -EAGAIN;
+               ret = -EAGAIN;
+               goto out;
        }
 
-       if (unlock_up) {
+       if (!p->skip_locking) {
                btrfs_unlock_up_safe(p, level + 1);
                ret = -EAGAIN;
-       } else {
-               ret = 0;
        }
 
        if (p->reada != READA_NONE)
                reada_for_search(fs_info, p, level, slot, key->objectid);
 
-       tmp = read_tree_block(fs_info, blocknr, &check);
+       tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level);
        if (IS_ERR(tmp)) {
+               ret = PTR_ERR(tmp);
+               tmp = NULL;
+               goto out;
+       }
+       read_tmp = true;
+
+       if (!p->skip_locking) {
+               ASSERT(ret == -EAGAIN);
+               tmp_locked = true;
+               btrfs_tree_read_lock(tmp);
                btrfs_release_path(p);
-               return PTR_ERR(tmp);
+               path_released = true;
+       }
+
+       /* Now we're allowed to do a blocking uptodate check. */
+       err = btrfs_read_extent_buffer(tmp, &check);
+       if (err) {
+               ret = err;
+               goto out;
        }
+
        /*
         * If the read above didn't mark this buffer up to date,
         * it will never end up being up to date.  Set ret to EIO now
         * and give up so that our caller doesn't loop forever
         * on our EAGAINs.
         */
-       if (!extent_buffer_uptodate(tmp))
+       if (!extent_buffer_uptodate(tmp)) {
                ret = -EIO;
+               goto out;
+       }
 
-out:
        if (ret == 0) {
+               ASSERT(!tmp_locked);
                *eb_ret = tmp;
-       } else {
-               free_extent_buffer(tmp);
-               btrfs_release_path(p);
+               tmp = NULL;
+       }
+out:
+       if (tmp) {
+               if (tmp_locked)
+                       btrfs_tree_read_unlock(tmp);
+               if (read_tmp && ret && ret != -EAGAIN)
+                       free_extent_buffer_stale(tmp);
+               else
+                       free_extent_buffer(tmp);
        }
+       if (ret && !path_released)
+               btrfs_release_path(p);
 
        return ret;
 }
@@ -2198,7 +2237,7 @@ cow_done:
                }
 
                err = read_block_for_search(root, p, &b, level, slot, key);
-               if (err == -EAGAIN)
+               if (err == -EAGAIN && !p->nowait)
                        goto again;
                if (err) {
                        ret = err;
@@ -2325,7 +2364,7 @@ again:
                }
 
                err = read_block_for_search(root, p, &b, level, slot, key);
-               if (err == -EAGAIN)
+               if (err == -EAGAIN && !p->nowait)
                        goto again;
                if (err) {
                        ret = err;