btrfs: force chunk allocation if our global rsv is larger than metadata
author Josef Bacik <josef@toxicpanda.com>
Fri, 13 Mar 2020 19:28:48 +0000 (15:28 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Jun 2020 07:31:13 +0000 (09:31 +0200)
commit 9c343784c4328781129bcf9e671645f69fe4b38a upstream.

Nikolay noticed a bunch of test failures with my global rsv steal
patches.  At first he thought they were introduced by those patches, but
the tests have been failing for a while with 64k nodes.

The problem is that with 64k nodes we have a global reserve that
calculates out to 13MiB on a freshly made file system, which only has
8MiB of metadata space.  Because of changes I previously made we no
longer account for the global reserve in the overcommit logic, so the
overcommit code (working as designed) keeps allowing reservations even
though we are already overcommitted.
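
To make the failure mode concrete, here is a minimal userspace sketch of
the shape of that overcommit decision; the names and the arithmetic are
simplified stand-ins, not the kernel code:

#include <stdbool.h>
#include <stdint.h>

/*
 * Simplified illustration of the metadata overcommit decision, not the
 * real btrfs_can_overcommit().  "unallocated" stands for device space
 * that could still be turned into new metadata chunks.
 */
static bool can_overcommit_sketch(uint64_t total_bytes, uint64_t used,
				  uint64_t wanted, uint64_t unallocated)
{
	/*
	 * Before commit 0096420adb03 this check also accounted for the
	 * global reserve, so an oversized reserve made it fail and
	 * indirectly drove flushing and chunk allocation.
	 */
	return used + wanted < total_bytes + unallocated;
}

With the changelog's numbers (8MiB of metadata space against a 13MiB
reserve) this keeps succeeding as long as unallocated device space
remains, so nothing ever pushes us to allocate a real metadata chunk.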

However in some corner cases, for example btrfs/170, we will fill the
entire file system with data chunks before there is enough space
pressure to allocate a metadata chunk.  Then once the fs is full we hit
ENOSPC, because we cannot overcommit and the global reserve is taking up
all of the available metadata space.

The ideal way to deal with this is to change our space reservation code
to take into account the height of the trees that we are modifying, so
that our global reserve calculation does not end up so obscenely large.
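
To see why the reserve balloons with 64k nodes: reservations are sized
for the worst-case b-tree height rather than the height the trees
actually have.  A toy calculation modeled on
btrfs_calc_insert_metadata_size() (the formula here is illustrative, not
authoritative):

#include <stdint.h>
#include <stdio.h>

#define BTRFS_MAX_LEVEL 8	/* worst-case height assumed by reservations */

/* Modeled on btrfs_calc_insert_metadata_size(); treat as illustrative. */
static uint64_t insert_metadata_size(uint32_t nodesize, unsigned int num_items)
{
	return (uint64_t)nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}

int main(void)
{
	/* Reserving for a single item insertion: */
	printf("4k nodes:  %llu KiB\n",
	       (unsigned long long)(insert_metadata_size(4096, 1) >> 10));
	printf("64k nodes: %llu KiB\n",
	       (unsigned long long)(insert_metadata_size(65536, 1) >> 10));
	return 0;
}

A height-aware calculation would charge a one or two level tree far
less, which is the point of the rework described above.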

However that is a huge undertaking.  Instead, fix this by forcing a
chunk allocation if the global reserve is larger than the total metadata
space.  This gives us essentially the same behavior we had before: a
chunk gets allocated and these tests can pass.

This is meant to be a stop-gap measure until we can tackle the "tree
height only" project.

Fixes: 0096420adb03 ("btrfs: do not account global reserve in can_overcommit")
CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/btrfs/block-rsv.c
fs/btrfs/transaction.c

diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index d07bd41a7c1e01b2b12254802702198606796108..343400d49bd1f595a403227adabd1fd6bd958fe3 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -5,6 +5,7 @@
 #include "block-rsv.h"
 #include "space-info.h"
 #include "transaction.h"
+#include "block-group.h"
 
 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
@@ -313,6 +314,8 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
        else
                block_rsv->full = 0;
 
+       if (block_rsv->size >= sinfo->total_bytes)
+               sinfo->force_alloc = CHUNK_ALLOC_FORCE;
        spin_unlock(&block_rsv->lock);
        spin_unlock(&sinfo->lock);
 }
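
The hunk above only marks the space_info; the actual allocation relies
on pre-existing machinery in fs/btrfs/block-group.c, where
btrfs_chunk_alloc() upgrades a caller's force level to whatever was
stashed in space_info->force_alloc and clears it once a chunk has been
allocated.  A self-contained sketch of that behavior (a paraphrase with
locking, looping and error handling elided, not the literal kernel
code):

#include <stdbool.h>
#include <stdint.h>

/* Same value ordering as enum btrfs_chunk_alloc_enum in block-group.h. */
enum chunk_alloc_enum {
	CHUNK_ALLOC_NO_FORCE,
	CHUNK_ALLOC_LIMITED,
	CHUNK_ALLOC_FORCE,
};

struct space_info_sketch {
	uint64_t bytes_used;
	uint64_t total_bytes;
	enum chunk_alloc_enum force_alloc;
};

/* Stand-in for should_alloc_chunk(): FORCE always allocates. */
static bool should_alloc_sketch(const struct space_info_sketch *si,
				enum chunk_alloc_enum force)
{
	return force == CHUNK_ALLOC_FORCE ||
	       si->bytes_used * 10 >= si->total_bytes * 8;
}

static int chunk_alloc_sketch(struct space_info_sketch *si,
			      enum chunk_alloc_enum force)
{
	/* A NO_FORCE caller is upgraded to the stashed force level. */
	if (force < si->force_alloc)
		force = si->force_alloc;

	if (!should_alloc_sketch(si, force))
		return 0;

	/* ... allocate one new metadata chunk ... */
	si->total_bytes += 8ULL << 20;

	/* Anyone racing in later sees NO_FORCE and skips the allocation. */
	si->force_alloc = CHUNK_ALLOC_NO_FORCE;
	return 1;
}

This is why the transaction.c hunk below can pass CHUNK_ALLOC_NO_FORCE
and still be guaranteed an allocation on the first pass through.
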
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a94270a95bbdc4f55fb8578c97548a7cebf5ab51..54589e940f9af1b1c7ba7f47c73e6104ae3c724d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -21,6 +21,7 @@
 #include "dev-replace.h"
 #include "qgroup.h"
 #include "block-group.h"
+#include "space-info.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -451,6 +452,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
        u64 num_bytes = 0;
        u64 qgroup_reserved = 0;
        bool reloc_reserved = false;
+       bool do_chunk_alloc = false;
        int ret;
 
        /* Send isn't supposed to start transactions. */
@@ -513,6 +515,9 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                                                          delayed_refs_bytes);
                        num_bytes -= delayed_refs_bytes;
                }
+
+               if (rsv->space_info->force_alloc)
+                       do_chunk_alloc = true;
        } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
                   !delayed_refs_rsv->full) {
                /*
@@ -594,6 +599,19 @@ got_it:
        if (!current->journal_info)
                current->journal_info = h;
 
+       /*
+        * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
+        * ALLOC_FORCE the first run through, and then we won't allocate for
+        * anybody else who races in later.  We don't care about the return
+        * value here.
+        */
+       if (do_chunk_alloc && num_bytes) {
+               u64 flags = h->block_rsv->space_info->flags;
+
+               btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
+                                 CHUNK_ALLOC_NO_FORCE);
+       }
+
        /*
         * btrfs_record_root_in_trans() needs to alloc new extents, and may
         * call btrfs_join_transaction() while we're also starting a
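
Putting the two hunks together, the sequence on a fresh 64k-node file
system looks roughly like this, continuing the toy sketch above (the
13MiB and 8MiB figures are the ones from the changelog):

int main(void)
{
	struct space_info_sketch si = {
		.bytes_used = 0,
		.total_bytes = 8ULL << 20,	/* one 8MiB metadata chunk */
		.force_alloc = CHUNK_ALLOC_NO_FORCE,
	};
	uint64_t global_rsv_size = 13ULL << 20;	/* ~13MiB with 64k nodes */

	/* btrfs_update_global_block_rsv(): the block-rsv.c hunk. */
	if (global_rsv_size >= si.total_bytes)
		si.force_alloc = CHUNK_ALLOC_FORCE;

	/* start_transaction(): the transaction.c hunks. */
	if (si.force_alloc != CHUNK_ALLOC_NO_FORCE)
		chunk_alloc_sketch(&si, CHUNK_ALLOC_NO_FORCE);

	/* si.total_bytes is now 16MiB, comfortably above the reserve. */
	return 0;
}

As the comment in the patch notes, the return value of the allocation is
deliberately ignored: even a failed attempt leaves us no worse off than
before.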