From: Josef Bacik Date: Wed, 15 Dec 2021 20:40:08 +0000 (-0500) Subject: btrfs: add support for multiple global roots X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=f7238e5094048f20ab6f92d7c2f5ec50fc247f26;p=users%2Fhch%2Fblock.git btrfs: add support for multiple global roots With extent tree v2 you will be able to create multiple csum, extent, and free space trees. They will be used based on the block group, which will now use the block_group_item->chunk_objectid to point to the set of global roots that it will use. When allocating new block groups we'll simply mod the gigabyte offset of the block group against the number of global roots we have and that will be the block groups global id. >From there we can take the bytenr that we're modifying in the respective tree, look up the block group and get that block groups corresponding global root id. From there we can get to the appropriate global root for that bytenr. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 8202ad6aa131..3113f6d7f335 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2006,6 +2006,7 @@ static int read_one_block_group(struct btrfs_fs_info *info, cache->length = key->offset; cache->used = btrfs_stack_block_group_used(bgi); cache->flags = btrfs_stack_block_group_flags(bgi); + cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi); set_free_space_tree_thresholds(cache); @@ -2288,7 +2289,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, spin_lock(&block_group->lock); btrfs_set_stack_block_group_used(&bgi, block_group->used); btrfs_set_stack_block_group_chunk_objectid(&bgi, - BTRFS_FIRST_CHUNK_TREE_OBJECTID); + block_group->global_root_id); btrfs_set_stack_block_group_flags(&bgi, block_group->flags); key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; @@ -2444,6 +2445,27 @@ next: btrfs_trans_release_chunk_metadata(trans); } +/* + * For extent tree v2 we use the block_group_item->chunk_offset to point at our + * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID. + */ +static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset) +{ + u64 div = SZ_1G; + u64 index; + + if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) + return BTRFS_FIRST_CHUNK_TREE_OBJECTID; + + /* If we have a smaller fs index based on 128MiB. */ + if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL)) + div = SZ_128M; + + offset = div64_u64(offset, div); + div64_u64_rem(offset, fs_info->nr_global_roots, &index); + return index; +} + struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, u64 type, u64 chunk_offset, u64 size) @@ -2464,6 +2486,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran cache->flags = type; cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; + cache->global_root_id = calculate_global_root_id(fs_info, cache->start); + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) cache->needs_free_space = 1; @@ -2693,7 +2717,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr_offset(leaf, path->slots[0]); btrfs_set_stack_block_group_used(&bgi, cache->used); btrfs_set_stack_block_group_chunk_objectid(&bgi, - BTRFS_FIRST_CHUNK_TREE_OBJECTID); + cache->global_root_id); btrfs_set_stack_block_group_flags(&bgi, cache->flags); write_extent_buffer(leaf, &bgi, bi, sizeof(bgi)); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 5878b7ce3b78..93aabc68bb6a 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -68,6 +68,7 @@ struct btrfs_block_group { u64 bytes_super; u64 flags; u64 cache_generation; + u64 global_root_id; /* * If the free space extent count exceeds this number, convert the block diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f460a7bb9ae8..54513c4e891a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1058,6 +1058,8 @@ struct btrfs_fs_info { spinlock_t relocation_bg_lock; u64 data_reloc_bg; + u64 nr_global_roots; + spinlock_t zone_active_bgs_lock; struct list_head zone_active_bgs; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe1349737edb..ed62e81c0b66 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1289,12 +1289,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, return root; } +static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct btrfs_block_group *block_group; + u64 ret; + + if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) + return 0; + + if (bytenr) + block_group = btrfs_lookup_block_group(fs_info, bytenr); + else + block_group = btrfs_lookup_first_block_group(fs_info, bytenr); + ASSERT(block_group); + if (!block_group) + return 0; + ret = block_group->global_root_id; + btrfs_put_block_group(block_group); + + return ret; +} + struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_key key = { .objectid = BTRFS_CSUM_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, - .offset = 0, + .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); @@ -1305,7 +1326,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) struct btrfs_key key = { .objectid = BTRFS_EXTENT_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, - .offset = 0, + .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); @@ -2096,7 +2117,6 @@ static void backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; - struct btrfs_root *csum_root = btrfs_csum_root(info, 0); root_backup = info->super_for_commit->super_roots + next_backup; @@ -2130,6 +2150,7 @@ static void backup_super_roots(struct btrfs_fs_info *info) btrfs_header_level(info->block_group_root->node)); } else { struct btrfs_root *extent_root = btrfs_extent_root(info, 0); + struct btrfs_root *csum_root = btrfs_csum_root(info, 0); btrfs_set_backup_extent_root(root_backup, extent_root->node->start); @@ -2137,6 +2158,12 @@ static void backup_super_roots(struct btrfs_fs_info *info) btrfs_header_generation(extent_root->node)); btrfs_set_backup_extent_root_level(root_backup, btrfs_header_level(extent_root->node)); + + btrfs_set_backup_csum_root(root_backup, csum_root->node->start); + btrfs_set_backup_csum_root_gen(root_backup, + btrfs_header_generation(csum_root->node)); + btrfs_set_backup_csum_root_level(root_backup, + btrfs_header_level(csum_root->node)); } /* @@ -2158,12 +2185,6 @@ static void backup_super_roots(struct btrfs_fs_info *info) btrfs_set_backup_dev_root_level(root_backup, btrfs_header_level(info->dev_root->node)); - btrfs_set_backup_csum_root(root_backup, csum_root->node->start); - btrfs_set_backup_csum_root_gen(root_backup, - btrfs_header_generation(csum_root->node)); - btrfs_set_backup_csum_root_level(root_backup, - btrfs_header_level(csum_root->node)); - btrfs_set_backup_total_bytes(root_backup, btrfs_super_total_bytes(info->super_copy)); btrfs_set_backup_bytes_used(root_backup, @@ -2546,6 +2567,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, { struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_root *root; + u64 max_global_id = 0; int ret; struct btrfs_key key = { .objectid = objectid, @@ -2581,6 +2603,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, break; btrfs_release_path(path); + /* + * Just worry about this for extent tree, it'll be the same for + * everybody. + */ + if (objectid == BTRFS_EXTENT_TREE_OBJECTID) + max_global_id = max(max_global_id, key.offset); + found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { @@ -2598,6 +2627,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, } btrfs_release_path(path); + if (objectid == BTRFS_EXTENT_TREE_OBJECTID) + fs_info->nr_global_roots = max_global_id + 1; + if (!found || ret) { if (objectid == BTRFS_CSUM_TREE_OBJECTID) set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 655aad0f9e1c..0ae54d8c10d6 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root( .offset = 0, }; + if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) + key.offset = block_group->global_root_id; return btrfs_global_root(block_group->fs_info, &key); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1f1c25db6f6b..37f6ec2a3c56 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info) super->cache_generation = 0; if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags)) super->uuid_tree_generation = root_item->generation; + + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { + root_item = &fs_info->block_group_root->root_item; + + super->block_group_root = root_item->bytenr; + super->block_group_root_generation = root_item->generation; + super->block_group_root_level = root_item->level; + } } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) @@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) list_add_tail(&fs_info->chunk_root->dirty_list, &cur_trans->switch_commits); + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { + btrfs_set_root_node(&fs_info->block_group_root->root_item, + fs_info->block_group_root->node); + list_add_tail(&fs_info->block_group_root->dirty_list, + &cur_trans->switch_commits); + } + switch_commit_roots(trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 64c7d2a2bb3e..e56c0107eea3 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot, static int check_block_group_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_block_group_item bgi; u32 item_size = btrfs_item_size(leaf, slot); + u64 chunk_objectid; u64 flags; u64 type; @@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf, read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), sizeof(bgi)); - if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) != - BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi); + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { + /* + * We don't init the nr_global_roots until we load the global + * roots, so this could be 0 at mount time. If it's 0 we'll + * just assume we're fine, and later we'll check against our + * actual value. + */ + if (unlikely(fs_info->nr_global_roots && + chunk_objectid >= fs_info->nr_global_roots)) { + block_group_err(leaf, slot, + "invalid block group global root id, have %llu, needs to be <= %llu", + chunk_objectid, + fs_info->nr_global_roots); + return -EUCLEAN; + } + } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { block_group_err(leaf, slot, "invalid block group chunk objectid, have %llu expect %llu", btrfs_stack_block_group_chunk_objectid(&bgi),